# LAB 5: Web Scraping using Python

---

## Question 1: Basic HTML Request and Parsing
Write a Python program to fetch the HTML content of https://www.geeksforgeeks.org using requests.

In [None]:
import requests

# Target page for the lab; later cells reuse `url` and `response`.
url = 'https://www.geeksforgeeks.org'
# Send a browser-like User-Agent instead of the default python-requests one.
headers = {'User-Agent': 'Mozilla/5.0'}

# requests applies no timeout by default, so without one this cell could
# block forever if the server never answers.
response = requests.get(url, headers=headers, timeout=10)
# Show only the first 500 characters so the output stays readable.
print(response.text[:500])

## Question 2: Parse HTML and Print Page Title
Parse the HTML using BeautifulSoup and print the title of the page.

In [None]:
from bs4 import BeautifulSoup

# Parse the HTML fetched earlier with Python's built-in parser.
soup = BeautifulSoup(response.text, 'html.parser')

# soup.title is None when the document has no <title> tag; guard so the
# cell reports that instead of raising AttributeError on `.string`.
if soup.title is not None:
    print(soup.title.string)
else:
    print('No <title> tag found')

## Question 3: Handle HTTP and Network Errors
Handle HTTP errors and network exceptions while fetching the webpage.

In [None]:
from requests.exceptions import HTTPError, ConnectionError, Timeout, RequestException

# Fetch the page and report which class of failure occurred, if any.
try:
    r = requests.get(url, timeout=10)
    # Turn 4xx/5xx responses into an HTTPError so they reach the handler below.
    r.raise_for_status()
    print('Success')
except HTTPError as e:
    print('HTTP Error', e)
# Timeout must be tested before ConnectionError: requests' ConnectTimeout
# inherits from both, so the opposite order would label connect timeouts
# as plain connection errors.
except Timeout:
    print('Timeout Error')
except ConnectionError:
    print('Connection Error')
# Catch-all for any other requests failure (base class of the ones above).
except RequestException as e:
    print('Request Error', e)

## Question 4: Extract Hyperlinks
Extract the first five hyperlinks using find() and find_all().

In [None]:
# find() returns only the first matching tag, or None when nothing matches.
# href=True restricts the match to anchors that actually carry a link target.
first_link = soup.find('a', href=True)
if first_link is not None:
    print('First link:', first_link.get_text(strip=True), first_link.get('href'))

# find_all() with limit=5 stops searching after five matches; filtering on
# href=True avoids printing anchors whose href would be None.
links = soup.find_all('a', href=True, limit=5)
for link in links:
    print(link.get_text(strip=True), link.get('href'))

## Question 5: Extract H2 Headings and Save to CSV
Scrape all `<h2>` headings and store them in `headings.csv`.

In [None]:
import csv

# Gather every <h2> element from the parsed page.
headings = soup.find_all('h2')

# newline='' leaves line-ending handling to the csv module.
with open('headings.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    # Header row first, then one row per heading numbered from 1.
    writer.writerow(['Heading Number', 'H2 Text'])
    writer.writerows(
        [number, tag.get_text(strip=True)]
        for number, tag in enumerate(headings, start=1)
    )

print('headings.csv created')

## Question 6: Scrape Wikipedia Table
Scrape all rows from the first table of the Wikipedia page "List of countries and dependencies by population".

In [None]:
wiki_url = 'https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population'
# Reuse the browser-like headers from the first request and bound the wait
# time so the cell cannot hang indefinitely.
wiki_res = requests.get(wiki_url, headers=headers, timeout=10)
# Fail here with a clear HTTPError rather than parsing an error page below.
wiki_res.raise_for_status()
wiki_soup = BeautifulSoup(wiki_res.text, 'html.parser')

# find() returns the first table with class "wikitable", or None if absent.
table = wiki_soup.find('table', class_='wikitable')
if table is None:
    print('No wikitable found on the page')
else:
    for row in table.find_all('tr'):
        # Header cells (<th>) and data cells (<td>) are collected together.
        cells = row.find_all(['th', 'td'])
        data = [cell.get_text(strip=True) for cell in cells]
        # Skip rows that contain no cells at all.
        if data:
            print(data)