Part 1 : Write a Python program to scrape all available books from the website 

(a) Title 

In [9]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Website URL pattern (pages go from 1 to 50)
base_url = "https://books.toscrape.com/catalogue/page-{}.html"

all_books = []  # to store all book details

# Loop through pages (1 to 50)
for page in range(1, 51):
    url = base_url.format(page)
    response = requests.get(url)   # download the page
    soup = BeautifulSoup(response.text, "html.parser")

    # find all book containers
    books = soup.find_all("article", class_="product_pod")

    # if no books found, stop the loop
    if not books:
        break

    # extract details of each book
    for book in books:
        title = book.h3.a["title"]  # book title
        price = book.find("p", class_="price_color").text.strip()
        availability = book.find("p", class_="instock availability").text.strip()
        star_class = book.find("p", class_="star-rating")["class"]
        star_rating = [c for c in star_class if c != "star-rating"][0]

        all_books.append({
            "Title": title,
            "Price": price,
            "Availability": availability,
            "Star Rating": star_rating
        })

    print(f"Scraped page {page}")  # just to see progress

# Convert list to DataFrame
df = pd.DataFrame(all_books)

# Save to CSV
df.to_csv("books.csv", index=False, encoding="utf-8")

print("✅ Scraping finished! Total books scraped:", len(df))



Scraped page 1
Scraped page 2
Scraped page 3
Scraped page 4
Scraped page 5
Scraped page 6
Scraped page 7
Scraped page 8
Scraped page 9
Scraped page 10
Scraped page 11
Scraped page 12
Scraped page 13
Scraped page 14
Scraped page 15
Scraped page 16
Scraped page 17
Scraped page 18
Scraped page 19
Scraped page 20
Scraped page 21
Scraped page 22
Scraped page 23
Scraped page 24
Scraped page 25
Scraped page 26
Scraped page 27
Scraped page 28
Scraped page 29
Scraped page 30
Scraped page 31
Scraped page 32
Scraped page 33
Scraped page 34
Scraped page 35
Scraped page 36
Scraped page 37
Scraped page 38
Scraped page 39
Scraped page 40
Scraped page 41
Scraped page 42
Scraped page 43
Scraped page 44
Scraped page 45
Scraped page 46
Scraped page 47
Scraped page 48
Scraped page 49
Scraped page 50
✅ Scraping finished! Total books scraped: 1000


Q2. Write a Python program to scrape the IMDB Top 250 Movies list 
(https://www.imdb.com/chart/top/) . For each movie, extract the following details: 
1. Rank (1–250) 
2. Movie Title 
3. Year of Release 
4. IMDB Rating 

Store the results in a Pandas DataFrame and export it to a CSV file named imdb_top250.csv. 
(Note: Use Selenium/Playwright to scrape the required details from this website) 


In [10]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Start the Selenium WebDriver (here we use Chrome)
# Make sure you have Chrome + ChromeDriver installed
driver = webdriver.Chrome()

# Open IMDb Top 250 page
url = "https://www.imdb.com/chart/top/"
driver.get(url)

# Wait for page to load
time.sleep(3)

# Find all movie rows
movies = driver.find_elements(By.CSS_SELECTOR, "li.ipc-metadata-list-summary-item")

data = []

for rank, movie in enumerate(movies, start=1):
    # Movie Title
    title = movie.find_element(By.CSS_SELECTOR, "h3").text
    
    # Extract year (inside span)
    year = movie.find_element(By.CSS_SELECTOR, "span.cli-title-metadata-item").text
    
    # IMDB Rating
    rating = movie.find_element(By.CSS_SELECTOR, "span.ipc-rating-star--imdb").text
    
    data.append({
        "Rank": rank,
        "Title": title,
        "Year": year,
        "IMDB Rating": rating
    })

# Close browser
driver.quit()

# Save to DataFrame
df = pd.DataFrame(data)
df.to_csv("imdb_top250.csv", index=False, encoding="utf-8")

print("✅ Scraping finished! imdb_top250.csv created successfully.")


✅ Scraping finished! imdb_top250.csv created successfully.


Q3. Write a Python program to scrape the weather information for top world cities from the 
given website (https://www.timeanddate.com/weather/) . For each city, extract the following 
details: 
1. City Name 
2. Temperature 
3. Weather Condition (e.g., Clear, Cloudy, Rainy, etc.) 

Store the results in a Pandas DataFrame and export it to a CSV file named weather.csv.

In [12]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Correct URL with world cities weather
url = "https://www.timeanddate.com/weather/?sort=1"

response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

# Find the weather table
table = soup.find("table", class_="zebra tb-wt fw va-m tb-hover")

data = []

# Check if table exists
if table:
    for row in table.find_all("tr")[1:]:  # skip header row
        cols = row.find_all("td")
        if len(cols) >= 3:
            city = cols[0].text.strip()
            temp = cols[1].text.strip()
            condition = cols[2].text.strip()

            data.append({
                "City": city,
                "Temperature": temp,
                "Condition": condition
            })

# Save results
df = pd.DataFrame(data)
df.to_csv("weather.csv", index=False, encoding="utf-8")

print("✅ Scraping finished! Total cities scraped:", len(df))



✅ Scraping finished! Total cities scraped: 0
