Q1. Write a Python program to scrape all available books from Books to Scrape
(https://books.toscrape.com/), a live site built for practicing web scraping (safe, legal, and
without anti-bot measures). For each book, extract the following details:
1. Title
2. Price
3. Availability (In stock / Out of stock)
4. Star Rating (One, Two, Three, Four, Five)
Store the scraped results into a Pandas DataFrame and export them to a CSV file named
books.csv.
(Note: Use the requests library to fetch the HTML pages. Use BeautifulSoup to parse them and
extract the book details, and handle pagination so that books from all pages are scraped.)

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from google.colab import files

# Root URL of the Books to Scrape site; page paths are appended to it.
BASE_URL = "https://books.toscrape.com/"
# Accumulates one dict per scraped book; filled in place by scrape_page().
books = []

def get_star_rating(tag):
    """Return the star-rating word found among *tag*'s CSS classes.

    Args:
        tag: An element exposing ``.get("class")`` (e.g. a BeautifulSoup
            tag), or ``None`` when the rating element was not found.

    Returns:
        One of "One", "Two", "Three", "Four", "Five", or "Unknown" when
        the tag is missing or carries none of those classes.
    """
    # A failed lookup upstream yields None; report it instead of crashing.
    if tag is None:
        return "Unknown"

    classes = tag.get("class") or []
    # Some parsers hand back the raw attribute string rather than a list.
    if isinstance(classes, str):
        classes = classes.split()

    ratings = ("One", "Two", "Three", "Four", "Five")
    return next((r for r in ratings if r in classes), "Unknown")

def scrape_page(url):
    """Scrape the listing page at *url* and every page that follows it.

    Each ``article.product_pod`` element contributes one dict with the keys
    "Title", "Price", "Availability" and "Star Rating", appended to the
    module-level ``books`` list. Pagination is followed through the
    ``li.next`` link until a page has none.

    Args:
        url: Absolute URL of the first listing page to scrape.

    Raises:
        requests.RequestException: If a page cannot be fetched or the
            server answers with an HTTP error status.
    """
    from urllib.parse import urljoin

    # One session reuses the connection across all the page requests.
    with requests.Session() as session:
        # Iterate instead of recursing: one stack frame no matter how many
        # pages the catalogue has.
        while url:
            response = session.get(url, timeout=10)
            # Fail loudly rather than parsing an error page as a listing.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")

            for article in soup.find_all("article", class_="product_pod"):
                price_tag = article.find("p", class_="price_color")
                # Match on "availability" alone so the lookup does not
                # depend on the stock-state class that accompanies it.
                stock_tag = article.find("p", class_="availability")
                rating_tag = article.find("p", class_="star-rating")
                books.append({
                    "Title": article.h3.a["title"],
                    "Price": price_tag.text.strip() if price_tag else "N/A",
                    "Availability": (
                        stock_tag.text.strip() if stock_tag else "N/A"
                    ),
                    "Star Rating": get_star_rating(rating_tag),
                })

            next_btn = soup.find("li", class_="next")
            next_link = next_btn.a if next_btn else None
            # urljoin resolves the relative href against the current page,
            # so no special-casing of the "catalogue/" prefix is needed.
            url = urljoin(url, next_link["href"]) if next_link else None

# Crawl the catalogue starting from its first page; scrape_page() fills
# the module-level ``books`` list.
start_url = f"{BASE_URL}catalogue/page-1.html"
scrape_page(start_url)

# Tabulate the collected rows and write them out without the index column.
output_csv = "books.csv"
df = pd.DataFrame(books)
df.to_csv(output_csv, index=False)

# Hand the file to the browser (Colab helper).
files.download(output_csv)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [6]:
!pip install selenium


Collecting selenium
  Downloading selenium-4.35.0-py3-none-any.whl.metadata (7.4 kB)
Collecting trio~=0.30.0 (from selenium)
  Downloading trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.12.2 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting typing_extensions~=4.14.0 (from selenium)
  Downloading typing_extensions-4.14.1-py3-none-any.whl.metadata (3.0 kB)
Collecting outcome (from trio~=0.30.0->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.12.2->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.35.0-py3-none-any.whl (9.6 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.6/9.6 MB 92.8 MB/s eta 0:00:00
Downloading trio-0.30.0-py3-none-any.whl (499 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 499.2/499.2 kB 37.… [output truncated]

Q2. Write a Python program to scrape the IMDB Top 250 Movies list
(https://www.imdb.com/chart/top/). For each movie, extract the following details:
1. Rank (1–250)
2. Movie Title
3. Year of Release
4. IMDB Rating
Store the results in a Pandas DataFrame and export it to a CSV file named imdb_top250.csv.
(Note: Use Selenium/Playwright to scrape the required details from this website)

In [None]:
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import pandas as pd
import time
from google.colab import files

# Scrape the IMDb Top 250 chart with headless Chrome and export
# rank / title / year / rating to imdb_top250.csv.

url = "https://www.imdb.com/chart/top/"
columns = ["Rank", "Movie Title", "Year of Release", "IMDB Rating"]

# NOTE(review): these selectors target a table-based chart layout
# (tbody.lister-list / td.titleColumn / td.imdbRating). Confirm that they
# still match the live page; if they do not, no rows will be found.
row_selector = "tbody.lister-list tr"

options = Options()
options.add_argument("--headless")
# Chrome typically needs these two flags to start inside a container such
# as a Colab VM (running as root, small /dev/shm) -- verify for your setup.
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
driver = webdriver.Chrome(options=options)

movies = []
try:
    driver.get(url)

    # Wait for the chart rows to appear instead of sleeping a fixed time.
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, row_selector))
        )
    except TimeoutException:
        # Fall through: the lookup below yields no rows and the warning
        # after the loop reports it.
        pass

    rows = driver.find_elements(By.CSS_SELECTOR, row_selector)

    # Rank is the row's position in the list, starting at 1.
    for i, row in enumerate(rows, start=1):
        title_column = row.find_element(By.CSS_SELECTOR, "td.titleColumn")
        title = title_column.find_element(By.TAG_NAME, "a").text
        # The year is rendered in parentheses, e.g. "(1994)".
        year = title_column.find_element(
            By.CLASS_NAME, "secondaryInfo"
        ).text.strip("()")
        rating = row.find_element(
            By.CSS_SELECTOR, "td.imdbRating strong"
        ).text
        movies.append({
            "Rank": i,
            "Movie Title": title,
            "Year of Release": year,
            "IMDB Rating": rating
        })
finally:
    # Always shut the browser down, even if an element lookup failed.
    driver.quit()

if not movies:
    print("Warning: no movie rows were found; check the page selectors.")

# Passing the columns explicitly keeps the CSV header even with no rows.
df = pd.DataFrame(movies, columns=columns)
df.to_csv("imdb_top250.csv", index=False)
files.download("imdb_top250.csv")

Q3. Write a Python program to scrape the weather information for top world cities from the
given website (https://www.timeanddate.com/weather/). For each city, extract the following
details:
1. City Name
2. Temperature
3. Weather Condition (e.g., Clear, Cloudy, Rainy, etc.)
Store the results in a Pandas DataFrame and export it to a CSV file named weather.csv.

In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from google.colab import files

# Fetch the current temperature and weather condition for a fixed list of
# cities from timeanddate.com and export them to weather.csv.

# (display name, URL path below base_url) for each city to look up.
cities = [
    ("London", "uk/london"),
    ("New York", "usa/new-york"),
    ("Tokyo", "japan/tokyo"),
    ("Paris", "france/paris"),
    ("Sydney", "australia/sydney")
]

base_url = "https://www.timeanddate.com/weather/"
headers = {"User-Agent": "Mozilla/5.0"}

records = []

for city_name, path in cities:
    url = base_url + path
    # Defaults used when the page loads but the expected elements are absent.
    temp = "N/A"
    cond = "N/A"

    try:
        resp = requests.get(url, headers=headers, timeout=10)
        resp.raise_for_status()
    except requests.RequestException as e:
        # Keep going with the other cities; record the failure in the row.
        temp = "Error"
        cond = str(e)
    else:
        soup = BeautifulSoup(resp.content, "html.parser")

        # The temperature and condition are read from the #qlook element.
        qlook = soup.select_one("#qlook")
        if qlook is not None:
            temp_elem = qlook.find("div", class_="h2")
            cond_elem = qlook.find("p")

            if temp_elem is not None:
                temp = temp_elem.text.strip()
            if cond_elem is not None:
                cond = cond_elem.text.strip()

    records.append({
        "City Name": city_name,
        "Temperature": temp,
        "Weather Condition": cond
    })

df = pd.DataFrame(records)
df.to_csv("weather.csv", index=False)
print("Weather data saved to weather.csv")
files.download('weather.csv')

Weather data saved to weather.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>