In [1]:
# Import libraries
import requests
from bs4 import BeautifulSoup

import pandas as pd

Q1: Books to Scrape - Scraping all books with pagination

In [5]:
base_url = "https://books.toscrape.com/catalogue/page-{}.html"
books = []

page = 1
while True:
    r = requests.get(base_url.format(page))
    if r.status_code != 200:
        break

    soup = BeautifulSoup(r.text, "html.parser")
    items = soup.find_all("article", class_="product_pod")
    if not items:
        break

    for item in items:
        title = item.h3.a["title"]
        price = item.find("p", class_="price_color").text.strip()
        availability = item.find("p", class_="instock availability").text.strip()
        rating = item.find("p", class_="star-rating")["class"][1]
        books.append([title, price, availability, rating])

    page += 1

df_books = pd.DataFrame(books, columns=["Title", "Price", "Availability", "Star Rating"])
df_books.to_csv("books.csv", index=False)

print("Total books scraped:", df_books.shape[0])
print("\nSample data:\n")
print(df_books.head())

Total books scraped: 1000

Sample data:

                                   Title    Price Availability Star Rating
0                   A Light in the Attic  Â£51.77     In stock       Three
1                     Tipping the Velvet  Â£53.74     In stock         One
2                             Soumission  Â£50.10     In stock         One
3                          Sharp Objects  Â£47.82     In stock        Four
4  Sapiens: A Brief History of Humankind  Â£54.23     In stock        Five


Q2: IMDB Top 250 Movies

**Note:**

This code uses Selenium to scrape IMDB Top 250 as requested.  

However, in Google Colab, headless Chrome does not fully render the JavaScript content on IMDB's page.  

As a result, **rows** is empty, and **df_imdb** contains 0 rows.  


In [24]:
!pip install selenium
!pip install chromedriver_autoinstaller

import chromedriver_autoinstaller
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
import time

chromedriver_autoinstaller.install()
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Chrome(options=options)
driver.get("https://www.imdb.com/chart/top/")
time.sleep(3)

movies = []
rows = driver.find_elements(By.CSS_SELECTOR, "table.chart.full-width tbody tr")

for row in rows:
    rank = int(row.find_element(By.CSS_SELECTOR, "td.titleColumn").text.split(".")[0])
    title = row.find_element(By.CSS_SELECTOR, "td.titleColumn a").text
    year = row.find_element(By.CSS_SELECTOR, "td.titleColumn span").text.strip("()")
    rating = row.find_element(By.CSS_SELECTOR, "td.imdbRating strong").text
    movies.append([rank, title, year, rating])

driver.quit()

df_imdb = pd.DataFrame(movies, columns=["Rank", "Title", "Year", "IMDB Rating"])
df_imdb.to_csv("imdb_top250.csv", index=False)

print("Total movies scraped:", df_imdb.shape[0])
print(df_imdb.head())

Total movies scraped: 0
Empty DataFrame
Columns: [Rank, Title, Year, IMDB Rating]
Index: []


Q3: Weather Information for Top World Cities

In [19]:
base_url = "https://www.timeanddate.com/weather/"
cities = ["usa/new-york", "uk/london", "japan/tokyo", "india/mumbai", "australia/sydney"]

weather_data = []

for city in cities:
    url = base_url + city
    r = requests.get(url)
    soup = BeautifulSoup(r.text, "html.parser")

    city_name = soup.find("h1").text.strip().replace("Weather in ", "")
    temp = soup.find("div", class_="h2").text.strip()
    condition = soup.find("div", class_="h2").find_next("p").text.strip().rstrip(".")

    weather_data.append([city_name, temp, condition])

df_weather = pd.DataFrame(weather_data, columns=["City", "Temperature", "Condition"])
df_weather.to_csv("weather.csv", index=False)
print(df_weather)

                                 City Temperature  \
0             New York, New York, USA       65 °F   
1     London, England, United Kingdom       54 °F   
2                        Tokyo, Japan       82 °F   
3          Mumbai, Maharashtra, India       81 °F   
4  Sydney, New South Wales, Australia       61 °F   

                      Condition  
0                         Clear  
1                         Sunny  
2  Thunderstorms. Broken clouds  
3              Scattered clouds  
4              Scattered clouds  
