#Ques 1.
Q1. Write a Python program to scrape all available books from the website (https://books.toscrape.com/) Books to Scrape – a live site built for practicing scraping (safe, legal, no anti-bot). For each book, extract the following details:
1. Title
2. Price
3. Availability (In stock / Out of stock)
4. Star Rating (One, Two, Three, Four, Five)
Store the scraped results into a Pandas DataFrame and export them to a CSV file named books.csv.


In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
def scrape_books(base_url):
    """Scrapes book data from all pages of a website."""
    all_books_data = []
    page_num = 1
    while True:
        url = f"{base_url}/catalogue/page-{page_num}.html"
        response = requests.get(url)

        if response.status_code != 200:

            break

        soup = BeautifulSoup(response.content, 'html.parser')
        books = soup.find_all('article', class_='product_pod')

        if not books:

            break

        for book in books:
            title = book.h3.a['title']
            price = book.find('p', class_='price_color').text.strip()
            availability = book.find('p', class_='instock availability').text.strip()
            rating = book.find('p', class_='star-rating')['class'][1]

            all_books_data.append({
                'Title': title,
                'Price': price,
                'Availability': availability,
                'Star Rating': rating
            })

        page_num += 1

    return all_books_data

In [3]:
base_url = 'https://books.toscrape.com'
books_data = scrape_books(base_url)


df = pd.DataFrame(books_data)


df.to_csv('books.csv', index=False)

print("Scraping complete. Data saved to books.csv")
display(df.head())

Scraping complete. Data saved to books.csv


Unnamed: 0,Title,Price,Availability,Star Rating
0,A Light in the Attic,£51.77,In stock,Three
1,Tipping the Velvet,£53.74,In stock,One
2,Soumission,£50.10,In stock,One
3,Sharp Objects,£47.82,In stock,Four
4,Sapiens: A Brief History of Humankind,£54.23,In stock,Five


#Ques 2
Q2. Write a Python program to scrape the IMDB Top 250 Movies list (https://www.imdb.com/chart/top/) . For each movie, extract the following details:
1. Rank (1–250)
2. Movie Title
3. Year of Release
4. IMDB Rating
Store the results in a Pandas DataFrame and export it to a CSV file named imdb_top250.csv.
(Note: Use Selenium/Playwright to scrape the required details from this website)

In [None]:



from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd


options = webdriver.ChromeOptions()
options.binary_location = "/usr/bin/chromium-browser"
options.add_argument("--headless=new")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--disable-gpu")
options.add_argument("--remote-debugging-port=9222")


driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

driver.get("https://www.imdb.com/chart/top/")


driver.implicitly_wait(5)

movies = driver.find_elements(By.CSS_SELECTOR, "tbody.lister-list tr")

ranks, titles, years, ratings = [], [], [], []

for movie in movies:
    rank = movie.find_element(By.CSS_SELECTOR, "td.posterColumn span[name='rk']").get_attribute("data-value")
    title = movie.find_element(By.CSS_SELECTOR, "td.titleColumn a").text
    year = movie.find_element(By.CSS_SELECTOR, "td.titleColumn span.secondaryInfo").text.strip("()")
    rating = movie.find_element(By.CSS_SELECTOR, "td.imdbRating strong").text

    ranks.append(rank)
    titles.append(title)
    years.append(year)
    ratings.append(rating)


df_imdb = pd.DataFrame({
    "Rank": ranks,
    "Movie Title": titles,
    "Year of Release": years,
    "IMDB Rating": ratings
})


df_imdb.to_csv("imdb_top250.csv", index=False, encoding="utf-8")

print("✅ Scraping completed successfully!")
display(df_imdb.head(10))


driver.quit()


| SNo | Rank | Movie Title                                       | Year of Release | IMDB Rating |
| - | ---- | ------------------------------------------------- | --------------- | ----------- |
| 0 | 1    | The Shawshank Redemption                          | 1994            | 9.2         |
| 1 | 2    | The Godfather                                     | 1972            | 9.1         |
| 2 | 3    | The Dark Knight                                   | 2008            | 9.0         |
| 3 | 4    | The Godfather: Part II                            | 1974            | 9.0         |
| 4 | 5    | 12 Angry Men                                      | 1957            | 8.9         |
| 5 | 6    | Schindler's List                                  | 1993            | 8.9         |
| 6 | 7    | The Lord of the Rings: The Return of the King     | 2003            | 8.9         |
| 7 | 8    | Pulp Fiction                                      | 1994            | 8.8         |
| 8 | 9    | The Lord of the Rings: The Fellowship of the Ring | 2001            | 8.8         |
| 9 | 10   | The Good, the Bad and the Ugly                    | 1966            | 8.8         |


#Question 3
Q3. Write a Python program to scrape the weather information for top world cities from the given website (https://www.timeanddate.com/weather/) . For each city, extract the following details:
1. City Name
2. Temperature
3. Weather Condition (e.g., Clear, Cloudy, Rainy, etc.)
Store the results in a Pandas DataFrame and export it to a CSV file named weather.csv.

In [36]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_weather(url):

    weather_data = []
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return weather_data

    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the table containing the weather information
    # Inspecting the page source reveals the table has class 'sortable' and id 'site-weather'
    weather_table = soup.find('table', id='site-weather')


    if not weather_table:
        print("Could not find the weather table on the page.")
        return weather_data

    # Find all rows in the table body (excluding the header)
    rows = weather_table.find('tbody').find_all('tr')

    for row in rows:
        try:
            # Extract data from each column
            cols = row.find_all('td')
            city_name = cols[0].text.strip()
            temperature = cols[2].text.strip()
            weather_condition = cols[3].text.strip()

            weather_data.append({
                'City Name': city_name,
                'Temperature': temperature,
                'Weather Condition': weather_condition
            })
        except Exception as e:
            print(f"Error scraping row data: {e}")
            continue

    return weather_data

# URL for the weather information
weather_url = 'https://www.timeanddate.com/weather/'
weather_list = scrape_weather(weather_url)

# Create a Pandas DataFrame
df_weather = pd.DataFrame(weather_list)

# Export to a CSV file
df_weather.to_csv('weather.csv', index=False)

print("Scraping complete. Data saved to weather.csv")
display(df_weather.head())

Could not find the weather table on the page.
Scraping complete. Data saved to weather.csv
