In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

URL_PATTERN = "https://books.toscrape.com/catalogue/page-{}.html"

all_books = []

for pg in range(1, 51): 
    page_url = URL_PATTERN.format(pg)
    response = requests.get(page_url)

    if response.status_code != 200:
        print(f"Page {pg} could not be loaded.")
        break

    soup = BeautifulSoup(response.content, "html.parser")

    for book in soup.select("article.product_pod"):
        book_title = book.h3.a["title"]
        book_price = book.select_one("p.price_color").text.strip()
        stock_status = book.select_one("p.instock.availability").get_text(strip=True)
        
        rating_tag = book.select_one("p.star-rating")
        star_text = [c for c in rating_tag["class"] if c != "star-rating"][0]

        all_books.append({
            "Title": book_title,
            "Price": book_price,
            "Availability": stock_status,
            "Star Rating": star_text
        })
df = pd.DataFrame(all_books)
df.to_csv("books.csv", index=False)

print("Scraping finished! Total books:", len(df))
print(df.sample(5)) 


Scraping finished! Total books: 1000
                                                 Title   Price Availability  \
308                   Eligible (The Austen Project #4)  £27.09     In stock   
436  If I Gave You God's Phone Number....: Searchin...  £20.91     In stock   
799              One for the Money (Stephanie Plum #1)  £32.87     In stock   
691                                       'Salem's Lot  £49.56     In stock   
508              The Bourne Identity (Jason Bourne #1)  £42.78     In stock   

    Star Rating  
308       Three  
436         One  
799         Two  
691        Four  
508        Four  


In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
import random
import tempfile

options = Options()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument(f"--user-data-dir={tempfile.mkdtemp()}")
options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
options.add_experimental_option("excludeSwitches", ["enable-automation"])

driver = webdriver.Chrome(options=options)
driver.get("https://www.imdb.com/chart/top/")
time.sleep(random.uniform(3, 5))

soup = BeautifulSoup(driver.page_source, "html.parser")
driver.quit()

movies = []
for rank, row in enumerate(soup.select("li.ipc-metadata-list-summary-item")[:250], 1):
    title_elem = row.select_one("h3.ipc-title__text")
    title = title_elem.text.split('.', 1)[1].strip() if title_elem and '.' in title_elem.text else "N/A"
    year_elem = row.select_one("span.sc-b0691f29-8, span.cli-title-metadata-item")
    year = year_elem.text.strip("()") if year_elem else "N/A"
    rating_elem = row.select_one("span.ipc-rating-star--rating")
    rating = rating_elem.text if rating_elem else "N/A"
    movies.append([rank, title, year, rating])

pd.DataFrame(movies, columns=["Rank", "Title", "Year", "Rating"]).to_csv("imdb_top250.csv", index=False)



In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}

base_url = "https://www.timeanddate.com"
start_url = f"{base_url}/weather/india"

resp = requests.get(start_url, headers=headers)
soup = BeautifulSoup(resp.content, "html.parser")

city_links = []
for a in soup.select("section tbody tr td a"):
    href = a.get("href")
    if href and href.startswith("/weather/india/"):
        city_links.append(base_url + href)

print("Found", len(city_links), "city links")

all_cities = []
for link in city_links:
    time.sleep(random.uniform(1, 2))  
    r = requests.get(link, headers=headers)
    s = BeautifulSoup(r.content, "html.parser")

    city = s.find("h1").get_text(strip=True).replace("Weather", "").strip()
    temp = s.find("div", class_="h2").get_text(strip=True)
    cond = s.find("div", id="qlook").p.get_text(strip=True)

    all_cities.append([city, temp, cond])

df = pd.DataFrame(all_cities, columns=["City", "Temperature", "Condition"])
df.to_csv("weather.csv", index=False)

print("Scraped weather data for", len(df), "cities.")
print(df.head())


Found 3 city links
Scraped weather data for 3 cities.
                               City Temperature          Condition
0    in Bhubaneshwar, Odisha, India       34 °C  Scattered clouds.
1         in Gangtok, Sikkim, India        7 °C        Quite cool.
2  in Courtallam, Tamil Nadu, India       31 °C  Scattered clouds.
