In [None]:
import requests
from bs4 import BeautifulSoup
import csv
import time
import pandas as pd

In [None]:
# Page to scrape: IMDb's top-rated Indian movies chart. Read by main().
url = "https://www.imdb.com/india/top-rated-indian-movies/"

In [None]:
# HTTP headers that fetch_url() sends with every request. The User-Agent
# string identifies the client as desktop Chrome — presumably so the site does
# not reject the default python-requests client; TODO confirm still required.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/115.0.0.0 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
    "Referer": "https://www.google.com/"
}

In [None]:
def fetch_url(url):
  """Download a page and return its body as text.

  Issues a GET request for ``url`` using the module-level ``HEADERS`` and a
  20-second timeout.

  Args:
    url: Address of the page to download.

  Returns:
    The response body as a string, or ``None`` when the request fails or the
    server replies with an error status. Failures are printed, not raised.
  """
  try:
    page = requests.get(url, timeout=20, headers=HEADERS)
    # Turn 4xx/5xx replies into HTTPError so they are reported below.
    page.raise_for_status()
    body = page.text
  except requests.exceptions.HTTPError as exc:
    print(f"HTTP Error occured: {exc}")
    body = None
  except requests.exceptions.RequestException as exc:
    # Every other requests failure (connection, timeout, ...) lands here.
    print(f"Request Error occured: {exc}")
    body = None
  return body

In [None]:
def parse_movies(html):
  """Extract movie titles and ratings from the chart page markup.

  Args:
    html: Page markup as a string.

  Returns:
    A list of ``{"title": str, "rating": str}`` dicts, one per list-item
    container that has a title element. ``rating`` is ``"N/A"`` when the
    container has no rating element. The list is empty when no containers
    are found, in which case a warning is printed.
  """
  soup = BeautifulSoup(html, "html.parser")
  movies = []

  movie_blocks = soup.find_all("div", class_="ipc-metadata-list-item__content-container")

  if not movie_blocks:
    print("Could not find movie table, Site structure might have changed")
    return movies

  for movie in movie_blocks:
    title_tag = movie.find("span", attrs={"data-testid": "rank-list-item-title"})
    if title_tag is None:
      # find() returns None when nothing matches. A block without a title has
      # nothing useful to record, so skip it instead of raising AttributeError
      # and losing every other row.
      continue

    # Remove the first nested <span> (if any) so its text does not end up in
    # the title — presumably a rank/badge label; verify against the live page.
    nested_span = title_tag.find("span")
    if nested_span is not None:
      nested_span.extract()
    title = title_tag.text.strip()

    rating_tag = movie.find("span", class_="ipc-rating-star--rating")
    rating = rating_tag.text.strip() if rating_tag else "N/A"

    movies.append({
        "title": title,
        "rating": rating
    })

  return movies

In [None]:
def save_to_csv(movies, filename="movies.csv"):
  """Write movie records to a CSV file.

  Args:
    movies: Records accepted by ``pd.DataFrame`` — in this notebook a list of
      dicts with ``"title"`` and ``"rating"`` keys.
    filename: Destination path. Defaults to ``"movies.csv"`` in the current
      working directory, which is where the file was always written before
      this parameter existed.
  """
  # index=False keeps pandas' row numbers out of the file.
  pd.DataFrame(movies).to_csv(filename, index=False)

In [None]:
def main():
    """Run the scraper end to end: download, parse, then save.

    Downloads the page at the module-level ``url``, parses it into movie
    records and writes them out with ``save_to_csv``. Each stage announces
    itself on stdout; if the download yields nothing or parsing yields no
    records, a message is printed and the remaining stages are skipped.
    """
    print("Fetching IMDb Top 250 Indian movies...")
    page_source = fetch_url(url)

    if page_source:
        print("Parsing movie data...")
        records = parse_movies(page_source)

        if records:
            print("Saving data to CSV...")
            save_to_csv(records)

            print("Data fetching, parsing, and saving completed successfully.")
        else:
            print("No movies found. Exiting.")
    else:
        print("Failed to fetch the webpage.")

# Run the scraper when this cell/script is executed directly rather than
# imported as a module.
if __name__ == "__main__":
    main()

Fetching IMDb Top 250 Indian movies...
Parsing movie data...
Saving data to CSV...
Data fetching, parsing, and saving completed successfully.
