In [21]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [22]:
# IMDb title IDs to scrape, each paired with the film it refers to
# (the selection includes 6 additional movies).
_selected_movies = (
    ('tt0111161', 'The Shawshank Redemption'),
    ('tt0068646', 'The Godfather'),
    ('tt0468569', 'The Dark Knight'),
    ('tt0110912', 'Pulp Fiction'),
    ('tt0167260', 'The Lord of the Rings: The Return of the King'),
    ('tt0108052', "Schindler's List"),
    ('tt0137523', 'Fight Club'),
    ('tt0109830', 'Forrest Gump'),
    ('tt1375666', 'Inception'),
    ('tt0133093', 'The Matrix'),
    ('tt0185183', 'Battlefield Earth'),
    ('tt0804492', 'The Hottie & the Nottie'),
    ('tt2724064', 'Sharknado'),
    ('tt0317676', 'House of the Dead'),
    ('tt0368226', 'The Room'),
    ('tt2103085', 'Run for Your Wife'),
)

# Only the IDs are needed to build the review URLs; order matches the table above.
movie_ids = [imdb_id for imdb_id, _title in _selected_movies]

# Accumulator for the scraped rows (one dict per review)
all_review_data = list()

In [23]:
# Scrape the first page of user reviews for every ID in `movie_ids` and
# save them, one row per review, to 'imdb_movie_reviews.csv'.

# Base URL for IMDb reviews
base_url = 'https://www.imdb.com/title/{}/reviews?ref_=tt_urv'

# User-Agent header to mimic a real browser
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}

# Seconds to wait for a response; without a timeout requests.get can block forever
REQUEST_TIMEOUT = 10

# Start from an empty list so that re-running this cell does not append a
# second copy of every review to the rows collected by a previous run.
all_review_data = []

# Loop through each movie ID and scrape reviews
for movie_id in movie_ids:
    url = base_url.format(movie_id)

    # A connection error or timeout for one title should not abort the whole run.
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.exceptions.RequestException as exc:
        print(f"Failed to retrieve reviews for movie ID: {movie_id}. Error: {exc}")
        continue

    if response.status_code != 200:
        print(f"Failed to retrieve reviews for movie ID: {movie_id}. Status code: {response.status_code}")
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # The title is read from the first link inside <div class="parent">.
    # find() returns None when the element is absent (e.g. the page layout
    # changed), so check each step and fall back to the IMDb ID as the label.
    parent_div = soup.find('div', class_='parent')
    title_link = parent_div.find('a') if parent_div is not None else None
    movie_title = title_link.get_text(strip=True) if title_link is not None else movie_id

    # Find and extract reviews
    reviews = soup.find_all('div', class_='text show-more__control')
    if not reviews:
        print(f"No reviews found for {movie_title}.")
        continue

    # One row per review, tagged with the movie it belongs to
    all_review_data.extend(
        {"Movie Title": movie_title, "Review": review.get_text(strip=True)}
        for review in reviews
    )

# Convert the list of dictionaries to a DataFrame; naming the columns keeps the
# CSV header present even when nothing was scraped.
df = pd.DataFrame(all_review_data, columns=["Movie Title", "Review"])

if df.empty:
    print("Warning: no reviews were collected; the CSV will contain only the header row.")

# Save the DataFrame to a CSV file
df.to_csv('imdb_movie_reviews.csv', index=False)

print("Reviews for the selected movies have been saved to 'imdb_movie_reviews.csv'.")

Reviews for the selected movies have been saved to 'imdb_movie_reviews.csv'.
