In [1]:
import os
import pickle
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Root of the TMDB v3 REST API; endpoint paths are appended to this.
BASE_TMDB_URL = 'https://api.themoviedb.org/3'
# Read the key from the TMDB_API_KEY environment variable so a real secret
# never has to be saved inside the notebook. The original placeholder is kept
# as the fallback, so pasting a key here still works as before.
TMDB_API_KEY = os.environ.get('TMDB_API_KEY', 'KEY HERE')

In [3]:
def fetch_tmdb_reviews(tmdb_id, api_key):
    """Fetch the English-language reviews TMDB holds for one movie.

    Parameters
    ----------
    tmdb_id
        TMDB movie id; interpolated into the ``/movie/{id}/reviews`` path.
    api_key
        TMDB API key, sent as the ``api_key`` query parameter.

    Returns
    -------
    list of dict
        One dict per review with the keys ``review_date`` (TMDB's
        ``created_at``), ``rating_of_movie`` (``author_details.rating``, or
        ``None`` when absent) and ``actual_review`` (``content``).
        The list is empty when the movie has no reviews. If TMDB answers with
        an HTTP error status, a warning is printed and whatever was collected
        so far (possibly nothing) is returned.

    Raises
    ------
    requests.RequestException
        On connection problems or when a request exceeds the timeout.
    """
    url = f"{BASE_TMDB_URL}/movie/{tmdb_id}/reviews"
    review_data = []
    page = 1

    while True:
        # `params` lets requests URL-encode the values; the timeout keeps a
        # stalled connection from hanging the whole notebook run.
        response = requests.get(
            url,
            params={'api_key': api_key, 'language': 'en-US', 'page': page},
            timeout=10,
        )
        if not response.ok:
            # An invalid key or unknown id would otherwise look exactly like
            # "this movie has no reviews"; make the failure visible instead.
            print(f"TMDB request for movie {tmdb_id} failed with HTTP {response.status_code}")
            break

        payload = response.json()
        for review in payload.get('results') or []:
            # `author_details` may be present but null, so `.get(key, {})`
            # alone is not enough to guarantee a dict.
            author_details = review.get('author_details') or {}
            review_data.append({
                'review_date': review.get('created_at'),
                'rating_of_movie': author_details.get('rating'),
                'actual_review': review.get('content')
            })

        # The endpoint is paginated; keep going until the last page.
        if page >= (payload.get('total_pages') or 1):
            break
        page += 1

    return review_data

def update_reviews(movies_df, reviews_df, request_delay=1.0, log_path='update_log.txt'):
    """Merge freshly fetched TMDB reviews into the stored review table.

    For every movie in ``movies_df`` the reviews are fetched with
    ``fetch_tmdb_reviews``. A fetched review is matched against stored rows by
    ``IMDb_code`` and ``review_date``: with no match a new row is added; with a
    match whose ``actual_review`` text differs, the stored text is overwritten.

    Parameters
    ----------
    movies_df : pandas.DataFrame
        Must provide the columns ``movie_code`` (TMDB id), ``IMDb_code`` and
        ``Title``.
    reviews_df : pandas.DataFrame
        Must provide the columns ``IMDb_code``, ``review_date`` and
        ``actual_review``. The caller's frame is not modified.
    request_delay : float, optional
        Seconds to pause after each movie's fetch; ``0`` disables the pause.
    log_path : str, optional
        File that receives one line per added or changed review. It is
        overwritten on every run that produced changes.

    Returns
    -------
    (pandas.DataFrame, list of tuple)
        The updated review table and the ``(title, review_text)`` pairs that
        were added or changed, in processing order.
    """
    # Work on a copy so the in-place `.loc` updates below never leak into the
    # frame the caller passed in.
    reviews_df = reviews_df.copy()

    # `DataFrame.append` no longer exists in pandas >= 2.0 and was quadratic
    # when used in a loop; queue new rows and concatenate once at the end.
    pending_rows = []
    pending_by_key = {}  # (imdb_id, review_date) -> queued row dict
    updated_reviews = []

    for _, row in movies_df.iterrows():
        tmdb_id = row['movie_code']
        imdb_id = row['IMDb_code']
        title = row['Title']
        new_reviews = fetch_tmdb_reviews(tmdb_id, TMDB_API_KEY)

        for new_review in new_reviews:
            review_date = new_review['review_date']
            rating_of_movie = new_review['rating_of_movie']
            actual_review = new_review['actual_review']
            key = (imdb_id, review_date)

            # A row queued earlier in this run must count as "existing" too.
            # Undated reviews can never be matched, so they are always added.
            if review_date is not None and key in pending_by_key:
                queued = pending_by_key[key]
                if queued['actual_review'] != actual_review:
                    queued['actual_review'] = actual_review
                    updated_reviews.append((title, actual_review))
                    print(f"Updated review for: {title}")
                continue

            is_match = ((reviews_df['IMDb_code'] == imdb_id) &
                        (reviews_df['review_date'] == review_date))

            if not is_match.any():
                # If no existing review, queue a new row
                new_row = {
                    'IMDb_code': imdb_id,
                    'review_date': review_date,
                    'rating_of_movie': rating_of_movie,
                    'actual_review': actual_review
                }
                pending_rows.append(new_row)
                pending_by_key[key] = new_row
                updated_reviews.append((title, actual_review))
                print(f"Added new review for: {title}")
            else:
                # Updates the data if needed
                stored_text = reviews_df.loc[is_match, 'actual_review'].iloc[0]
                if stored_text != actual_review:
                    reviews_df.loc[is_match, 'actual_review'] = actual_review
                    updated_reviews.append((title, actual_review))
                    print(f"Updated review for: {title}")

        # Throttle per API request (one per movie), not per review: the pause
        # must also apply to movies that returned no reviews at all.
        if request_delay:
            time.sleep(request_delay)

    if pending_rows:
        reviews_df = pd.concat([reviews_df, pd.DataFrame(pending_rows)], ignore_index=True)

    # keep log of changes
    if updated_reviews:
        # Review text is free-form user content; pin the encoding so writing
        # it does not depend on the platform default.
        with open(log_path, 'w', encoding='utf-8') as log_file:
            for title, review in updated_reviews:
                log_file.write(f"Updated review for {title}: {review}\n")
        print("Update log saved to update_log.txt")
    else:
        print("No new reviews found.")

    return reviews_df, updated_reviews

In [None]:
# Inputs: the pickled movie table and the pickled table of stored reviews.
# NOTE(review): the `__main__`-guarded cell further down performs these same
# steps, so running both cells repeats the whole update.
movies_df = pd.read_pickle('movies_data.pkl')
reviews_df = pd.read_pickle('reviews_data.pkl')

# Merge in whatever TMDB currently returns; `updated_reviews` lists the changes.
reviews_df, updated_reviews = update_reviews(movies_df=movies_df, reviews_df=reviews_df)

# Persist the refreshed table. Don't forget to upload this updated file for
# analysis instead of the first iteration.
reviews_df.to_pickle('updated_reviews_data.pkl')

In [8]:
if __name__ == "__main__":
    # Load the movie table first, then the stored reviews.
    movies_df, reviews_df = (
        pd.read_pickle('movies_data.pkl'),
        pd.read_pickle('reviews_data.pkl'),
    )

    # Refresh the reviews; the second element records what was added/changed.
    reviews_df, updated_reviews = update_reviews(movies_df, reviews_df)

    # Write the result under a new name so the original pickle stays untouched.
    reviews_df.to_pickle('updated_reviews_data.pkl')

No new reviews found.
