In [None]:
# Dependency: pytrends. Uncomment the next line to install it into this kernel's environment.
# %pip install pytrends

In [None]:
import pandas as pd
from pytrends.request import TrendReq
import time
import json


In [26]:
# Read the CSV file with movie titles. Adjust the file path as needed.
# The scraping loop further down reads titles from the "Title" column (capital T).
movies_df = pd.read_csv("omdb_movie_data.csv")

# Fail here with a clear message instead of a bare KeyError later in the notebook.
if "Title" not in movies_df.columns:
    raise KeyError(
        'omdb_movie_data.csv must include a column named "Title"; '
        f"found columns: {list(movies_df.columns)}"
    )

movies_df.head()

Unnamed: 0,Title,Year,IMDb Rating,IMDb Votes
0,My Little Pony: A New Generation,2021,6.8,4730
1,The Starling,2021,6.4,15804
2,Je Suis Karl,2021,5.6,1624
3,Confessions of an Invisible Girl,2021,5.3,1438
4,Intrusion,2021,5.3,22287


In [None]:


# Collect Google Trends "interest over time" for every movie title.
# Result: `all_data`, a list holding one DataFrame per title that returned
# data, each tagged with a `movie_title` column. The combined frame is also
# written to "trend_results.csv".

# Pause between building the payload and querying it (adjust to your setup).
REQUEST_PAUSE_SECONDS = 15
# Pause after a failed title so the next request is not fired immediately
# (the regular pause is skipped whenever the failure happens before it).
ERROR_PAUSE_SECONDS = 15

# Initialize an empty list to collect result dataframes.
all_data = []

# Set up pytrends with the desired settings (using a timeout for connection and read)
pytrends = TrendReq(hl='en-US', tz=360, timeout=(30,60))

# Query each distinct, non-missing title exactly once: a missing title cannot
# be searched, and a repeated title would repeat the same slow request and
# duplicate its rows once the results are grouped by movie title.
titles = movies_df['Title'].dropna().drop_duplicates()

# Loop through each movie title in the CSV.
for title in titles:
    kw_list = [title]
    try:
        # Build payload restricted to category 78 (intended as the movies category).
        # NOTE(review): confirm that 78 is the movies category id, e.g. via pytrends.categories().
        pytrends.build_payload(kw_list, cat=78, timeframe='today 5-y', geo='', gprop='')

        # Sleep to allow Google to recover (adjust timing based on your setup)
        time.sleep(REQUEST_PAUSE_SECONDS)

        # Query interest over time
        data = pytrends.interest_over_time()

        # Check if returned data is not empty
        if not data.empty:
            # Add a column to keep track of the movie title
            data['movie_title'] = title
            # Append the dataframe to our list
            all_data.append(data)
        else:
            print(f"No data returned for title: {title}")

    except Exception as e:
        # Best effort per title: report the error (e.g., rate limiting) and move on.
        print(f"Error processing title '{title}': {e}")
        # Back off before the next title instead of retrying straight away.
        time.sleep(ERROR_PAUSE_SECONDS)

# Combine all individual dataframes into one final dataframe
if all_data:  # make sure there is at least one dataframe to concatenate
    final_df = pd.concat(all_data)
    # Save the combined dataframe to a new CSV file
    final_df.to_csv("trend_results.csv")
    print("Combined DataFrame:")
    print(final_df.head())
else:
    print("No data was collected.")

No data returned for title: My Little Pony: A New Generation
No data returned for title: Je Suis Karl
No data returned for title: Confessions of an Invisible Girl
No data returned for title: The Father Who Moves Mountains
No data returned for title: A StoryBots Space Adventure


KeyboardInterrupt: 

In [None]:
# Summarise what was collected instead of echoing every DataFrame in full:
# one (movie title, number of rows) pair per title that returned data.
[(frame['movie_title'].iloc[0], len(frame)) for frame in all_data]

In [None]:
# Bring every per-title frame into the same three-column layout
# (date, movie_title, trend) so the frames can be stacked afterwards.
normalized_data = []

for trend_frame in all_data:
    if trend_frame.empty:
        continue  # nothing to normalise for this title

    # Turn the index into an ordinary column (expected to be named 'date').
    flat_frame = trend_frame.reset_index()

    # The column right after the former index is assumed to hold the interest
    # values, labelled with the queried title; give it the common name 'trend'.
    value_column = flat_frame.columns[1]
    flat_frame = flat_frame.rename(columns={value_column: 'trend'})

    # Keep only the columns the later cells use.
    normalized_data.append(flat_frame[['date', 'movie_title', 'trend']])

In [None]:
# Stack the normalised frames into one long table, ordered by movie title and
# then by date, with a fresh 0..n-1 index.
final_df = (
    pd.concat(normalized_data, ignore_index=True)
    .sort_values(by=['movie_title', 'date'])
    .reset_index(drop=True)
)

In [None]:
# Build {movie_title: [{"date": "YYYY-MM-DD", "trend": value}, ...]} for the JSON export.
# Dates are formatted once for the whole table, and the groups are iterated
# directly rather than through groupby(...).apply, which newer pandas releases
# warn about when the applied frame still contains the grouping column.
trend_records = final_df.assign(date=final_df['date'].dt.strftime('%Y-%m-%d'))

grouped_json = {
    movie_title: title_rows[['date', 'trend']].to_dict(orient='records')
    for movie_title, title_rows in trend_records.groupby('movie_title')
}

In [None]:
# Write the long-format trend table to disk, leaving out the row index.
final_df.to_csv(path_or_buf='movie_trends.csv', index=False)

In [None]:
# Serialise the per-movie records as indented JSON and write them to disk.
with open('movie_trends.json', mode='w') as json_file:
    json_file.write(json.dumps(grouped_json, indent=4))