# Notebook for Rotten Tomatoes reviews adapted to our site

In [53]:
import pandas as pd

## Load the two CSV files, movies.csv and reviews.csv, into DataFrames

In [54]:
# Relative locations of the two raw input files.
movies_csv_path, reviews_csv_path = "data/movies.csv", "data/reviews.csv"

# Read each file into its own DataFrame using pandas' default CSV parsing.
reviews_df = pd.read_csv(reviews_csv_path)
movies_df = pd.read_csv(movies_csv_path)

## Merge the two DataFrames on a common key (film title)

In [55]:
# Join each review to the movie rows whose `name` equals the review's
# `movie_title`. The inner join keeps only reviews with a matching title.
# NOTE(review): the final_df shown further down has index values above 3.7M,
# while reviews_df has about 1.13M rows, so this join appears to produce
# several rows per review (titles are presumably not unique in movies.csv).
# Confirm whether that fan-out is intended.
merged_df = pd.merge(
    reviews_df,
    movies_df,
    how="inner",
    left_on="movie_title",
    right_on="name",
)

## Select only the columns you need

In [56]:
# Keep only the four fields that are carried through to the export:
# the `id` column, the critic's name, the review text and the review date.
final_df = merged_df.loc[
    :, ["id", "critic_name", "review_content", "review_date"]
]

## Check that columns do not contain empty values

In [57]:
# Count missing values per column once and reuse the result for both the
# check and the report, instead of scanning the whole frame twice.
null_counts = final_df.isnull().sum()
if null_counts.any():
    print(null_counts)
    # Discard every row that has a missing value in any of the kept columns.
    final_df = final_df.dropna()

# Rename through an explicit old -> new mapping rather than by position, so
# the result does not depend on column order and no frame is mutated in place.
final_df = final_df.rename(
    columns={
        "id": "room",
        "critic_name": "userId",
        "review_content": "message",
        "review_date": "timestamp",
    }
)

id                     0
critic_name        57296
review_content    205150
review_date            0
dtype: int64


## Sort the DataFrame by id (room) in ascending order

In [58]:
# Order the rows by ascending `room` so that rows sharing a room are adjacent.
final_df = final_df.sort_values(by=["room"], ascending=True)

## Add a `__v` column with the value 0 for each row to match MongoDB

In [59]:
# Append a constant `__v` column holding 0 in every row.
final_df = final_df.assign(__v=0)

## Comparison between the initial and final reviews

In [60]:
# Show the raw reviews frame exactly as loaded from reviews.csv, as the
# "before" side of the comparison.
reviews_df

Unnamed: 0,rotten_tomatoes_link,movie_title,critic_name,top_critic,publisher_name,review_type,review_score,review_date,review_content
0,m/0814255,Percy Jackson & the Olympians: The Lightning T...,Andrew L. Urban,False,Urban Cinefile,Fresh,,2010-02-06,A fantasy adventure that fuses Greek mythology...
1,m/0814255,Percy Jackson & the Olympians: The Lightning T...,Louise Keller,False,Urban Cinefile,Fresh,,2010-02-06,"Uma Thurman as Medusa, the gorgon with a coiff..."
2,m/0814255,Percy Jackson & the Olympians: The Lightning T...,,False,FILMINK (Australia),Fresh,,2010-02-09,With a top-notch cast and dazzling special eff...
3,m/0814255,Percy Jackson & the Olympians: The Lightning T...,Ben McEachen,False,Sunday Mail (Australia),Fresh,3.5/5,2010-02-09,Whether audiences will get behind The Lightnin...
4,m/0814255,Percy Jackson & the Olympians: The Lightning T...,Ethan Alter,True,Hollywood Reporter,Rotten,,2010-02-10,What's really lacking in The Lightning Thief i...
...,...,...,...,...,...,...,...,...,...
1129882,m/zulu_dawn,Zulu Dawn,Chuck O'Leary,False,Fantastica Daily,Rotten,2/5,2005-11-02,
1129883,m/zulu_dawn,Zulu Dawn,Ken Hanke,False,"Mountain Xpress (Asheville, NC)",Fresh,3.5/5,2007-03-07,"Seen today, it's not only a startling indictme..."
1129884,m/zulu_dawn,Zulu Dawn,Dennis Schwartz,False,Dennis Schwartz Movie Reviews,Fresh,B+,2010-09-16,A rousing visual spectacle that's a prequel of...
1129885,m/zulu_dawn,Zulu Dawn,Christopher Lloyd,False,Sarasota Herald-Tribune,Rotten,3.5/5,2011-02-28,"A simple two-act story: Prelude to war, and th..."


In [61]:
# Show the processed frame (renamed columns, sorted by room, with `__v`),
# as the "after" side of the comparison.
final_df

Unnamed: 0,room,userId,message,timestamp,__v
2109941,1000002,Alex Sandell,Makes Demi Moore's later crappy movies look li...,2005-09-28,0
2110011,1000002,Mike Massie,"It's surprising how rapidly the film devolves,...",2020-09-06,0
2110001,1000002,Dennis Schwartz,"A dreadful sci-fi film, that plays to the larg...",2020-07-15,0
2109991,1000002,Variety Staff,Parasite is lethargic between its terror scene...,2009-03-26,0
2109911,1000002,Film4 Staff,Parasite is one of the genre greats and somewh...,2003-05-24,0
...,...,...,...,...,...
3704219,1941533,Charles Mudede,Without is the region's first erotic tour de f...,2020-05-27,0
3704225,1941533,Rob Hunter,"This is also Jackson's feature debut, and like...",2020-05-27,0
3704231,1941533,Kim Voynar,"Jackson shows a lot of promise as a director, ...",2020-06-11,0
3704189,1941533,Erik McClanahan,"Watch this film closely, and you will be rewar...",2020-05-22,0


## Export the combined DataFrame to a CSV file

In [62]:
# Destination file, relative to the notebook's working directory.
output_path = "final_reviews.csv"

# Write the frame without its row index, so only the data columns are exported.
final_df.to_csv(path_or_buf=output_path, index=False)

# Report where the file was written.
print(f"Combined file created : {output_path}")

Combined file created : final_reviews.csv
