# Rotten Tomatoes Movies Rating Prediction
source: https://platform.stratascratch.com/data-projects/rotten-tomatoes-movies-rating-prediction

In [None]:
import pandas as pd
from ydata_profiling import ProfileReport

### (1) Load datasets

In [30]:
movies = pd.read_csv("./datasets/rotten_tomatoes_movies.csv")
print(movies.info())

# Is the data set unique by rotten_tomatoes_link? - Yes
assert movies.drop_duplicates(["rotten_tomatoes_link"]).shape[0] == movies.shape[0]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17712 entries, 0 to 17711
Data columns (total 22 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   rotten_tomatoes_link              17712 non-null  object 
 1   movie_title                       17712 non-null  object 
 2   movie_info                        17391 non-null  object 
 3   critics_consensus                 9134 non-null   object 
 4   content_rating                    17712 non-null  object 
 5   genres                            17693 non-null  object 
 6   directors                         17518 non-null  object 
 7   authors                           16170 non-null  object 
 8   actors                            17360 non-null  object 
 9   original_release_date             16546 non-null  object 
 10  streaming_release_date            17328 non-null  object 
 11  runtime                           17398 non-null  float64
 12  prod

In [27]:
critics = pd.read_csv("./datasets/rotten_tomatoes_critic_reviews_50k.csv")
critics.info()
critics

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   rotten_tomatoes_link  50000 non-null  object
 1   critic_name           48068 non-null  object
 2   top_critic            50000 non-null  bool  
 3   publisher_name        50000 non-null  object
 4   review_type           50000 non-null  object
 5   review_score          36409 non-null  object
 6   review_date           50000 non-null  object
 7   review_content        41195 non-null  object
dtypes: bool(1), object(7)
memory usage: 2.7+ MB


Unnamed: 0,rotten_tomatoes_link,critic_name,top_critic,publisher_name,review_type,review_score,review_date,review_content
0,m/0814255,Andrew L. Urban,False,Urban Cinefile,Fresh,,2010-02-06,A fantasy adventure that fuses Greek mythology...
1,m/0814255,Louise Keller,False,Urban Cinefile,Fresh,,2010-02-06,"Uma Thurman as Medusa, the gorgon with a coiff..."
2,m/0814255,,False,FILMINK (Australia),Fresh,,2010-02-09,With a top-notch cast and dazzling special eff...
3,m/0814255,Ben McEachen,False,Sunday Mail (Australia),Fresh,3.5/5,2010-02-09,Whether audiences will get behind The Lightnin...
4,m/0814255,Ethan Alter,True,Hollywood Reporter,Rotten,,2010-02-10,What's really lacking in The Lightning Thief i...
...,...,...,...,...,...,...,...,...
49995,m/1110242-collateral_damage,Chris Hewitt,False,St. Paul Pioneer Press,Rotten,,2002-02-07,The smarter an Arnold Schwarzenegger movie tri...
49996,m/1110242-collateral_damage,Nick Carter,False,Milwaukee Journal Sentinel,Rotten,,2002-02-07,A relative letdown.
49997,m/1110242-collateral_damage,Gary Dowell,True,Dallas Morning News,Rotten,,2002-02-07,The movie straddles the fence between escapism...
49998,m/1110242-collateral_damage,Walter Chaw,False,Film Freak Central,Rotten,0/4,2002-02-07,Enough similarities to Gymkata and Howie Long'...


### (2) EDA

In [31]:
movies.describe()

Unnamed: 0,runtime,tomatometer_rating,tomatometer_count,audience_rating,audience_count,tomatometer_top_critics_count,tomatometer_fresh_critics_count,tomatometer_rotten_critics_count
count,17398.0,17668.0,17668.0,17416.0,17415.0,17712.0,17712.0,17712.0
mean,102.214048,60.884763,57.139801,60.55426,143940.1,14.586326,36.374831,20.703139
std,18.702511,28.443348,68.370047,20.543369,1763577.0,15.146349,52.601038,30.248435
min,5.0,0.0,5.0,0.0,5.0,0.0,0.0,0.0
25%,90.0,38.0,12.0,45.0,707.5,3.0,6.0,3.0
50%,99.0,67.0,28.0,63.0,4277.0,8.0,16.0,8.0
75%,111.0,86.0,75.0,78.0,24988.0,23.0,44.0,24.0
max,266.0,100.0,574.0,100.0,35797640.0,69.0,497.0,303.0


In [43]:
movies.isna().sum()

rotten_tomatoes_link                   0
movie_title                            0
movie_info                           321
critics_consensus                   8578
content_rating                         0
genres                                19
directors                            194
authors                             1542
actors                               352
original_release_date               1166
streaming_release_date               384
runtime                              314
production_company                   499
tomatometer_status                    44
tomatometer_rating                    44
tomatometer_count                     44
audience_status                      448
audience_rating                      296
audience_count                       297
tomatometer_top_critics_count          0
tomatometer_fresh_critics_count        0
tomatometer_rotten_critics_count       0
dtype: int64

In [40]:
catVars = movies.select_dtypes("object").columns.to_list()

for var in catVars:
    print(f"{var} {movies[var].nunique()} : {movies[var].unique()[0:10]}")

rotten_tomatoes_link 17712 : ['m/0814255' 'm/0878835' 'm/10' 'm/1000013-12_angry_men'
 'm/1000079-20000_leagues_under_the_sea' 'm/10000_bc' 'm/1000121-39_steps'
 'm/1000123-310_to_yuma' 'm/10002008-charly' 'm/1000204-abraham_lincoln']
movie_title 17106 : ['Percy Jackson & the Olympians: The Lightning Thief' 'Please Give' '10'
 '12 Angry Men (Twelve Angry Men)' '20,000 Leagues Under The Sea'
 '10,000 B.C.' 'The 39 Steps' '3:10 to Yuma' 'Charly (A Heartbeat Away)'
 'Abraham Lincoln']
movie_info 17389 : ["Always trouble-prone, the life of teenager Percy Jackson (Logan Lerman) gets a lot more complicated when he learns he's the son of the Greek god Poseidon. At a training ground for the children of deities, Percy learns to harness his divine powers and prepare for the adventure of a lifetime: he must prevent a feud among the Olympians from erupting into a devastating war on Earth, and rescue his mother from the clutches of Hades, god of the underworld."
 "Kate (Catherine Keener) and her hu

### (3) 