### Merging datasets

In [137]:
import pandas as pd

def remove_year(string):
    """Strip a trailing ``(YYYY)`` year tag from a title.

    Only a four-digit year in parentheses at the very end of the title is
    removed, together with the whitespace around it, so the result can be
    used directly as a merge key. Parentheses elsewhere in the title
    (e.g. ``'(500) Days of Summer'``) are left untouched.

    Parameters
    ----------
    string : str
        Title, optionally ending in a parenthesised year.

    Returns
    -------
    str
        The title without the trailing year tag and without leading or
        trailing whitespace.
    """
    import re  # local import so this definition does not rely on another cell

    # The earlier `string.split('(')[0]` kept the space before the tag
    # ('Us (2019)' -> 'Us ') and emptied titles that start with '('.
    return re.sub(r'\s*\(\d{4}\)\s*$', '', string).strip()

def get_year(date):
    """Return the ``year`` attribute of ``date``.

    ``date`` can be any object exposing ``.year`` (for example a
    ``datetime.date`` or a pandas ``Timestamp``).
    """
    year = date.year
    return year

# Read the three source tables from disk.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
imdb, rt, bom = (
    pd.read_pickle(path)
    for path in ('imdb.pkl', 'rt_clean.pkl', 'bom_clean.pkl')
)

# IMDB: use the same 'title' / 'year' column names as the other tables
imdb = imdb.rename(columns={'primaryTitle': 'title', 'startYear': 'year'})

# Box Office Mojo: strip the year from each title, then lowercase it for matching
bom['title'] = bom['title'].apply(remove_year).str.lower()

# Rotten Tomatoes: derive 'year' from the release date and lowercase the title
rt['year'] = rt['releaseDate'].apply(get_year)
rt['title'] = rt['title'].str.lower()

# Lowercased IMDB copy used only for matching; `imdb` keeps its original titles
imdb_lower = imdb.assign(title=imdb['title'].str.lower())

# Join all three tables on (title, year); merge defaults to an inner join.
# NOTE(review): rows sharing the same title and year can match more than once.
merge_keys = ['title', 'year']
df = rt.merge(imdb_lower, on=merge_keys).merge(bom, on=merge_keys)

### Structuring data

In [138]:
# Optional row filters, currently disabled:
#   df = df.dropna()                # drop rows with missing values
#   df = df[df['year'] >= 2013]     # keep 2013 onwards

# Index both tables by the 'tconst' column
df = df.set_index('tconst')
imdb = imdb.set_index('tconst')

# Replace the lowercased merge key with IMDB's original title (aligned on the index)
df = df.assign(title=imdb['title'])

# Save the merged table
df.to_pickle('full.pkl')

### Running scrape

In [139]:
# Plain Python list of the titles, in row order
movie_list = df['title'].to_list()

# NOTE(review): `sentiments` is not defined in any visible cell - confirm it
# exists on a fresh kernel and lines up with the rows of `df`.
df['sentiment'] = sentiments

['Godzilla: King of the Monsters',
 'The Secret Life of Pets 2',
 'The Hustle',
 'The Sun Is Also a Star',
 "A Dog's Journey",
 'Booksmart',
 'Rocketman',
 'Brightburn',
 'The Last Black Man in San Francisco',
 'The Tomorrow Man',
 'Unplanned',
 'Ode to Joy',
 'Avengers: Endgame',
 'Poms',
 'The Souvenir',
 'Tolkien',
 'Sword of Trust',
 'Shazam!',
 'Alita: Battle Angel',
 'UglyDolls',
 'Long Shot',
 'Breakthrough',
 'Missing Link',
 'The Curse of La Llorona',
 'The Best of Enemies',
 'The Best of Enemies',
 'Armstrong',
 'Wonder Park',
 'The Aftermath',
 'May It Last: A Portrait of the Avett Brothers',
 'Hale County This Morning, This Evening',
 'Little',
 'The Quiet One',
 'Captain Marvel',
 'Captive State',
 'Us',
 'Five Feet Apart',
 'The Beach Bum',
 'No manches Frida 2',
 'The Mustang',
 'Framing John DeLorean',
 'How to Train Your Dragon: The Hidden World',
 "Isn't It Romantic",
 'Fighting with My Family',
 'Apollo 11',
 'Greta',
 'The Brink',
 'The Lego Movie 2: The Second Part

In [61]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline 

# Disabled alternative: density plot of the audience scores
# data = df['audienceScore']
# sns.set_style('whitegrid')
# sns.kdeplot(np.array(data), bw = 5)

# Scatter of 'audienceScore' against 'averageRating' with a fitted regression line
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(data=df, x='audienceScore', y='averageRating', ax=ax)

In [152]:
# Copy of the merged table indexed by the 'boWorldwide' column; `df` is left unchanged
dfcopy = df.copy().set_index('boWorldwide')

dfcopy

Unnamed: 0_level_0,actors,mpaaRating,synopsis,title,tomatoIcon,releaseDate,genres,directors,studio,tomatoMeter,...,titleType,isAdult,runtime,averageRating,numVotes,boRank,studioAcronym,totalTheaters,boOpening,openingTheaters
boWorldwide,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
110442744,"[Vera Farmiga, Ken Watanabe, Sally Hawkins]",PG13,The new story follows the heroic efforts of th...,Godzilla: King of the Monsters,rotten,2019-05-31,"[Action & Adventure, Science Fiction & Fantasy]",[Michael Dougherty],Warner Bros. Pictures,41,...,movie,0,132,6.3,66894,100,WB,4108,47776293.0,4108
157213045,"[Kevin Hart, Tiffany Haddish, Harrison Ford]",PG13,THE SECRET LIFE OF PETS 2 will follow summer 2...,The Secret Life of Pets 2,rotten,2019-06-07,"[Animation, Comedy]","[Chris Renaud, Jonathan Del Val]",Universal Pictures,59,...,movie,0,86,6.5,15017,42,Uni.,4564,46652680.0,4561
35417038,"[Rebel Wilson, Anne Hathaway, Alex Sharp]",PG13,Rebel Wilson and Anne Hathaway have winning ch...,The Hustle,rotten,2019-05-10,[Comedy],[Chris Addison],MGM,14,...,movie,0,93,5.3,14343,1,UAR,3077,13007709.0,3007
4950029,"[Yara Shahidi, Charles Melton, John Leguizamo]",PG13,College-bound romantic Daniel Bae and Jamaica-...,The Sun Is Also a Star,rotten,2019-05-17,"[Drama, Romance]",[Ry Russo-Young],Warner Bros. Pictures,51,...,movie,0,100,5.5,1713,733,WB,2073,2511530.0,2073
22546590,"[Marg Helgenberger, Betty Gilpin, Henry Lau]",PG13,Bailey (voiced again by Josh Gad) is living th...,A Dog's Journey,rotten,2019-05-17,"[Drama, Kids & Family]",[Gail Mancuso],Universal Pictures,49,...,movie,0,109,7.4,5296,409,Uni.,3279,8030085.0,3267
22680962,"[Kaitlyn Dever, Beanie Feldstein, Jessica Will...",PG13,The story follows Dever and Feldstein's charac...,Booksmart,certified_fresh,2019-05-24,[Comedy],[Olivia Wilde],,97,...,movie,0,102,7.4,31461,3,UAR,2518,6933620.0,2505
96139553,"[Taron Egerton, Jamie Bell, Richard Madden]",PG13,ROCKETMAN is an epic musical fantasy about the...,Rocketman,certified_fresh,2019-05-31,[Drama],[Dexter Fletcher],Paramount Pictures,89,...,movie,0,121,7.5,53862,104,Par.,3610,25725722.0,3610
17300439,"[Elizabeth Banks, David Denman, Jackson A. Dunn ]",PG13,What if a child from another world crash-lande...,Brightburn,rotten,2019-05-24,"[Horror, Science Fiction & Fantasy]",[David Yarovesky],,57,...,movie,0,90,6.2,33662,533,SGem,2607,7845658.0,2607
4508773,"[Jimmie Fails, Jonathan Majors, Tichina Arnold]",PG13,Jimmie Fails dreams of reclaiming the Victoria...,The Last Black Man in San Francisco,certified_fresh,2019-06-07,[Drama],[Joe Talbot],,93,...,movie,0,121,7.7,2503,23,A24,207,235272.0,7
354103,"[John Lithgow, Blythe Danner, Derek Cecil]",PG13,Ed Hemsler spends his life preparing for a dis...,The Tomorrow Man,rotten,2019-05-22,"[Drama, Romance]",[Noble Jones],Bleecker Street,44,...,movie,0,94,5.4,242,30,BST,207,18281.0,4
