In [1]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from bs4 import BeautifulSoup as BS
from lxml import html
import re
import difflib
from functools import partial
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [2]:
pip install fuzzywuzzy

Note: you may need to restart the kernel to use updated packages.


In [3]:
from fuzzywuzzy import fuzz
from fuzzywuzzy import process



In [4]:
imdbdf = pd.read_csv("../data/imdbdf.csv")

In [5]:
imdbdf = imdbdf.drop("Unnamed: 0",axis=1)

In [6]:
imdbdf.loc[(imdbdf['Title'] == 'SMILE')]

Unnamed: 0,Title,Release Year,Rating,Runtime,IMDB Score,Metascore
3,SMILE,2022.0,R,115.0,6.6,68
1006,SMILE,2009.0,Not Rated,84.0,3.3,NO METASCORE
1698,SMILE,2022.0,NO RATING,,3.8,NO METASCORE


In [7]:
imdbdf = imdbdf.drop(1698)

In [8]:
imdbdf.loc[(imdbdf['Title'] == 'SMILE')]

Unnamed: 0,Title,Release Year,Rating,Runtime,IMDB Score,Metascore
3,SMILE,2022.0,R,115.0,6.6,68
1006,SMILE,2009.0,Not Rated,84.0,3.3,NO METASCORE


In [9]:
imdbdf['Release Year'] = pd.to_numeric(imdbdf['Release Year'])
imdbdf.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9999 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Title         9999 non-null   object 
 1   Release Year  9607 non-null   float64
 2   Rating        9999 non-null   object 
 3   Runtime       9234 non-null   float64
 4   IMDB Score    9999 non-null   object 
 5   Metascore     9999 non-null   object 
dtypes: float64(2), object(4)
memory usage: 546.8+ KB


In [10]:
imdbdf = imdbdf[imdbdf['Release Year'].notna()]

In [11]:
imdbdf.index = np.arange(1, len(imdbdf) + 1)
imdbdf['Title'] = imdbdf['Title'].str.upper()

In [12]:
imdbdf['Title'] = imdbdf['Title'].fillna(value=1408)

In [13]:
imdbdf.head(300)

Unnamed: 0,Title,Release Year,Rating,Runtime,IMDB Score,Metascore
1,THE MENU,2022.0,R,107.0,7.5,71
2,X,2022.0,R,105.0,6.6,79
3,BONES AND ALL,2022.0,R,131.0,7.3,74
4,SMILE,2022.0,R,115.0,6.6,68
5,BARBARIAN,2022.0,R,102.0,7.1,78
6,BHEDIYA,2022.0,NO RATING,156.0,7.7,NO METASCORE
7,NOPE,2022.0,R,130.0,6.9,77
8,THE MEAN ONE,2022.0,NO RATING,93.0,6.3,32
9,TERRIFIER 2,2022.0,Not Rated,138.0,6.3,59
10,PEARL,2022.0,R,103.0,7.0,73


In [14]:
wwbo = pd.read_excel("../data/thenumbers.xlsx", sheet_name=('All Time Worldwide Box Office'))
wwbo.index = np.arange(1, len(wwbo) + 1)
wwbo = wwbo.rename(columns={'Movie' : 'Title', 'Released' : 'Release Year'})
wwbo['Worldwide']=wwbo['Worldwide'].apply('{:,}'.format)
wwbo['Domestic']=wwbo['Domestic'].apply('{:,}'.format)
wwbo['International']=wwbo['International'].apply('{:,}'.format)

In [15]:
wwbo['Title'] = wwbo['Title'].str.upper()

In [16]:
wwbo

Unnamed: 0,Release Year,Title,Worldwide,Domestic,International
1,2017,IT,701012746,328828874.0,372183872.0
2,1975,JAWS,482947378,272257035.0,210690343.0
3,2007,I AM LEGEND,585532684,256393010.0,329139674.0
4,1973,THE EXORCIST,428214478,230347346.0,197867132.0
5,2019,IT: CHAPTER TWO,467563955,211593228.0,255970727.0
...,...,...,...,...,...
1851,2018,MANIAC TALES,155,,155.0
1852,1933,THE INVISIBLE MAN,132,,132.0
1853,2016,CAPTURE KILL RELEASE,72,,72.0
1854,2021,SLUMBER PARTY MASSACRE,28,,28.0


In [17]:
#df[['a', 'b']] = df[['a','b']].fillna(value=0)

wwbo['Title'] = wwbo['Title'].fillna(value=1408)

In [18]:
wwbo.head(100)

Unnamed: 0,Release Year,Title,Worldwide,Domestic,International
1,2017,IT,701012746,328828874.0,372183872.0
2,1975,JAWS,482947378,272257035.0,210690343.0
3,2007,I AM LEGEND,585532684,256393010.0,329139674.0
4,1973,THE EXORCIST,428214478,230347346.0,197867132.0
5,2019,IT: CHAPTER TWO,467563955,211593228.0,255970727.0
6,2018,A QUIET PLACE,334876670,188024361.0,146852309.0
7,2017,GET OUT,252297405,175837935.0,76459470.0
8,2001,HANNIBAL,350100280,165092266.0,185008014.0
9,2021,A QUIET PLACE: PART II,296650356,160215764.0,136434592.0
10,2018,HALLOWEEN,255416089,159366015.0,96050074.0


In [19]:
tdbo = pd.read_excel("../data/thenumbers.xlsx", sheet_name=('Top 100 Domestic Box Office'))
tdbo.index = np.arange(1, len(tdbo) + 1)
tdbo = tdbo.rename(columns={'Movie' : 'Title', 'Released' : 'Release Year', 'Infl. Adj. Dom.' : 'Domestic - Adjusted for Inflation'})
tdbo['Domestic']=tdbo['Domestic'].apply('{:,}'.format)
tdbo['Domestic - Adjusted for Inflation']=tdbo['Domestic - Adjusted for Inflation'].apply('{:,}'.format)
tdbo['Title'] = tdbo['Title'].str.upper()

In [20]:
tdbo['Title'] = tdbo['Title'].fillna(value=1408)

In [21]:
tdbo

Unnamed: 0,Release Year,Title,Domestic,Domestic - Adjusted for Inflation
1,2017,IT,328828874,334783459
2,1973,THE EXORCIST,230347346,1063304892
3,2019,IT CHAPTER TWO,211593228,211824221
4,2018,A QUIET PLACE,188024361,189262720
5,2017,GET OUT,175837935,179752944
6,2020,A QUIET PLACE PART II,160215764,160215764
7,2018,HALLOWEEN,159366015,160409948
8,1999,THE BLAIR WITCH PROJECT,140539099,253689664
9,2016,SPLIT,138141585,141221659
10,2013,THE CONJURING,137400141,154976540


In [22]:
top100comb = pd.merge(tdbo, imdbdf, left_on = ['Title', 'Release Year'], right_on = ['Title', 'Release Year'], how='left')
top100comb.index = np.arange(1, len(top100comb) + 1)

In [23]:
top100comb

Unnamed: 0,Release Year,Title,Domestic,Domestic - Adjusted for Inflation,Rating,Runtime,IMDB Score,Metascore
1,2017,IT,328828874,334783459,R,135.0,7.3,69
2,1973,THE EXORCIST,230347346,1063304892,R,122.0,8.1,81
3,2019,IT CHAPTER TWO,211593228,211824221,R,169.0,6.5,58
4,2018,A QUIET PLACE,188024361,189262720,PG-13,90.0,7.5,82
5,2017,GET OUT,175837935,179752944,R,104.0,7.7,85
6,2020,A QUIET PLACE PART II,160215764,160215764,PG-13,97.0,7.2,71
7,2018,HALLOWEEN,159366015,160409948,R,106.0,6.5,67
8,1999,THE BLAIR WITCH PROJECT,140539099,253689664,R,81.0,6.5,81
9,2016,SPLIT,138141585,141221659,PG-13,117.0,7.3,62
10,2013,THE CONJURING,137400141,154976540,R,112.0,7.5,68


In [24]:
imdbdf['Release Year'] = imdbdf['Release Year'].astype(str)

In [25]:
imdbdf.dtypes

Title            object
Release Year     object
Rating           object
Runtime         float64
IMDB Score       object
Metascore        object
dtype: object

In [26]:
imdbdf['Title and Year'] = imdbdf['Title'] + " " + imdbdf['Release Year']

In [27]:
imdbdf.head()

Unnamed: 0,Title,Release Year,Rating,Runtime,IMDB Score,Metascore,Title and Year
1,THE MENU,2022.0,R,107.0,7.5,71,THE MENU 2022.0
2,X,2022.0,R,105.0,6.6,79,X 2022.0
3,BONES AND ALL,2022.0,R,131.0,7.3,74,BONES AND ALL 2022.0
4,SMILE,2022.0,R,115.0,6.6,68,SMILE 2022.0
5,BARBARIAN,2022.0,R,102.0,7.1,78,BARBARIAN 2022.0


In [28]:
wwbo['Release Year'] = wwbo['Release Year'].astype(str)

In [29]:
wwbo['Title'] = wwbo['Title'].astype(str)

In [30]:
wwbo['Title and Year'] = wwbo['Title'] + " " + wwbo['Release Year']

In [31]:
wwbo

Unnamed: 0,Release Year,Title,Worldwide,Domestic,International,Title and Year
1,2017,IT,701012746,328828874.0,372183872.0,IT 2017
2,1975,JAWS,482947378,272257035.0,210690343.0,JAWS 1975
3,2007,I AM LEGEND,585532684,256393010.0,329139674.0,I AM LEGEND 2007
4,1973,THE EXORCIST,428214478,230347346.0,197867132.0,THE EXORCIST 1973
5,2019,IT: CHAPTER TWO,467563955,211593228.0,255970727.0,IT: CHAPTER TWO 2019
...,...,...,...,...,...,...
1851,2018,MANIAC TALES,155,,155.0,MANIAC TALES 2018
1852,1933,THE INVISIBLE MAN,132,,132.0,THE INVISIBLE MAN 1933
1853,2016,CAPTURE KILL RELEASE,72,,72.0,CAPTURE KILL RELEASE 2016
1854,2021,SLUMBER PARTY MASSACRE,28,,28.0,SLUMBER PARTY MASSACRE 2021


In [32]:
wwbo.dtypes

Release Year      object
Title             object
Worldwide         object
Domestic          object
International     object
Title and Year    object
dtype: object

In [33]:
wwbo

Unnamed: 0,Release Year,Title,Worldwide,Domestic,International,Title and Year
1,2017,IT,701012746,328828874.0,372183872.0,IT 2017
2,1975,JAWS,482947378,272257035.0,210690343.0,JAWS 1975
3,2007,I AM LEGEND,585532684,256393010.0,329139674.0,I AM LEGEND 2007
4,1973,THE EXORCIST,428214478,230347346.0,197867132.0,THE EXORCIST 1973
5,2019,IT: CHAPTER TWO,467563955,211593228.0,255970727.0,IT: CHAPTER TWO 2019
...,...,...,...,...,...,...
1851,2018,MANIAC TALES,155,,155.0,MANIAC TALES 2018
1852,1933,THE INVISIBLE MAN,132,,132.0,THE INVISIBLE MAN 1933
1853,2016,CAPTURE KILL RELEASE,72,,72.0,CAPTURE KILL RELEASE 2016
1854,2021,SLUMBER PARTY MASSACRE,28,,28.0,SLUMBER PARTY MASSACRE 2021


In [34]:
wwbo.to_csv('wwbo.csv')

In [35]:
imdbdf.dtypes

Title              object
Release Year       object
Rating             object
Runtime           float64
IMDB Score         object
Metascore          object
Title and Year     object
dtype: object

In [36]:
imdbdf['Runtime'] = imdbdf['Runtime'].astype(str)

In [37]:
imdbdf

Unnamed: 0,Title,Release Year,Rating,Runtime,IMDB Score,Metascore,Title and Year
1,THE MENU,2022.0,R,107.0,7.5,71,THE MENU 2022.0
2,X,2022.0,R,105.0,6.6,79,X 2022.0
3,BONES AND ALL,2022.0,R,131.0,7.3,74,BONES AND ALL 2022.0
4,SMILE,2022.0,R,115.0,6.6,68,SMILE 2022.0
5,BARBARIAN,2022.0,R,102.0,7.1,78,BARBARIAN 2022.0
...,...,...,...,...,...,...,...
9603,WALKING DEAD - TOMATE,2022.0,NO RATING,79.0,3.6,NO METASCORE,WALKING DEAD - TOMATE 2022.0
9604,MANNEQUIN,2023.0,NO RATING,,NO IMDB RATING,NO METASCORE,MANNEQUIN 2023.0
9605,UNDEAD OR ALIVE: A ZOMBEDY,2007.0,R,91.0,5.1,NO METASCORE,UNDEAD OR ALIVE: A ZOMBEDY 2007.0
9606,SECOND ORIGIN,2015.0,NO RATING,106.0,4.5,NO METASCORE,SECOND ORIGIN 2015.0


In [38]:
imdbdf.loc[(imdbdf['Title'] == '1408')]

Unnamed: 0,Title,Release Year,Rating,Runtime,IMDB Score,Metascore,Title and Year
325,1408,2007.0,PG-13,104.0,6.8,64,1408 2007.0


In [39]:
wwbo.loc[(wwbo['Title'] == '1408')]

Unnamed: 0,Release Year,Title,Worldwide,Domestic,International,Title and Year
55,2007,1408,131263370,71985628.0,59277742.0,1408 2007
594,2016,1408,922727,779820.0,142907.0,1408 2016
744,2011,1408,2756875,38100.0,2718775.0,1408 2011
959,2018,1408,3130260,,3130260.0,1408 2018
1347,2020,1408,123784,,123784.0,1408 2020


In [40]:
#imdbdf = imdbdf.drop(1698) drop extra 1408

In [41]:
wwbo = wwbo.drop(594)
wwbo = wwbo.drop(744)
wwbo = wwbo.drop(959)
wwbo = wwbo.drop(1347)

In [42]:
wwbo.loc[(wwbo['Title'] == '1408')]

Unnamed: 0,Release Year,Title,Worldwide,Domestic,International,Title and Year
55,2007,1408,131263370,71985628.0,59277742.0,1408 2007


In [43]:
wwbo.head()

Unnamed: 0,Release Year,Title,Worldwide,Domestic,International,Title and Year
1,2017,IT,701012746,328828874.0,372183872.0,IT 2017
2,1975,JAWS,482947378,272257035.0,210690343.0,JAWS 1975
3,2007,I AM LEGEND,585532684,256393010.0,329139674.0,I AM LEGEND 2007
4,1973,THE EXORCIST,428214478,230347346.0,197867132.0,THE EXORCIST 1973
5,2019,IT: CHAPTER TWO,467563955,211593228.0,255970727.0,IT: CHAPTER TWO 2019


In [44]:
wwbo.shape

(1851, 6)

In [45]:
imdbdf.head()

Unnamed: 0,Title,Release Year,Rating,Runtime,IMDB Score,Metascore,Title and Year
1,THE MENU,2022.0,R,107.0,7.5,71,THE MENU 2022.0
2,X,2022.0,R,105.0,6.6,79,X 2022.0
3,BONES AND ALL,2022.0,R,131.0,7.3,74,BONES AND ALL 2022.0
4,SMILE,2022.0,R,115.0,6.6,68,SMILE 2022.0
5,BARBARIAN,2022.0,R,102.0,7.1,78,BARBARIAN 2022.0


In [46]:
imdbdf.shape

(9607, 7)

In [47]:
# title a / title b / calc ratio of similarity - abi's suggestion

#df['candidate'] = get_close_matches()

In [48]:
# worldwidecomb = pd.merge(imdbdf, wwbo, left_on = ['Title', 'Release Year'], right_on = ['Title', 'Release Year'], how='inner')
# worldwidecomb.index = np.arange(1, len(top100comb) + 1)

# df2['team'] = df2['team'].apply(lambda x: difflib.get_close_matches(x, df1['team'])[0])

#wwbo['Title and Year'] = wwbo['Title and Year'].map(lambda x: difflib.get_close_matches(x, imdbdf['Title and Year'])[0])

#test = imdbdf.merge(wwbo)

#df2.index = df2.index.map(lambda x: difflib.get_close_matches(x, df1.index)[0])

# for x in wwbo['Title and Year']:
#     wwbo['Match'] = difflib.get_close_matches(x, imdbdf['Title and Year'])[0]
#     print(x)
#     print(difflib.get_close_matches(x, imdbdf['Title and Year'])[0])

In [49]:
wwbo

Unnamed: 0,Release Year,Title,Worldwide,Domestic,International,Title and Year
1,2017,IT,701012746,328828874.0,372183872.0,IT 2017
2,1975,JAWS,482947378,272257035.0,210690343.0,JAWS 1975
3,2007,I AM LEGEND,585532684,256393010.0,329139674.0,I AM LEGEND 2007
4,1973,THE EXORCIST,428214478,230347346.0,197867132.0,THE EXORCIST 1973
5,2019,IT: CHAPTER TWO,467563955,211593228.0,255970727.0,IT: CHAPTER TWO 2019
...,...,...,...,...,...,...
1851,2018,MANIAC TALES,155,,155.0,MANIAC TALES 2018
1852,1933,THE INVISIBLE MAN,132,,132.0,THE INVISIBLE MAN 1933
1853,2016,CAPTURE KILL RELEASE,72,,72.0,CAPTURE KILL RELEASE 2016
1854,2021,SLUMBER PARTY MASSACRE,28,,28.0,SLUMBER PARTY MASSACRE 2021


In [50]:
#df.to_csv('file1.csv')

In [51]:
wwbo.to_csv('wwbo.csv')

In [52]:
imdbdf.to_csv('imdbdf.csv')

In [53]:
wwbo['match'] = wwbo['Title'].str[0:8]

In [54]:
wwbo

Unnamed: 0,Release Year,Title,Worldwide,Domestic,International,Title and Year,match
1,2017,IT,701012746,328828874.0,372183872.0,IT 2017,IT
2,1975,JAWS,482947378,272257035.0,210690343.0,JAWS 1975,JAWS
3,2007,I AM LEGEND,585532684,256393010.0,329139674.0,I AM LEGEND 2007,I AM LEG
4,1973,THE EXORCIST,428214478,230347346.0,197867132.0,THE EXORCIST 1973,THE EXOR
5,2019,IT: CHAPTER TWO,467563955,211593228.0,255970727.0,IT: CHAPTER TWO 2019,IT: CHAP
...,...,...,...,...,...,...,...
1851,2018,MANIAC TALES,155,,155.0,MANIAC TALES 2018,MANIAC T
1852,1933,THE INVISIBLE MAN,132,,132.0,THE INVISIBLE MAN 1933,THE INVI
1853,2016,CAPTURE KILL RELEASE,72,,72.0,CAPTURE KILL RELEASE 2016,CAPTURE
1854,2021,SLUMBER PARTY MASSACRE,28,,28.0,SLUMBER PARTY MASSACRE 2021,SLUMBER


In [55]:
imdbdf['match'] = imdbdf['Title and Year'].str[0:8]

In [56]:
imdbdf.head()

Unnamed: 0,Title,Release Year,Rating,Runtime,IMDB Score,Metascore,Title and Year,match
1,THE MENU,2022.0,R,107.0,7.5,71,THE MENU 2022.0,THE MENU
2,X,2022.0,R,105.0,6.6,79,X 2022.0,X 2022.0
3,BONES AND ALL,2022.0,R,131.0,7.3,74,BONES AND ALL 2022.0,BONES AN
4,SMILE,2022.0,R,115.0,6.6,68,SMILE 2022.0,SMILE 20
5,BARBARIAN,2022.0,R,102.0,7.1,78,BARBARIAN 2022.0,BARBARIA


In [57]:
mergedf = pd.merge(imdbdf, wwbo, how='inner', on='match')

In [58]:
mergedf

Unnamed: 0,Title_x,Release Year_x,Rating,Runtime,IMDB Score,Metascore,Title and Year_x,match,Release Year_y,Title_y,Worldwide,Domestic,International,Title and Year_y
0,BARBARIAN,2022.0,R,102.0,7.1,78,BARBARIAN 2022.0,BARBARIA,2022,BARBARIAN,44734287,40842944.0,3891343.0,BARBARIAN 2022
1,BARBARIANS,2021.0,NO RATING,89.0,4.7,57,BARBARIANS 2021.0,BARBARIA,2022,BARBARIAN,44734287,40842944.0,3891343.0,BARBARIAN 2022
2,TERRIFIER 2,2022.0,Not Rated,138.0,6.3,59,TERRIFIER 2 2022.0,TERRIFIE,2022,TERRIFIER 2,11287125,10640105.0,647020.0,TERRIFIER 2 2022
3,TERRIFIER,2016.0,Unrated,85.0,5.6,NO METASCORE,TERRIFIER 2016.0,TERRIFIE,2022,TERRIFIER 2,11287125,10640105.0,647020.0,TERRIFIER 2 2022
4,TERRIFIED,2017.0,Not Rated,87.0,6.5,NO METASCORE,TERRIFIED 2017.0,TERRIFIE,2022,TERRIFIER 2,11287125,10640105.0,647020.0,TERRIFIER 2 2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5005,FRANCESCA,2015.0,NO RATING,80.0,5.3,NO METASCORE,FRANCESCA 2015.0,FRANCESC,2016,FRANCESCA,2008,,2008.0,FRANCESCA 2016
5006,KANCHANA: MUNI 2,2011.0,Not Rated,171.0,6.6,NO METASCORE,KANCHANA: MUNI 2 2011.0,KANCHANA,2019,KANCHANA 3,59691,,59691.0,KANCHANA 3 2019
5007,13 GRAVES,2019.0,NO RATING,83.0,4.6,NO METASCORE,13 GRAVES 2019.0,13 GRAVE,2019,13 GRAVES,10921,,10921.0,13 GRAVES 2019
5008,THE ANTENNA,2019.0,NO RATING,115.0,5.2,58,THE ANTENNA 2019.0,THE ANTE,2020,THE ANTENNA,2983,,2983.0,THE ANTENNA 2020


In [59]:
mergedf['ratio'] = mergedf.apply((lambda x: fuzz.ratio(x['Title and Year_x'], x['Title and Year_y'])), axis=1)

In [60]:
mergedf.head()

Unnamed: 0,Title_x,Release Year_x,Rating,Runtime,IMDB Score,Metascore,Title and Year_x,match,Release Year_y,Title_y,Worldwide,Domestic,International,Title and Year_y,ratio
0,BARBARIAN,2022.0,R,102.0,7.1,78,BARBARIAN 2022.0,BARBARIA,2022,BARBARIAN,44734287,40842944.0,3891343.0,BARBARIAN 2022,93
1,BARBARIANS,2021.0,NO RATING,89.0,4.7,57,BARBARIANS 2021.0,BARBARIA,2022,BARBARIAN,44734287,40842944.0,3891343.0,BARBARIAN 2022,84
2,TERRIFIER 2,2022.0,Not Rated,138.0,6.3,59,TERRIFIER 2 2022.0,TERRIFIE,2022,TERRIFIER 2,11287125,10640105.0,647020.0,TERRIFIER 2 2022,94
3,TERRIFIER,2016.0,Unrated,85.0,5.6,NO METASCORE,TERRIFIER 2016.0,TERRIFIE,2022,TERRIFIER 2,11287125,10640105.0,647020.0,TERRIFIER 2 2022,75
4,TERRIFIED,2017.0,Not Rated,87.0,6.5,NO METASCORE,TERRIFIED 2017.0,TERRIFIE,2022,TERRIFIER 2,11287125,10640105.0,647020.0,TERRIFIER 2 2022,69


In [61]:
mergedffilter = mergedf.loc[mergedf['ratio'] > 80]

In [62]:
mergedffilter
mergedffilter.index = np.arange(1, len(mergedffilter) + 1)

In [63]:
mergedffilter

Unnamed: 0,Title_x,Release Year_x,Rating,Runtime,IMDB Score,Metascore,Title and Year_x,match,Release Year_y,Title_y,Worldwide,Domestic,International,Title and Year_y,ratio
1,BARBARIAN,2022.0,R,102.0,7.1,78,BARBARIAN 2022.0,BARBARIA,2022,BARBARIAN,44734287,40842944.0,3891343.0,BARBARIAN 2022,93
2,BARBARIANS,2021.0,NO RATING,89.0,4.7,57,BARBARIANS 2021.0,BARBARIA,2022,BARBARIAN,44734287,40842944.0,3891343.0,BARBARIAN 2022,84
3,TERRIFIER 2,2022.0,Not Rated,138.0,6.3,59,TERRIFIER 2 2022.0,TERRIFIE,2022,TERRIFIER 2,11287125,10640105.0,647020.0,TERRIFIER 2 2022,94
4,SCREAM 2,1997.0,R,120.0,6.3,63,SCREAM 2 1997.0,SCREAM 2,1997,SCREAM 2,172363301,101363301.0,71000000.0,SCREAM 2 1997,93
5,WHITE NOISE,2022.0,R,136.0,6.6,67,WHITE NOISE 2022.0,WHITE NO,2005,WHITE NOISE,92094360,56094360.0,36000000.0,WHITE NOISE 2005,88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1224,SEYTANIN ÇOCUKLARI-EL EBYAZ,2016.0,NO RATING,92.0,3.6,NO METASCORE,SEYTANIN ÇOCUKLARI-EL EBYAZ 2016.0,SEYTANIN,2016,SEYTANIN ÇOCUKLARI-EL EBYAZ,3893,,3893.0,SEYTANIN ÇOCUKLARI-EL EBYAZ 2016,97
1225,FRANCESCA,2015.0,NO RATING,80.0,5.3,NO METASCORE,FRANCESCA 2015.0,FRANCESC,2016,FRANCESCA,2008,,2008.0,FRANCESCA 2016,87
1226,13 GRAVES,2019.0,NO RATING,83.0,4.6,NO METASCORE,13 GRAVES 2019.0,13 GRAVE,2019,13 GRAVES,10921,,10921.0,13 GRAVES 2019,93
1227,THE ANTENNA,2019.0,NO RATING,115.0,5.2,58,THE ANTENNA 2019.0,THE ANTE,2020,THE ANTENNA,2983,,2983.0,THE ANTENNA 2020,88


In [64]:
mergedffilter = mergedffilter.drop(['Release Year_x', 'Title and Year_x', 'match', 'Title_y', 'Title and Year_y', 'ratio'], axis=1)

In [65]:
mergedffilter

Unnamed: 0,Title_x,Rating,Runtime,IMDB Score,Metascore,Release Year_y,Worldwide,Domestic,International
1,BARBARIAN,R,102.0,7.1,78,2022,44734287,40842944.0,3891343.0
2,BARBARIANS,NO RATING,89.0,4.7,57,2022,44734287,40842944.0,3891343.0
3,TERRIFIER 2,Not Rated,138.0,6.3,59,2022,11287125,10640105.0,647020.0
4,SCREAM 2,R,120.0,6.3,63,1997,172363301,101363301.0,71000000.0
5,WHITE NOISE,R,136.0,6.6,67,2005,92094360,56094360.0,36000000.0
...,...,...,...,...,...,...,...,...,...
1224,SEYTANIN ÇOCUKLARI-EL EBYAZ,NO RATING,92.0,3.6,NO METASCORE,2016,3893,,3893.0
1225,FRANCESCA,NO RATING,80.0,5.3,NO METASCORE,2016,2008,,2008.0
1226,13 GRAVES,NO RATING,83.0,4.6,NO METASCORE,2019,10921,,10921.0
1227,THE ANTENNA,NO RATING,115.0,5.2,58,2020,2983,,2983.0


In [66]:
cleanmerge = mergedffilter.rename(columns={'Title_x': 'Title', 'Release Year_y': 'Release Year'})

In [67]:
pd.set_option('display.max_rows', None)

In [68]:
cleanmerge

Unnamed: 0,Title,Rating,Runtime,IMDB Score,Metascore,Release Year,Worldwide,Domestic,International
1,BARBARIAN,R,102.0,7.1,78,2022,44734287,40842944.0,3891343.0
2,BARBARIANS,NO RATING,89.0,4.7,57,2022,44734287,40842944.0,3891343.0
3,TERRIFIER 2,Not Rated,138.0,6.3,59,2022,11287125,10640105.0,647020.0
4,SCREAM 2,R,120.0,6.3,63,1997,172363301,101363301.0,71000000.0
5,WHITE NOISE,R,136.0,6.6,67,2005,92094360,56094360.0,36000000.0
6,WHITE NOISE,PG-13,101.0,5.5,30,2005,92094360,56094360.0,36000000.0
7,WHITE NOISE 2: THE LIGHT,PG-13,99.0,5.7,NO METASCORE,2008,8243567,,8243567.0
8,SOMETHING IN THE DIRT,R,116.0,6.0,76,2022,107114,,107114.0
9,SOMETHING IN THE WOODS,NO RATING,76.0,2.6,NO METASCORE,2022,107114,,107114.0
10,MIDSOMMAR,R,148.0,7.1,72,2019,46757893,27426363.0,19331530.0


In [69]:
duplicateRows = cleanmerge[cleanmerge.duplicated(['Title'])]

In [70]:
duplicateRows

Unnamed: 0,Title,Rating,Runtime,IMDB Score,Metascore,Release Year,Worldwide,Domestic,International
6,WHITE NOISE,PG-13,101.0,5.5,30,2005,92094360,56094360.0,36000000.0
19,THE INVITATION,Not Rated,100.0,6.6,74,2022,33689288,25100080.0,8589208.0
20,THE INVISIBLE MAN,TV-PG,71.0,7.6,87,2020,139011965,64914050.0,74097915.0
21,THE INVISIBLE MAN,TV-PG,71.0,7.6,87,1933,132,,132.0
23,THE INVISIBLE MANIAC,R,86.0,4.1,NO METASCORE,1933,132,,132.0
27,HELLRAISER,R,94.0,6.9,56,1987,14575148,14564000.0,11148.0
33,HALLOWEEN,R,106.0,6.5,67,2018,255416089,159366015.0,96050074.0
34,HALLOWEEN,R,106.0,6.5,67,2007,77514401,58269151.0,19245250.0
35,HALLOWEEN,R,106.0,6.5,67,2009,38512850,33392973.0,5119877.0
36,HALLOWEEN,R,109.0,6.0,47,2007,77514401,58269151.0,19245250.0


In [71]:
len(duplicateRows)

252

In [72]:
cleanmerge = cleanmerge[cleanmerge['Metascore'] != 'NO METASCORE']

In [73]:
cleanmerge

Unnamed: 0,Title,Rating,Runtime,IMDB Score,Metascore,Release Year,Worldwide,Domestic,International
1,BARBARIAN,R,102.0,7.1,78,2022,44734287,40842944.0,3891343.0
2,BARBARIANS,NO RATING,89.0,4.7,57,2022,44734287,40842944.0,3891343.0
3,TERRIFIER 2,Not Rated,138.0,6.3,59,2022,11287125,10640105.0,647020.0
4,SCREAM 2,R,120.0,6.3,63,1997,172363301,101363301.0,71000000.0
5,WHITE NOISE,R,136.0,6.6,67,2005,92094360,56094360.0,36000000.0
6,WHITE NOISE,PG-13,101.0,5.5,30,2005,92094360,56094360.0,36000000.0
8,SOMETHING IN THE DIRT,R,116.0,6.0,76,2022,107114,,107114.0
10,MIDSOMMAR,R,148.0,7.1,72,2019,46757893,27426363.0,19331530.0
12,BODIES BODIES BODIES,R,94.0,6.3,69,2022,13454835,11446602.0,2008233.0
13,DOCTOR SLEEP,R,152.0,7.3,59,2019,71720838,31581712.0,40139126.0


In [74]:
cleanmerge.to_csv('cleanmerge.csv')

MOVIES RANKED BY IMDB SCORE

In [135]:
rankbyimdb = cleanmerge.sort_values(by=['IMDB Score'], ascending=False)
rankbyimdb.index = np.arange(1, len(rankbyimdb) + 1)

In [137]:
rankbyimdb.head(15)

Unnamed: 0,Title,Rating,Runtime,IMDB Score,Metascore,Release Year,Worldwide,Domestic,International
1,THE SHINING,R,146.0,8.4,66,1980,45601938,44568631.0,
2,THE THING,R,109.0,8.2,57,1982,13841714,13782838.0,
3,THE EXORCIST,R,122.0,8.1,81,1973,428214478,230347346.0,
4,ROSEMARY'S BABY,Approved,137.0,8.0,96,1968,33396740,33395426.0,
5,KING KONG,Passed,100.0,7.9,90,1933,10001781,10000000.0,
6,DAWN OF THE DEAD,Unrated,127.0,7.8,71,2004,103452875,58990765.0,
7,DAWN OF THE DEAD,Unrated,127.0,7.8,71,1979,55000000,5100000.0,
8,FRANKENSTEIN,Passed,70.0,7.8,91,1931,12001435,12000000.0,
9,FRANKENSTEIN,Passed,70.0,7.8,91,1994,112006296,22006296.0,
10,NIGHT OF THE LIVING DEAD,Not Rated,96.0,7.8,89,1968,30087064,12087064.0,


In [138]:
rankbyimdb = rankbyimdb.drop(6)
rankbyimdb = rankbyimdb.drop(9)
rankbyimdb = rankbyimdb.drop(11)

In [153]:
rankbyimdb.index = np.arange(1, len(rankbyimdb) + 1)
rankbyimdb.head(10)

Unnamed: 0,Title,Rating,Runtime,IMDB Score,Metascore,Release Year,Worldwide,Domestic,International
1,THE SHINING,R,146.0,8.4,66,1980,45601938,44568631.0,
2,THE THING,R,109.0,8.2,57,1982,13841714,13782838.0,
3,THE EXORCIST,R,122.0,8.1,81,1973,428214478,230347346.0,
4,ROSEMARY'S BABY,Approved,137.0,8.0,96,1968,33396740,33395426.0,
5,KING KONG,Passed,100.0,7.9,90,1933,10001781,10000000.0,
6,DAWN OF THE DEAD,Unrated,127.0,7.8,71,1979,55000000,5100000.0,
7,FRANKENSTEIN,Passed,70.0,7.8,91,1931,12001435,12000000.0,
8,NIGHT OF THE LIVING DEAD,Not Rated,96.0,7.8,89,1968,30087064,12087064.0,
9,INVASION OF THE BODY SNATCHERS,Approved,80.0,7.7,92,1956,2044,,
10,EVIL DEAD II,R,84.0,7.7,72,1987,5927557,5923044.0,


In [160]:
rankbyimdb = rankbyimdb.drop('Rating', axis=1)
rankbyimdb = rankbyimdb.drop('Runtime', axis=1)

KeyError: "['Rating'] not found in axis"

In [162]:
rankbyimdb = rankbyimdb.drop('Metascore', axis=1)

In [163]:
rankbyimdb

Unnamed: 0,Title,IMDB Score,Release Year,Worldwide,Domestic,International
1,THE SHINING,8.4,1980,45601938,44568631.0,
2,THE THING,8.2,1982,13841714,13782838.0,
3,THE EXORCIST,8.1,1973,428214478,230347346.0,
4,ROSEMARY'S BABY,8.0,1968,33396740,33395426.0,
5,KING KONG,7.9,1933,10001781,10000000.0,
6,DAWN OF THE DEAD,7.8,1979,55000000,5100000.0,
7,FRANKENSTEIN,7.8,1931,12001435,12000000.0,
8,NIGHT OF THE LIVING DEAD,7.8,1968,30087064,12087064.0,
9,INVASION OF THE BODY SNATCHERS,7.7,1956,2044,,
10,EVIL DEAD II,7.7,1987,5927557,5923044.0,


In [147]:
cleanmerge['Metascore'] = pd.to_numeric(cleanmerge['Metascore'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleanmerge['Metascore'] = pd.to_numeric(cleanmerge['Metascore'], errors='coerce')


MOVIES RANKED BY METASCORE

In [148]:
rankmeta = cleanmerge.sort_values(by=['Metascore'], ascending=False)
rankmeta.index = np.arange(1, len(rankmeta) + 1)
rankmeta = rankmeta[rankmeta['Metascore'] != 'nan']
rankmeta = rankmeta.drop('IMDB Score', axis=1)

In [149]:
rankmeta

Unnamed: 0,Title,Rating,Runtime,Metascore,Release Year,Worldwide,Domestic,International
1,ROSEMARY'S BABY,Approved,137.0,96,1968,33396740,33395426.0,
2,INVASION OF THE BODY SNATCHERS,Approved,80.0,92,1978,24946533,24946533.0,
3,INVASION OF THE BODY SNATCHERS,Approved,80.0,92,1956,2044,,
4,FRANKENSTEIN,Passed,70.0,91,1994,112006296,22006296.0,
5,FRANKENSTEIN,Passed,70.0,91,1931,12001435,12000000.0,
6,KING KONG,Passed,100.0,90,1933,10001781,10000000.0,
7,THE BIRDS,PG-13,119.0,90,1963,11436929,11403529.0,
8,NIGHT OF THE LIVING DEAD,Not Rated,96.0,89,1968,30087064,12087064.0,
9,NIGHT OF THE LIVING DEAD,Not Rated,96.0,89,1990,5835247,5835247.0,
10,HALLOWEEN,R,91.0,87,1978,70274000,47274000.0,


In [150]:
rankmeta = rankmeta.drop(2)
rankmeta = rankmeta.drop(4)
rankmeta = rankmeta.drop(9)
rankmeta = rankmeta.drop(2)

KeyError: '[2] not found in axis'

In [152]:
rankmeta.index = np.arange(1, len(rankmeta) + 1)
rankmeta

Unnamed: 0,Title,Rating,Runtime,Metascore,Release Year,Worldwide,Domestic,International
1,ROSEMARY'S BABY,Approved,137.0,96,1968,33396740,33395426.0,
2,INVASION OF THE BODY SNATCHERS,Approved,80.0,92,1956,2044,,
3,FRANKENSTEIN,Passed,70.0,91,1931,12001435,12000000.0,
4,KING KONG,Passed,100.0,90,1933,10001781,10000000.0,
5,THE BIRDS,PG-13,119.0,90,1963,11436929,11403529.0,
6,NIGHT OF THE LIVING DEAD,Not Rated,96.0,89,1968,30087064,12087064.0,
7,HALLOWEEN,R,91.0,87,1978,70274000,47274000.0,
8,THE WICKER MAN,R,88.0,87,1973,177554,58341.0,
9,ERASERHEAD,Not Rated,89.0,87,1977,7097971,7000000.0,
10,HEREDITARY,R,127.0,87,2018,81263489,44069456.0,


In [166]:
rankmeta = rankmeta.drop('Rating', axis=1)
rankmeta = rankmeta.drop('Runtime', axis=1)

In [171]:
rankmeta

Unnamed: 0,Title,Metascore,Release Year,Worldwide,Domestic,International
1,ROSEMARY'S BABY,96,1968,33396740,33395426.0,
2,INVASION OF THE BODY SNATCHERS,92,1956,2044,,
3,FRANKENSTEIN,91,1931,12001435,12000000.0,
4,KING KONG,90,1933,10001781,10000000.0,
5,THE BIRDS,90,1963,11436929,11403529.0,
6,NIGHT OF THE LIVING DEAD,89,1968,30087064,12087064.0,
7,HALLOWEEN,87,1978,70274000,47274000.0,
8,THE WICKER MAN,87,1973,177554,58341.0,
9,ERASERHEAD,87,1977,7097971,7000000.0,
10,HEREDITARY,87,2018,81263489,44069456.0,


In [172]:
rankmetabtm = cleanmerge.sort_values(by=['Metascore'], ascending=True)
rankmetabtm.index = np.arange(1, len(rankmetabtm) + 1)

In [173]:
rankmetabtm

Unnamed: 0,Title,Rating,Runtime,IMDB Score,Metascore,Release Year,Worldwide,Domestic,International
1,THE HUMAN CENTIPEDE III (FINAL SEQUENCE),Not Rated,102.0,2.8,5,2015,18976,14562.0,
2,THE MANGLER,R,106.0,4.3,8,1995,1781000,1781000.0,
3,ALONE IN THE DARK,R,96.0,2.4,9,2005,10588079,5178569.0,
4,HALLOWEEN: THE CURSE OF MICHAEL MYERS,R,87.0,4.7,10,1988,17768757,17768757.0,
5,HALLOWEEN: THE CURSE OF MICHAEL MYERS,R,87.0,4.7,10,1989,11642254,11642254.0,
6,HALLOWEEN: THE CURSE OF MICHAEL MYERS,R,87.0,4.7,10,1995,15126948,15126948.0,
7,NOTHING LEFT TO FEAR,R,100.0,4.4,12,2013,1675381,7886.0,
8,FRIDAY THE 13TH PART VII: THE NEW BLOOD,R,88.0,5.2,13,1988,19170001,19170001.0,
9,FRIDAY THE 13TH PART VIII: JASON TAKES MANHATTAN,R,100.0,4.5,14,1989,14343976,14343976.0,
10,CABIN FEVER,R,99.0,3.7,14,2003,30351664,21158188.0,


In [174]:
rankmetabtm = rankmetabtm.drop(4)
rankmetabtm = rankmetabtm.drop(5)
rankmetabtm = rankmetabtm.drop(10)

In [175]:
rankmetabtm

Unnamed: 0,Title,Rating,Runtime,IMDB Score,Metascore,Release Year,Worldwide,Domestic,International
1,THE HUMAN CENTIPEDE III (FINAL SEQUENCE),Not Rated,102.0,2.8,5,2015,18976,14562.0,
2,THE MANGLER,R,106.0,4.3,8,1995,1781000,1781000.0,
3,ALONE IN THE DARK,R,96.0,2.4,9,2005,10588079,5178569.0,
6,HALLOWEEN: THE CURSE OF MICHAEL MYERS,R,87.0,4.7,10,1995,15126948,15126948.0,
7,NOTHING LEFT TO FEAR,R,100.0,4.4,12,2013,1675381,7886.0,
8,FRIDAY THE 13TH PART VII: THE NEW BLOOD,R,88.0,5.2,13,1988,19170001,19170001.0,
9,FRIDAY THE 13TH PART VIII: JASON TAKES MANHATTAN,R,100.0,4.5,14,1989,14343976,14343976.0,
11,BOOK OF SHADOWS: BLAIR WITCH 2,R,90.0,4.0,15,2000,47721314,26421314.0,
12,HOUSE OF THE DEAD,R,90.0,2.1,15,2003,13860078,10199354.0,
13,DARKNESS,PG-13,88.0,5.4,15,2004,34409206,22163442.0,


In [179]:
rankmetabtm.index = np.arange(1, len(rankmetabtm) + 1)

In [181]:
rankmetabtm = rankmetabtm.drop('Rating', axis=1)
rankmetabtm = rankmetabtm.drop('Runtime', axis=1)
rankmetabtm = rankmetabtm.drop('IMDB Score', axis=1)
rankmetabtm

Unnamed: 0,Title,Metascore,Release Year,Worldwide,Domestic,International
1,THE HUMAN CENTIPEDE III (FINAL SEQUENCE),5,2015,18976,14562.0,
2,THE MANGLER,8,1995,1781000,1781000.0,
3,ALONE IN THE DARK,9,2005,10588079,5178569.0,
4,HALLOWEEN: THE CURSE OF MICHAEL MYERS,10,1995,15126948,15126948.0,
5,NOTHING LEFT TO FEAR,12,2013,1675381,7886.0,
6,FRIDAY THE 13TH PART VII: THE NEW BLOOD,13,1988,19170001,19170001.0,
7,FRIDAY THE 13TH PART VIII: JASON TAKES MANHATTAN,14,1989,14343976,14343976.0,
8,BOOK OF SHADOWS: BLAIR WITCH 2,15,2000,47721314,26421314.0,
9,HOUSE OF THE DEAD,15,2003,13860078,10199354.0,
10,DARKNESS,15,2004,34409206,22163442.0,


MOVIES RANKED BY DOMESTIC BOX OFFICE

In [177]:
rankbybox = cleanmerge.sort_values(by=['Domestic'], ascending=False)
rankbybox.index = np.arange(1, len(rankbybox) + 1)
rankbybox = rankbybox[rankbybox['Domestic'] != 'nan']

In [178]:
rankbybox.head(10)

Unnamed: 0,Title,Rating,Runtime,IMDB Score,Metascore,Release Year,Worldwide,Domestic,International
1,THE EXORCIST,R,122.0,8.1,81,1973,428214478,230347346.0,
2,A QUIET PLACE,PG-13,90.0,7.5,82,2018,334876670,188024361.0,
3,A QUIET PLACE PART II,PG-13,97.0,7.2,71,2021,296650356,160215764.0,
4,HALLOWEEN,R,106.0,6.5,67,2018,255416089,159366015.0,
5,THE BLAIR WITCH PROJECT,R,81.0,6.5,81,1999,248300000,140539099.0,
6,THE CONJURING,R,112.0,7.5,68,2013,317740900,137400141.0,
7,THE CONJURING 2,R,134.0,7.3,65,2013,317740900,137400141.0,
8,THE RING,PG-13,115.0,7.1,57,2002,248218486,129094024.0,
9,THE GRUDGE,PG-13,91.0,5.9,49,2004,187281115,110359362.0,
10,THE GRUDGE,R,94.0,4.4,41,2004,187281115,110359362.0,


MOVIES BY RATING

In [81]:
##rank by IMDB - DONE - rankbyimdb

##rank by metascore - DONE - rankmeta

##rank by box office (domestic) - DONE - rankbybox

#earnings/critical scores by rating?

##sort by decade?

##normalize scores - imdb * 10 vs MS

##films that consistently rank high on critical/audience scores collected in one visual

In [82]:
#wwbo = wwbo.drop(594)

In [84]:
rankbyimdb.to_csv('rankbyimdb.csv')

In [85]:
rankbyimdbbottom = cleanmerge.sort_values(by=['IMDB Score'], ascending=True)
rankbyimdbbottom.index = np.arange(1, len(rankbyimdbbottom) + 1)

In [86]:
rankbyimdbbottom = rankbyimdbbottom.drop(4)
rankbyimdbbottom = rankbyimdbbottom.drop(5)

In [87]:
rankbyimdbbottom

Unnamed: 0,Title,Rating,Runtime,IMDB Score,Metascore,Release Year,Worldwide,Domestic,International
1,HOUSE OF THE DEAD,R,90.0,2.1,15,2003,13860078,10199354.0,3660724.0
2,ALONE IN THE DARK,R,96.0,2.4,9,2005,10588079,5178569.0,5409510.0
3,THE HUMAN CENTIPEDE III (FINAL SEQUENCE),Not Rated,102.0,2.8,5,2015,18976,14562.0,4414.0
6,SLENDER MAN,PG-13,93.0,3.2,30,2018,51945949,30564825.0,21381124.0
7,DELIRIUM,TV-MA,86.0,3.3,27,2018,878,,878.0
8,TEXAS CHAINSAW MASSACRE: THE NEXT GENERATION,R,87.0,3.3,50,1995,94558,94558.0,
9,FEARDOTCOM,R,101.0,3.4,16,2002,13289290,13208023.0,81267.0
10,BLACK CHRISTMAS,PG-13,92.0,3.5,49,2006,16235738,16235738.0,
11,BLACK CHRISTMAS,PG-13,92.0,3.5,49,2019,18526087,10429730.0,8096357.0
12,SACRILEGE,NO RATING,83.0,3.5,33,2020,13548,,13548.0


In [88]:
rankbyimdbbottom = rankbyimdbbottom.drop(10)
rankbyimdbbottom = rankbyimdbbottom.drop(11)

In [89]:
rankbyimdbbottom.index = np.arange(1, len(rankbyimdbbottom) + 1)
rankbyimdbbottom = rankbyimdbbottom.head(10)

In [132]:
rankbyimdbbottom

Unnamed: 0,Title,Rating,Runtime,IMDB Score,Metascore,Release Year,Domestic,International
1,HOUSE OF THE DEAD,R,90.0,2.1,15,2003,10199354.0,3660724.0
2,ALONE IN THE DARK,R,96.0,2.4,9,2005,5178569.0,5409510.0
3,THE HUMAN CENTIPEDE III (FINAL SEQUENCE),Not Rated,102.0,2.8,5,2015,14562.0,4414.0
4,SLENDER MAN,PG-13,93.0,3.2,30,2018,30564825.0,21381124.0
5,DELIRIUM,TV-MA,86.0,3.3,27,2018,,878.0
6,TEXAS CHAINSAW MASSACRE: THE NEXT GENERATION,R,87.0,3.3,50,1995,94558.0,
7,FEARDOTCOM,R,101.0,3.4,16,2002,13208023.0,81267.0
8,SACRILEGE,NO RATING,83.0,3.5,33,2020,,13548.0
9,CREATURE,R,93.0,3.6,31,2011,508714.0,24521.0
10,THRILLER,TV-MA,87.0,3.6,28,2019,,1188.0


In [158]:
rankbyimdbbottom = rankbyimdbbottom.drop('Rating', axis=1)
rankbyimdbbottom = rankbyimdbbottom.drop('Runtime', axis=1)

In [164]:
rankbyimdbbottom = rankbyimdbbottom.drop('Metascore', axis=1)

In [165]:
rankbyimdbbottom

Unnamed: 0,Title,IMDB Score,Release Year,Domestic,International
1,HOUSE OF THE DEAD,2.1,2003,10199354.0,3660724.0
2,ALONE IN THE DARK,2.4,2005,5178569.0,5409510.0
3,THE HUMAN CENTIPEDE III (FINAL SEQUENCE),2.8,2015,14562.0,4414.0
4,SLENDER MAN,3.2,2018,30564825.0,21381124.0
5,DELIRIUM,3.3,2018,,878.0
6,TEXAS CHAINSAW MASSACRE: THE NEXT GENERATION,3.3,1995,94558.0,
7,FEARDOTCOM,3.4,2002,13208023.0,81267.0
8,SACRILEGE,3.5,2020,,13548.0
9,CREATURE,3.6,2011,508714.0,24521.0
10,THRILLER,3.6,2019,,1188.0


In [91]:
rankbyimdbbottom.to_csv('rankbyimdbbottom.csv')

In [100]:
cleanmerge['Worldwide'] = cleanmerge['Worldwide'].replace(',', '', regex=True)
cleanmerge['Worldwide'] = cleanmerge['Worldwide'].apply(pd.to_numeric)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleanmerge['Worldwide'] = cleanmerge['Worldwide'].replace(',', '', regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleanmerge['Worldwide'] = cleanmerge['Worldwide'].apply(pd.to_numeric)


In [110]:
cleanmerge['Domestic'] = cleanmerge['Domestic'].replace(',', '', regex=True)
cleanmerge['Domestic'] = pd.to_numeric(cleanmerge['Domestic'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleanmerge['Domestic'] = cleanmerge['Domestic'].replace(',', '', regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleanmerge['Domestic'] = pd.to_numeric(cleanmerge['Domestic'], errors='coerce')


In [112]:
cleanmerge['Runtime'] = pd.to_numeric(cleanmerge['Runtime'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleanmerge['Runtime'] = pd.to_numeric(cleanmerge['Runtime'], errors='coerce')


In [114]:
cleanmerge['Metascore'] = pd.to_numeric(cleanmerge['Metascore'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleanmerge['Metascore'] = pd.to_numeric(cleanmerge['Metascore'], errors='coerce')


In [116]:
cleanmerge['Release Year'] = pd.to_numeric(cleanmerge['Release Year'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleanmerge['Release Year'] = pd.to_numeric(cleanmerge['Release Year'], errors='coerce')


In [118]:
cleanmerge['International'] = pd.to_numeric(cleanmerge['International'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleanmerge['International'] = pd.to_numeric(cleanmerge['International'], errors='coerce')


In [None]:
cleanmerge.dtypes