# Importing Data and Libraries

In [57]:
import sqlite3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
con = sqlite3.connect('data/im.db')
cur = con.cursor()

In [9]:
bmg = pd.read_csv('data/bom.movie_gross.csv')
mb = pd.read_csv('data/tn.movie_budgets.csv')
m = pd.read_csv('data/tmdb.movies.csv')

In [10]:
rv = pd.read_csv('data/rt.reviews.tsv', delimiter = '\t', encoding = 'latin') 
# needed to change the encoding as it was throwing an error message about it
# tsv file, so needed to add a delimiter of tab

In [82]:
mi = pd.read_csv('data/rt.movie_info.tsv', delimiter = '\t')

In [226]:
mi.head()

Unnamed: 0,id,synopsis,rating,genre,director,writer,theater_date,dvd_date,currency,box_office,runtime,studio
0,1,"This gritty, fast-paced, and innovative police...",R,Action and Adventure|Classics|Drama,William Friedkin,Ernest Tidyman,"Oct 9, 1971","Sep 25, 2001",,,104 minutes,
1,3,"New York City, not-too-distant-future: Eric Pa...",R,Drama|Science Fiction and Fantasy,David Cronenberg,David Cronenberg|Don DeLillo,"Aug 17, 2012","Jan 1, 2013",$,600000.0,108 minutes,Entertainment One
2,5,Illeana Douglas delivers a superb performance ...,R,Drama|Musical and Performing Arts,Allison Anders,Allison Anders,"Sep 13, 1996","Apr 18, 2000",,,116 minutes,
3,6,Michael Douglas runs afoul of a treacherous su...,R,Drama|Mystery and Suspense,Barry Levinson,Paul Attanasio|Michael Crichton,"Dec 9, 1994","Aug 27, 1997",,,128 minutes,
4,7,,NR,Drama|Romance,Rodney Bennett,Giles Cooper,,,,,200 minutes,


# Exploring the im.db Data

In [147]:
%%bash

sqlite3 data/im.db
.tables

#checking the table names

directors      movie_akas     movie_ratings  principals   
known_for      movie_basics   persons        writers      


In [148]:
%%bash

sqlite3 data/im.db
.schema

#checking schema

CREATE TABLE IF NOT EXISTS "movie_basics" (
"movie_id" TEXT,
  "primary_title" TEXT,
  "original_title" TEXT,
  "start_year" INTEGER,
  "runtime_minutes" REAL,
  "genres" TEXT
);
CREATE TABLE IF NOT EXISTS "directors" (
"movie_id" TEXT,
  "person_id" TEXT
);
CREATE TABLE IF NOT EXISTS "known_for" (
"person_id" TEXT,
  "movie_id" TEXT
);
CREATE TABLE IF NOT EXISTS "movie_akas" (
"movie_id" TEXT,
  "ordering" INTEGER,
  "title" TEXT,
  "region" TEXT,
  "language" TEXT,
  "types" TEXT,
  "attributes" TEXT,
  "is_original_title" REAL
);
CREATE TABLE IF NOT EXISTS "movie_ratings" (
"movie_id" TEXT,
  "averagerating" REAL,
  "numvotes" INTEGER
);
CREATE TABLE IF NOT EXISTS "persons" (
"person_id" TEXT,
  "primary_name" TEXT,
  "birth_year" REAL,
  "death_year" REAL,
  "primary_profession" TEXT
);
CREATE TABLE IF NOT EXISTS "principals" (
"movie_id" TEXT,
  "ordering" INTEGER,
  "person_id" TEXT,
  "category" TEXT,
  "job" TEXT,
  "characters" TEXT
);
CREATE TABLE IF NOT EXISTS "writers" (
"m

In [38]:
query_result = pd.read_sql('''
    SELECT *
    FROM 
        movie_ratings AS mr 
        INNER JOIN movie_basics AS mb
            ON mb.movie_id = mr.movie_id
    
    ORDER BY "averagerating" DESC

    LIMIT 50
    
''', con)

query_result

Unnamed: 0,movie_id,averagerating,numvotes,movie_id.1,primary_title,original_title,start_year,runtime_minutes,genres
0,tt5390098,10.0,5,tt5390098,The Paternal Bond: Barbary Macaques,Atlas Mountain: Barbary Macaques - Childcaring...,2015,59.0,Documentary
1,tt6295832,10.0,5,tt6295832,Requiem voor een Boom,Requiem voor een Boom,2016,48.0,Documentary
2,tt1770682,10.0,5,tt1770682,Freeing Bernie Baran,Freeing Bernie Baran,2010,100.0,"Crime,Documentary"
3,tt2632430,10.0,5,tt2632430,Hercule contre Hermès,Hercule contre Hermès,2012,72.0,Documentary
4,tt8730716,10.0,5,tt8730716,Pick It Up! - Ska in the '90s,Pick It Up! - Ska in the '90s,2019,99.0,Documentary
5,tt4960818,10.0,8,tt4960818,Revolution Food,Revolution Food,2015,70.0,Documentary
6,tt6991826,10.0,5,tt6991826,A Dedicated Life: Phoebe Brand Beyond the Group,A Dedicated Life: Phoebe Brand Beyond the Group,2015,93.0,Documentary
7,tt10176328,10.0,5,tt10176328,Exteriores: Mulheres Brasileiras na Diplomacia,Exteriores: Mulheres Brasileiras na Diplomacia,2018,52.0,Documentary
8,tt4944240,10.0,5,tt4944240,Dog Days in the Heartland,Dog Days in the Heartland,2017,,Drama
9,tt5089804,10.0,7,tt5089804,Fly High: Story of the Disc Dog,Fly High: Story of the Disc Dog,2019,65.0,Documentary


In [158]:
query_result = pd.read_sql('''
    SELECT DISTINCT *
    FROM directors, movie_akas, movie_ratings, principals,  
          known_for, movie_basics, persons, writers 
    LIMIT 5
''', con)

query_result

Unnamed: 0,movie_id,person_id,movie_id.1,ordering,title,region,language,types,attributes,is_original_title,...,start_year,runtime_minutes,genres,person_id.1,primary_name,birth_year,death_year,primary_profession,movie_id.2,person_id.2
0,tt0285252,nm0899854,tt0369610,10,Джурасик свят,BG,bg,,,0.0,...,2013,175.0,"Action,Crime,Drama",nm0061671,Mary Ellen Bauder,,,"miscellaneous,production_manager,producer",tt0285252,nm0899854
1,tt0285252,nm0899854,tt0369610,10,Джурасик свят,BG,bg,,,0.0,...,2013,175.0,"Action,Crime,Drama",nm0061671,Mary Ellen Bauder,,,"miscellaneous,production_manager,producer",tt0438973,nm0175726
2,tt0285252,nm0899854,tt0369610,10,Джурасик свят,BG,bg,,,0.0,...,2013,175.0,"Action,Crime,Drama",nm0061671,Mary Ellen Bauder,,,"miscellaneous,production_manager,producer",tt0438973,nm1802864
3,tt0285252,nm0899854,tt0369610,10,Джурасик свят,BG,bg,,,0.0,...,2013,175.0,"Action,Crime,Drama",nm0061671,Mary Ellen Bauder,,,"miscellaneous,production_manager,producer",tt0462036,nm1940585
4,tt0285252,nm0899854,tt0369610,10,Джурасик свят,BG,bg,,,0.0,...,2013,175.0,"Action,Crime,Drama",nm0061671,Mary Ellen Bauder,,,"miscellaneous,production_manager,producer",tt0835418,nm0310087


In [70]:
con.close()

# Cleaning CSV files

In [None]:
bmg = bmg.reset_index()

In [71]:
bmg['foreign_gross'] = bmg['foreign_gross'].str.replace(',','')

In [72]:
bmg['foreign_gross'] = bmg['foreign_gross'].astype(float)

In [146]:
mb['release_date'] = pd.to_datetime(mb['release_date'])

In [147]:
m['release_date'] = pd.to_datetime(m['release_date'])

In [148]:
mb['release_year'] = mb['release_date'].dt.year

In [156]:
m['release_year'] = m['release_date'].dt.year

In [149]:
mb.head()

Unnamed: 0,id,release_date,movie,production_budget,domestic_gross,worldwide_gross,release_year
0,1,2009-12-18,Avatar,"$425,000,000","$760,507,625","$2,776,345,279",2009
1,2,2011-05-20,Pirates of the Caribbean: On Stranger Tides,"$410,600,000","$241,063,875","$1,045,663,875",2011
2,3,2019-06-07,Dark Phoenix,"$350,000,000","$42,762,350","$149,762,350",2019
3,4,2015-05-01,Avengers: Age of Ultron,"$330,600,000","$459,005,868","$1,403,013,963",2015
4,5,2017-12-15,Star Wars Ep. VIII: The Last Jedi,"$317,000,000","$620,181,382","$1,316,721,747",2017


In [139]:
mbm['release_year'] = mbm['release_date'].dt.year
#creating a release year to merge data with bmg

AttributeError: Can only use .dt accessor with datetimelike values

In [137]:
mb.head()

Unnamed: 0,id,release_date,movie,production_budget,domestic_gross,worldwide_gross
0,1,"Dec 18, 2009",Avatar,"$425,000,000","$760,507,625","$2,776,345,279"
1,2,"May 20, 2011",Pirates of the Caribbean: On Stranger Tides,"$410,600,000","$241,063,875","$1,045,663,875"
2,3,"Jun 7, 2019",Dark Phoenix,"$350,000,000","$42,762,350","$149,762,350"
3,4,"May 1, 2015",Avengers: Age of Ultron,"$330,600,000","$459,005,868","$1,403,013,963"
4,5,"Dec 15, 2017",Star Wars Ep. VIII: The Last Jedi,"$317,000,000","$620,181,382","$1,316,721,747"


In [118]:
print(mbm['release_date'].dtype)

object


In [113]:
bmg.head()

Unnamed: 0,title,studio,domestic_gross,foreign_gross,year
0,Toy Story 3,BV,415000000.0,652000000.0,2010
1,Alice in Wonderland (2010),BV,334200000.0,691300000.0,2010
2,Harry Potter and the Deathly Hallows Part 1,WB,296000000.0,664300000.0,2010
3,Inception,WB,292600000.0,535700000.0,2010
4,Shrek Forever After,P/DW,238700000.0,513900000.0,2010


In [116]:
print(bmg['year'].dtype)

int64


# Exploring the Movie Gross file

In [50]:
bmg.info()
# looks like foreign_gross is a string when it should be an integer

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3387 entries, 0 to 3386
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   title           3387 non-null   object 
 1   studio          3382 non-null   object 
 2   domestic_gross  3359 non-null   float64
 3   foreign_gross   2037 non-null   object 
 4   year            3387 non-null   int64  
dtypes: float64(1), int64(1), object(3)
memory usage: 132.4+ KB


In [18]:
bmg.isna().sum()
#foreign gross has a ton of null values

title                0
studio               5
domestic_gross      28
foreign_gross     1350
year                 0
dtype: int64

In [66]:
bmg.notna().sum()

title             3387
studio            3382
domestic_gross    3359
foreign_gross     2037
year              3387
dtype: int64

In [75]:
bmg.sort_values(by='foreign_gross', ascending=False).head(20)

Unnamed: 0,title,studio,domestic_gross,foreign_gross,year
328,Harry Potter and the Deathly Hallows Part 2,WB,381000000.0,960500000.0,2011
1875,Avengers: Age of Ultron,BV,459000000.0,946400000.0,2015
727,Marvel's The Avengers,BV,623400000.0,895500000.0,2012
3081,Jurassic World: Fallen Kingdom,Uni.,417700000.0,891800000.0,2018
1127,Frozen,BV,400700000.0,875700000.0,2013
2764,Wolf Warrior 2,HC,2700000.0,867600000.0,2017
1477,Transformers: Age of Extinction,Par.,245400000.0,858600000.0,2014
1876,Minions,Uni.,336000000.0,823400000.0,2015
3083,Aquaman,WB,335100000.0,812700000.0,2018
1128,Iron Man 3,BV,409000000.0,805800000.0,2013


In [76]:
bmg['studio']

0               BV
1               BV
2               WB
3               WB
4             P/DW
           ...    
3382         Magn.
3383            FM
3384          Sony
3385    Synergetic
3386         Grav.
Name: studio, Length: 3387, dtype: object

# Exploring Movie Budgets file

In [77]:
mb.head()

Unnamed: 0,id,release_date,movie,production_budget,domestic_gross,worldwide_gross
0,1,"Dec 18, 2009",Avatar,"$425,000,000","$760,507,625","$2,776,345,279"
1,2,"May 20, 2011",Pirates of the Caribbean: On Stranger Tides,"$410,600,000","$241,063,875","$1,045,663,875"
2,3,"Jun 7, 2019",Dark Phoenix,"$350,000,000","$42,762,350","$149,762,350"
3,4,"May 1, 2015",Avengers: Age of Ultron,"$330,600,000","$459,005,868","$1,403,013,963"
4,5,"Dec 15, 2017",Star Wars Ep. VIII: The Last Jedi,"$317,000,000","$620,181,382","$1,316,721,747"


In [78]:
m.head()

Unnamed: 0.1,Unnamed: 0,genre_ids,id,original_language,original_title,popularity,release_date,title,vote_average,vote_count
0,0,"[12, 14, 10751]",12444,en,Harry Potter and the Deathly Hallows: Part 1,33.533,2010-11-19,Harry Potter and the Deathly Hallows: Part 1,7.7,10788
1,1,"[14, 12, 16, 10751]",10191,en,How to Train Your Dragon,28.734,2010-03-26,How to Train Your Dragon,7.7,7610
2,2,"[12, 28, 878]",10138,en,Iron Man 2,28.515,2010-05-07,Iron Man 2,6.8,12368
3,3,"[16, 35, 10751]",862,en,Toy Story,28.005,1995-11-22,Toy Story,7.9,10174
4,4,"[28, 878, 12]",27205,en,Inception,27.92,2010-07-16,Inception,8.3,22186


In [97]:
bmg.head(10)

Unnamed: 0_level_0,studio,domestic_gross,foreign_gross,year
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Toy Story 3,BV,415000000.0,652000000.0,2010
Alice in Wonderland (2010),BV,334200000.0,691300000.0,2010
Harry Potter and the Deathly Hallows Part 1,WB,296000000.0,664300000.0,2010
Inception,WB,292600000.0,535700000.0,2010
Shrek Forever After,P/DW,238700000.0,513900000.0,2010
The Twilight Saga: Eclipse,Sum.,300500000.0,398000000.0,2010
Iron Man 2,Par.,312400000.0,311500000.0,2010
Tangled,BV,200800000.0,391000000.0,2010
Despicable Me,Uni.,251500000.0,291600000.0,2010
How to Train Your Dragon,P/DW,217600000.0,277300000.0,2010


In [86]:
mi.head(20)

Unnamed: 0,id,synopsis,rating,genre,director,writer,theater_date,dvd_date,currency,box_office,runtime,studio
0,1,"This gritty, fast-paced, and innovative police...",R,Action and Adventure|Classics|Drama,William Friedkin,Ernest Tidyman,"Oct 9, 1971","Sep 25, 2001",,,104 minutes,
1,3,"New York City, not-too-distant-future: Eric Pa...",R,Drama|Science Fiction and Fantasy,David Cronenberg,David Cronenberg|Don DeLillo,"Aug 17, 2012","Jan 1, 2013",$,600000.0,108 minutes,Entertainment One
2,5,Illeana Douglas delivers a superb performance ...,R,Drama|Musical and Performing Arts,Allison Anders,Allison Anders,"Sep 13, 1996","Apr 18, 2000",,,116 minutes,
3,6,Michael Douglas runs afoul of a treacherous su...,R,Drama|Mystery and Suspense,Barry Levinson,Paul Attanasio|Michael Crichton,"Dec 9, 1994","Aug 27, 1997",,,128 minutes,
4,7,,NR,Drama|Romance,Rodney Bennett,Giles Cooper,,,,,200 minutes,
5,8,The year is 1942. As the Allies unite overseas...,PG,Drama|Kids and Family,Jay Russell,Gail Gilchriest,"Mar 3, 2000","Jul 11, 2000",,,95 minutes,Warner Bros. Pictures
6,10,Some cast and crew from NBC's highly acclaimed...,PG-13,Comedy,Jake Kasdan,Mike White,"Jan 11, 2002","Jun 18, 2002",$,41032915.0,82 minutes,Paramount Pictures
7,13,"Stewart Kane, an Irishman living in the Austra...",R,Drama,Ray Lawrence,Raymond Carver|Beatrix Christian,"Apr 27, 2006","Oct 2, 2007",$,224114.0,123 minutes,Sony Pictures Classics
8,14,"""Love Ranch"" is a bittersweet love story that ...",R,Drama,Taylor Hackford,Mark Jacobson,"Jun 30, 2010","Nov 9, 2010",$,134904.0,117 minutes,
9,15,When a diamond expedition in the Congo is lost...,PG-13,Action and Adventure|Mystery and Suspense|Scie...,Frank Marshall,John Patrick Shanley,"Jun 9, 1995","Jul 27, 1999",,,108 minutes,


# Combining Data: bmg, mb, m

In [150]:
# tried this out mbm = m.merge(mb, how = 'left', left_on =['original_title', 'release_date'], right_on =['movie', 'release_date'])

In [151]:
#tried this out csvmerge = mbm.merge(bmg, how = 'left', left_on =['original_title', 'release_year'], right_on =['title', 'year'])

In [175]:
csvmerge =  m.merge(mb, how = 'left', 
                    left_on =['original_title', 'release_year'], 
                    right_on =['movie', 'release_year']).merge(
                        bmg, how = 'left', 
                        left_on =['original_title', 'release_year'], 
                        right_on =['title', 'year'])

In [191]:
print(mb.count())
#print(mb.count())
#print(bmg.count())
csvmerge.count()
#checking to see if merge worked ok and how many rows merged well together

id                   5782
release_date         5782
movie                5782
production_budget    5782
domestic_gross       5782
worldwide_gross      5782
release_year         5782
dtype: int64


Unnamed: 0           26517
genre_ids            26517
id_x                 26517
original_language    26517
original_title       26517
popularity           26517
release_date_x       26517
title_x              26517
vote_average         26517
vote_count           26517
release_year         26517
id_y                  1759
release_date_y        1759
movie                 1759
production_budget     1759
domestic_gross_x      1759
worldwide_gross       1759
title_y               1985
studio                1984
domestic_gross_y      1975
foreign_gross         1400
year                  1985
dtype: int64

In [177]:
csvmerge

Unnamed: 0.1,Unnamed: 0,genre_ids,id_x,original_language,original_title,popularity,release_date_x,title_x,vote_average,vote_count,...,release_date_y,movie,production_budget,domestic_gross_x,worldwide_gross,title_y,studio,domestic_gross_y,foreign_gross,year
0,0,"[12, 14, 10751]",12444,en,Harry Potter and the Deathly Hallows: Part 1,33.533,2010-11-19,Harry Potter and the Deathly Hallows: Part 1,7.7,10788,...,NaT,,,,,,,,,
1,1,"[14, 12, 16, 10751]",10191,en,How to Train Your Dragon,28.734,2010-03-26,How to Train Your Dragon,7.7,7610,...,2010-03-26,How to Train Your Dragon,"$165,000,000","$217,581,232","$494,870,992",How to Train Your Dragon,P/DW,217600000.0,277300000.0,2010.0
2,2,"[12, 28, 878]",10138,en,Iron Man 2,28.515,2010-05-07,Iron Man 2,6.8,12368,...,2010-05-07,Iron Man 2,"$170,000,000","$312,433,331","$621,156,389",Iron Man 2,Par.,312400000.0,311500000.0,2010.0
3,3,"[16, 35, 10751]",862,en,Toy Story,28.005,1995-11-22,Toy Story,7.9,10174,...,1995-11-22,Toy Story,"$30,000,000","$191,796,233","$364,545,516",,,,,
4,4,"[28, 878, 12]",27205,en,Inception,27.920,2010-07-16,Inception,8.3,22186,...,2010-07-16,Inception,"$160,000,000","$292,576,195","$835,524,642",Inception,WB,292600000.0,535700000.0,2010.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26512,26512,"[27, 18]",488143,en,Laboratory Conditions,0.600,2018-10-13,Laboratory Conditions,0.0,1,...,NaT,,,,,,,,,
26513,26513,"[18, 53]",485975,en,_EXHIBIT_84xxx_,0.600,2018-05-01,_EXHIBIT_84xxx_,0.0,1,...,NaT,,,,,,,,,
26514,26514,"[14, 28, 12]",381231,en,The Last One,0.600,2018-10-01,The Last One,0.0,1,...,NaT,,,,,,,,,
26515,26515,"[10751, 12, 28]",366854,en,Trailer Made,0.600,2018-06-22,Trailer Made,0.0,1,...,NaT,,,,,,,,,


In [178]:
csvmerge.tail()

Unnamed: 0.1,Unnamed: 0,genre_ids,id_x,original_language,original_title,popularity,release_date_x,title_x,vote_average,vote_count,...,release_date_y,movie,production_budget,domestic_gross_x,worldwide_gross,title_y,studio,domestic_gross_y,foreign_gross,year
26512,26512,"[27, 18]",488143,en,Laboratory Conditions,0.6,2018-10-13,Laboratory Conditions,0.0,1,...,NaT,,,,,,,,,
26513,26513,"[18, 53]",485975,en,_EXHIBIT_84xxx_,0.6,2018-05-01,_EXHIBIT_84xxx_,0.0,1,...,NaT,,,,,,,,,
26514,26514,"[14, 28, 12]",381231,en,The Last One,0.6,2018-10-01,The Last One,0.0,1,...,NaT,,,,,,,,,
26515,26515,"[10751, 12, 28]",366854,en,Trailer Made,0.6,2018-06-22,Trailer Made,0.0,1,...,NaT,,,,,,,,,
26516,26516,"[53, 27]",309885,en,The Church,0.6,2018-10-05,The Church,0.0,1,...,NaT,,,,,,,,,


# Exploring Combined CSV tables

In [179]:
merge_no_null = csvmerge
mnn = merge_no_null

In [201]:
mnn.notna().sum()

Unnamed: 0           1078
genre_ids            1078
id_x                 1078
original_language    1078
original_title       1078
popularity           1078
release_date_x       1078
title_x              1078
vote_average         1078
vote_count           1078
release_year         1078
id_y                 1078
release_date_y       1078
movie                1078
production_budget    1078
domestic_gross_x     1078
worldwide_gross      1078
title_y              1078
studio               1078
domestic_gross_y     1078
foreign_gross        1078
year                 1078
dtype: int64

In [186]:
mnn = mnn.dropna()

In [188]:
mnn.count()

Unnamed: 0           1078
genre_ids            1078
id_x                 1078
original_language    1078
original_title       1078
popularity           1078
release_date_x       1078
title_x              1078
vote_average         1078
vote_count           1078
release_year         1078
id_y                 1078
release_date_y       1078
movie                1078
production_budget    1078
domestic_gross_x     1078
worldwide_gross      1078
title_y              1078
studio               1078
domestic_gross_y     1078
foreign_gross        1078
year                 1078
dtype: int64

In [197]:
print(mnn['vote_count'].dtype)

int64


In [202]:
mnn.sort_values(by='vote_count', ascending=False).tail(20)

Unnamed: 0.1,Unnamed: 0,genre_ids,id_x,original_language,original_title,popularity,release_date_x,title_x,vote_average,vote_count,...,release_date_y,movie,production_budget,domestic_gross_x,worldwide_gross,title_y,studio,domestic_gross_y,foreign_gross,year
2954,2954,"[35, 18]",60422,en,Another Happy Day,5.473,2011-11-18,Another Happy Day,6.1,60,...,2011-11-18,Another Happy Day,"$4,000,000","$9,120","$978,527",Another Happy Day,P4,8500.0,347000.0,2011.0
3017,3017,"[18, 53, 80]",62728,en,Brighton Rock,4.749,2011-08-26,Brighton Rock,6.0,57,...,2011-08-26,Brighton Rock,"$11,000,000","$229,653","$229,653",Brighton Rock,IFC,230000.0,1600000.0,2011.0
401,401,[99],43942,en,Freakonomics,5.705,2010-10-01,Freakonomics,6.6,57,...,2010-10-01,Freakonomics,"$2,900,000","$101,270","$172,867",Freakonomics,Magn.,101000.0,16400.0,2010.0
446,446,[35],34563,en,Our Family Wedding,5.025,2010-03-12,Our Family Wedding,5.5,57,...,2010-03-12,Our Family Wedding,"$14,000,000","$20,255,281","$21,410,546",Our Family Wedding,FoxS,20300000.0,1200000.0,2010.0
25148,25148,"[28, 12, 16]",332718,en,Bilal: A New Breed of Hero,2.707,2018-02-02,Bilal: A New Breed of Hero,6.8,54,...,2018-02-02,Bilal: A New Breed of Hero,"$30,000,000","$490,973","$648,599",Bilal: A New Breed of Hero,VE,491000.0,1700000.0,2018.0
361,361,"[18, 14]",44638,en,The Tempest,6.3,2010-12-10,The Tempest,5.8,52,...,2010-12-10,The Tempest,"$20,000,000","$277,943","$277,943",The Tempest,Mira.,278000.0,68700.0,2010.0
444,444,[35],47088,en,The Extra Man,5.056,2010-08-01,The Extra Man,5.9,46,...,2010-07-30,The Extra Man,"$7,000,000","$453,377","$492,108",The Extra Man,Magn.,453000.0,4500.0,2010.0
673,673,"[18, 10749]",33870,en,Mao's Last Dancer,2.681,2010-08-20,Mao's Last Dancer,7.0,45,...,2010-08-20,Mao's Last Dancer,"$18,000,000","$4,806,750","$25,941,437",Mao's Last Dancer,Gold.,4800000.0,17500000.0,2010.0
451,451,"[18, 10751]",56601,en,The Perfect Game,4.958,2010-04-16,The Perfect Game,6.3,44,...,2010-04-16,The Perfect Game,"$12,500,000","$1,089,445","$3,931,367",The Perfect Game,IW,1000000.0,2800000.0,2010.0
3035,3035,"[18, 36]",58882,en,Snow Flower and the Secret Fan,4.519,2011-07-15,Snow Flower and the Secret Fan,5.9,39,...,2011-07-15,Snow Flower and the Secret Fan,"$6,000,000","$1,348,205","$11,348,205",Snow Flower and the Secret Fan,FoxS,1300000.0,10000000.0,2011.0


In [214]:
print(m['genre_ids'].value_counts())

[99]                         3700
[]                           2479
[18]                         2268
[35]                         1660
[27]                         1145
                             ... 
[53, 18, 28, 9648]              1
[18, 36, 10752, 28]             1
[10770, 18, 10749, 10751]       1
[28, 14, 12, 35]                1
[18, 10752, 878]                1
Name: genre_ids, Length: 2477, dtype: int64


In [227]:
mnn.tail(10)

Unnamed: 0.1,Unnamed: 0,genre_ids,id_x,original_language,original_title,popularity,release_date_x,title_x,vote_average,vote_count,...,release_date_y,movie,production_budget,domestic_gross_x,worldwide_gross,title_y,studio,domestic_gross_y,foreign_gross,year
24048,24048,[18],413362,en,"Roman J. Israel, Esq.",12.688,2017-11-17,"Roman J. Israel, Esq.",6.2,499,...,2017-11-17,"Roman J. Israel, Esq.","$22,000,000","$11,962,712","$12,967,012","Roman J. Israel, Esq.",Sony,12000000.0,1100000.0,2017.0
24051,24051,"[12, 878, 10751, 14]",407451,en,A Wrinkle in Time,12.529,2018-03-09,A Wrinkle in Time,5.0,1073,...,2018-03-09,A Wrinkle in Time,"$103,000,000","$100,478,608","$133,401,882",A Wrinkle in Time,BV,100500000.0,32200000.0,2018.0
24054,24054,[35],354861,en,Father Figures,12.45,2017-12-22,Father Figures,5.8,277,...,2017-12-22,Father Figures,"$25,000,000","$17,501,244","$21,038,441",Father Figures,WB,17500000.0,8100000.0,2017.0
24076,24076,[36],476968,en,"Paul, Apostle of Christ",12.005,2018-03-28,"Paul, Apostle of Christ",7.1,98,...,2018-03-23,"Paul, Apostle of Christ","$5,000,000","$17,547,999","$25,529,498","Paul, Apostle of Christ",Affirm,17600000.0,5500000.0,2018.0
24089,24089,"[18, 36, 53]",453201,en,The 15:17 to Paris,11.576,2018-02-09,The 15:17 to Paris,5.3,799,...,2018-02-09,The 15:17 to Paris,"$30,000,000","$36,276,286","$56,096,200",The 15:17 to Paris,WB,36300000.0,20800000.0,2018.0
24120,24120,[35],474335,en,Uncle Drew,10.836,2018-06-29,Uncle Drew,6.5,220,...,2018-06-29,Uncle Drew,"$18,000,000","$42,469,946","$46,527,161",Uncle Drew,LG/S,42500000.0,4200000.0,2018.0
24128,24128,"[35, 18, 878]",301337,en,Downsizing,10.682,2017-12-22,Downsizing,5.1,1887,...,2017-12-22,Downsizing,"$68,000,000","$24,449,754","$54,462,971",Downsizing,Par.,24400000.0,30600000.0,2017.0
24212,24212,"[53, 28, 80]",442064,en,Proud Mary,9.371,2018-01-12,Proud Mary,5.5,259,...,2018-01-12,Proud Mary,"$30,000,000","$20,868,638","$21,709,539",Proud Mary,SGem,20900000.0,876000.0,2018.0
24283,24283,"[28, 35]",398177,en,Just Getting Started,8.459,2017-12-08,Just Getting Started,4.9,94,...,2017-12-08,Just Getting Started,"$22,000,000","$6,069,605","$6,756,412",Just Getting Started,BG,6100000.0,1600000.0,2017.0
25148,25148,"[28, 12, 16]",332718,en,Bilal: A New Breed of Hero,2.707,2018-02-02,Bilal: A New Breed of Hero,6.8,54,...,2018-02-02,Bilal: A New Breed of Hero,"$30,000,000","$490,973","$648,599",Bilal: A New Breed of Hero,VE,491000.0,1700000.0,2018.0


Unnamed: 0.1,Unnamed: 0,genre_ids,id,original_language,original_title,popularity,release_date,title,vote_average,vote_count,release_year
0,0,"[12, 14, 10751]",12444,en,Harry Potter and the Deathly Hallows: Part 1,33.533,2010-11-19,Harry Potter and the Deathly Hallows: Part 1,7.7,10788,2010
1,1,"[14, 12, 16, 10751]",10191,en,How to Train Your Dragon,28.734,2010-03-26,How to Train Your Dragon,7.7,7610,2010
2,2,"[12, 28, 878]",10138,en,Iron Man 2,28.515,2010-05-07,Iron Man 2,6.8,12368,2010
3,3,"[16, 35, 10751]",862,en,Toy Story,28.005,1995-11-22,Toy Story,7.9,10174,1995
4,4,"[28, 878, 12]",27205,en,Inception,27.920,2010-07-16,Inception,8.3,22186,2010
...,...,...,...,...,...,...,...,...,...,...,...
26512,26512,"[27, 18]",488143,en,Laboratory Conditions,0.600,2018-10-13,Laboratory Conditions,0.0,1,2018
26513,26513,"[18, 53]",485975,en,_EXHIBIT_84xxx_,0.600,2018-05-01,_EXHIBIT_84xxx_,0.0,1,2018
26514,26514,"[14, 28, 12]",381231,en,The Last One,0.600,2018-10-01,The Last One,0.0,1,2018
26515,26515,"[10751, 12, 28]",366854,en,Trailer Made,0.600,2018-06-22,Trailer Made,0.0,1,2018


In [236]:
m_genre_dict = {
'Action' :         28,
'Adventure'      : 12,
'Animation'   :    16,
'Comedy'      :    35,
'Crime'         :  80,
'Documentary'    : 99,
'Drama'       :    18,
'Family'      :    10751,
'Fantasy'    :     14,
'History'    :     36,
'Horror'     :     27,
'Music'      :     10402,
'Mystery'        : 9648,
'Romance'      :   10749,
'Science Fiction' : 878,
'TV Movie'      :  10770,
'Thriller'    :   53,
'War'        :     10752,
'Western'     :    37}

In [240]:
swapped = {}
for key, value in m_genre_dict.items():
    swapped[value] = key
    
swapped

{28: 'Action',
 12: 'Adventure',
 16: 'Animation',
 35: 'Comedy',
 80: 'Crime',
 99: 'Documentary',
 18: 'Drama',
 10751: 'Family',
 14: 'Fantasy',
 36: 'History',
 27: 'Horror',
 10402: 'Music',
 9648: 'Mystery',
 10749: 'Romance',
 878: 'Science Fiction',
 10770: 'TV Movie',
 53: 'Thriller',
 10752: 'War',
 37: 'Western'}

In [None]:
def genre_ids_to_genres()
    listforgenres = []
    for key, value in swapped
        if value = 
        