In [1]:
import os
import requests as rq
import pandas as pd
import numpy as np
import json


In [2]:
# the filepath for each data file
wiki_movie_file = os.path.join('Data', 'wikipedia-movies.json')
ratings_file = os.path.join('Data', 'ratings.csv')
movie_meta_data_file = os.path.join('Data', 'movies_metadata.csv')


In [3]:
# Load the files
with open(wiki_movie_file, 'r') as file1:
    raw_movie_json = json.load(file1)
    file1.close()

# file 2, not cleaning this one yet
# raw_ratings_df = pd.read_csv(ratings_file, sep=',', header=0)

# file 3, not cleaning this one yet
# raw_meta_data_df = pd.read_csv(movie_meta_data_file, sep=',', low_memory=False)


In [4]:
# Explore the data, commenting results
raw_movie_json # Results: list of dicts, each movie is it's own dict
len(raw_movie_json) # Result: 7311 Movies
len(raw_movie_json[10].keys()) # changing the index looking for number of keys; 10 random indexes Results: 17 to 22 keys
raw_movie_df = pd.DataFrame(raw_movie_json) # Results: 7311 rows × 193 columns with lots of Nan fields, not every movie has the same "22" keys
raw_movie_df

Unnamed: 0,url,year,imdb_link,title,Directed by,Produced by,Screenplay by,Story by,Based on,Starring,...,Predecessor,Founders,Area served,Products,Services,Russian,Hebrew,Revenue,Operating income,Polish
0,https://en.wikipedia.org/wiki/The_Adventures_o...,1990.0,https://www.imdb.com/title/tt0098987/,The Adventures of Ford Fairlane,Renny Harlin,"[Steve Perry, Joel Silver]","[David Arnott, James Cappe, Daniel Waters]","[David Arnott, James Cappe]","[Characters, by Rex Weiner]","[Andrew Dice Clay, Wayne Newton, Priscilla Pre...",...,,,,,,,,,,
1,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",1990.0,https://www.imdb.com/title/tt0098994/,"After Dark, My Sweet",James Foley,"[Ric Kidney, Robert Redlin]","[James Foley, Robert Redlin]",,"[the novel, After Dark, My Sweet, by, Jim Thom...","[Jason Patric, Rachel Ward, Bruce Dern, George...",...,,,,,,,,,,
2,https://en.wikipedia.org/wiki/Air_America_(film),1990.0,https://www.imdb.com/title/tt0099005/,Air America,Roger Spottiswoode,Daniel Melnick,"[John Eskow, Richard Rush]",,"[Air America, by, Christopher Robbins]","[Mel Gibson, Robert Downey Jr., Nancy Travis, ...",...,,,,,,,,,,
3,https://en.wikipedia.org/wiki/Alice_(1990_film),1990.0,https://www.imdb.com/title/tt0099012/,Alice,Woody Allen,Robert Greenhut,,,,"[Alec Baldwin, Blythe Danner, Judy Davis, Mia ...",...,,,,,,,,,,
4,https://en.wikipedia.org/wiki/Almost_an_Angel,1990.0,https://www.imdb.com/title/tt0099018/,Almost an Angel,John Cornell,John Cornell,,,,"[Paul Hogan, Elias Koteas, Linda Kozlowski]",...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7306,https://en.wikipedia.org/wiki/Holmes_%26_Watson,2018.0,https://www.imdb.com/title/tt1255919/,Holmes & Watson,Etan Cohen,"[Will Ferrell, Adam McKay, Jimmy Miller, Clayt...",Etan Cohen,,"[Sherlock Holmes, and, Dr. Watson, by, Sir Art...","[Will Ferrell, John C. Reilly, Rebecca Hall, R...",...,,,,,,,,,,
7307,https://en.wikipedia.org/wiki/Vice_(2018_film),2018.0,https://www.imdb.com/title/tt6266538/,Vice,Adam McKay,"[Brad Pitt, Dede Gardner, Jeremy Kleiner, Kevi...",,,,"[Christian Bale, Amy Adams, Steve Carell, Sam ...",...,,,,,,,,,,
7308,https://en.wikipedia.org/wiki/On_the_Basis_of_Sex,2018.0,https://www.imdb.com/title/tt4669788/,On the Basis of Sex,Mimi Leder,Robert W. Cort,,,,"[Felicity Jones, Armie Hammer, Justin Theroux,...",...,,,,,,,,,,
7309,https://en.wikipedia.org/wiki/Destroyer_(2018_...,2018.0,https://www.imdb.com/title/tt7137380/,Destroyer,Karyn Kusama,"[Fred Berger, Phil Hay, Matt Manfredi]",,,,"[Nicole Kidman, Sebastian Stan, Toby Kebbell, ...",...,,,,,,,,,,


In [5]:
# Explore the data, commenting results
raw_movie_df.info() # Result: too much data to display any granular info about the columns
raw_movie_df.isnull().sum() # Result: 10 columns visible, Null values seem to range from 158 to 7310; find the minimum number
min(raw_movie_df.isnull().sum())  # Results: 158 is the minimum, url and year columns are the lowest
raw_movie_df.keys().to_list() # Results: column names suggest more than movies; tv shows, people, and possibly books also.


['url',
 'year',
 'imdb_link',
 'title',
 'Directed by',
 'Produced by',
 'Screenplay by',
 'Story by',
 'Based on',
 'Starring',
 'Narrated by',
 'Music by',
 'Cinematography',
 'Edited by',
 'Productioncompany ',
 'Distributed by',
 'Release date',
 'Running time',
 'Country',
 'Language',
 'Budget',
 'Box office',
 'Written by',
 'Genre',
 'Theme music composer',
 'Country of origin',
 'Original language(s)',
 'Producer(s)',
 'Editor(s)',
 'Production company(s)',
 'Original network',
 'Original release',
 'Productioncompanies ',
 'Executive producer(s)',
 'Production location(s)',
 'Distributor',
 'Picture format',
 'Audio format',
 'Voices of',
 'Followed by',
 'Composer(s)',
 'Created by',
 'Also known as',
 'Opening theme',
 'No. of episodes',
 'Preceded by',
 'Author',
 'Publisher',
 'Publication date',
 'Media type',
 'Pages',
 'ISBN',
 'OCLC',
 'LC Class',
 'Cover artist',
 'Series',
 'Set in',
 'Adaptation by',
 'Suggested by',
 'Biographical data',
 'Born',
 'Died',
 'Resti

In [6]:
# select for records with an imdb link, 'Director' or 'Directed by', and not references to tv shows.
cleaning_wiki = [movie for movie in raw_movie_json if (('imdb_link' and ('Directed by' or 'Director')) in movie.keys()) and ('No. of episodes' not in movie.keys())]
cleaning_df = pd.DataFrame(cleaning_wiki)
cleaning_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7099 entries, 0 to 7098
Data columns (total 74 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   url                     7099 non-null   object
 1   year                    7099 non-null   int64 
 2   imdb_link               7074 non-null   object
 3   title                   7098 non-null   object
 4   Directed by             7099 non-null   object
 5   Produced by             6737 non-null   object
 6   Screenplay by           2323 non-null   object
 7   Story by                1004 non-null   object
 8   Based on                2196 non-null   object
 9   Starring                6913 non-null   object
 10  Narrated by             283 non-null    object
 11  Music by                6502 non-null   object
 12  Cinematography          6396 non-null   object
 13  Edited by               6398 non-null   object
 14  Productioncompany       4539 non-null   object
 15  Dist

In [7]:
# # lots of the columns are languages:  "Arabic", "Japanese", "Mandarin", "Polish", "Yiddish", "Romanized"
cleaning_df[cleaning_df['Arabic'].notnull()]['Arabic']

6856    قضية رقم ٢٣
7081      کفرناحوم‎
Name: Arabic, dtype: object

In [8]:
# staring to define a function to clean the data. troubleshooting and ajustments were decided in the cell below this one.
def cleaning_movie(movie):
    """wiki_movie_file is a json file containing a list of dictionaries with each index as a different movie 
    with different key:value pairs. This function is specially designed to clean that specific dataset. 
    Wikipedia is managed by many people and they do not all use the same words.
    """
    alternate_titles = dict()
    fixed_movie = dict(movie)
###  Start with alternate titles stored in language keys and merge them into one.
###  Language keys i could find:      
    language_keys = ['Also known as','Arabic','Cantonese','Chinese','French', 'Hangul','Hebrew','Hepburn','Japanese','Literally',
        'Mandarin','McCune–Reischauer','Original title','Polish', 'Revised Romanization','Romanized','Russian',
        'Simplified','Traditional','Yiddish']
    for key in language_keys:
        if key in fixed_movie.keys():
            alternate_titles[key] = fixed_movie[key]
            fixed_movie.pop(key)
        else:
            pass
##  if there were alternate titles, add them to the movie.       
    if len(alternate_titles) > 0:
        fixed_movie['alternate_titles'] = alternate_titles
    else:
        pass
    return fixed_movie



In [9]:
## using this cell to debug the above function and decide on key(s) to keep, merge, and delete. 
## Starting with the alternate titles hidden in the language keys.
language_keys1 = ['Also known as','Arabic','Cantonese','Chinese','French', 'Hangul','Hebrew','Hepburn','Japanese','Literally',
        'Mandarin','McCune–Reischauer','Original title','Polish', 'Revised Romanization','Romanized','Russian',
        'Simplified','Traditional','Yiddish']

cleaning_wiki2 = [cleaning_movie(movie) for movie in cleaning_wiki]

## find a column to remove, add it to the list, check the columns again.
columns = sorted(pd.DataFrame(cleaning_wiki2).columns.to_list())
columns


['Actor control',
 'Adaptation by',
 'Animation by',
 'Audio format',
 'Based on',
 'Box office',
 'Budget',
 'Cinematography',
 'Color process',
 'Composer(s)',
 'Country',
 'Country of origin',
 'Created by',
 'Directed by',
 'Distributed by',
 'Distributor',
 'Edited by',
 'Editor(s)',
 'Engine(s)',
 'Executive producer(s)',
 'Followed by',
 'Format(s)',
 'Genre',
 'Genre(s)',
 'Language',
 'Music by',
 'Narrated by',
 'Original language(s)',
 'Original network',
 'Original release',
 'Picture format',
 'Preceded by',
 'Produced by',
 'Producer(s)',
 'Production company',
 'Production company(s)',
 'Production location(s)',
 'Productioncompanies ',
 'Productioncompany ',
 'Release date',
 'Release(s)',
 'Running time',
 'Screen story by',
 'Screenplay by',
 'Starring',
 'Story by',
 'Suggested by',
 'Theme music composer',
 'Voices of',
 'Written by',
 'alternate_titles',
 'imdb_link',
 'title',
 'url',
 'year']

In [10]:
## Where I am at for this point. 
pd.DataFrame(cleaning_wiki2).info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7099 entries, 0 to 7098
Data columns (total 55 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   url                     7099 non-null   object
 1   year                    7099 non-null   int64 
 2   imdb_link               7074 non-null   object
 3   title                   7098 non-null   object
 4   Directed by             7099 non-null   object
 5   Produced by             6737 non-null   object
 6   Screenplay by           2323 non-null   object
 7   Story by                1004 non-null   object
 8   Based on                2196 non-null   object
 9   Starring                6913 non-null   object
 10  Narrated by             283 non-null    object
 11  Music by                6502 non-null   object
 12  Cinematography          6396 non-null   object
 13  Edited by               6398 non-null   object
 14  Productioncompany       4539 non-null   object
 15  Dist

In [11]:
def cleaning_movie(movie):
    """wiki_movie_file is a json file containing a list of dictionaries with each index as a different movie 
    with different key:value pairs. This function is specially designed to clean that specific dataset. 
    Wikipedia is managed by many people and they do not all use the same words.
    """
    alternate_titles = dict()
    fixed_movie = dict(movie)
###  Start with alternate titles stored in language keys and merge them into one.
###  Language keys i could find:      
    language_keys = ['Also known as','Arabic','Cantonese','Chinese','French', 'Hangul','Hebrew','Hepburn','Japanese','Literally',
        'Mandarin','McCune–Reischauer','Original title','Polish', 'Revised Romanization','Romanized','Russian',
        'Simplified','Traditional','Yiddish']
    for key in language_keys:
        if key in fixed_movie.keys():
            alternate_titles[key] = fixed_movie[key]
            fixed_movie.pop(key)
        else:
            pass
##  if there were alternate titles, add them to the movie.       
    if len(alternate_titles) > 0:
        fixed_movie['alternate_titles'] = alternate_titles
    else:
        pass
        
###  Alternative titles are fixed, now merge columns that are similar.     
    keys_to_merge = {'Director':'Directed by', 'Country': 'Country of origin', 'Distributor(s)':'Distributed by',
                     'Editor(s)':'Edited by',  'Language':'Original language(s)', 'Producer(s)':'Produced by',
                     'Genre(s)': 'Genre', 'Composer(s)': ['Music by', 'Theme music composer'],
                     'Release date': ['Release(s)', 'Original release'], 'Distributor(s)':['Distributed by','Distributor'],
                     'Writer(s)':['Written by', 'Story by', 'Screenplay by', 'Screen story by', 'Adaptation by'],
                     'Production Comapany': ['Production company', 'Production company(s)', 'Productioncompanies ','Productioncompany ']  }
##  item = key, from the key:value pair and the key I want; values = the movie key(s) I do not want. 
    for item in keys_to_merge: 
        if type(keys_to_merge[item]) == type(list()):
            for n in keys_to_merge[item]:
                if n in fixed_movie.keys():
                    fixed_movie[item] = fixed_movie.pop(n)
                else:
                    pass
        else: 
            if keys_to_merge[item] in fixed_movie.keys():
                fixed_movie[item] = fixed_movie.pop(keys_to_merge[item])
            else:
                pass
    
    return fixed_movie




In [12]:
## using this cell to debug the above function and decide on key(s) to keep, merge, and delete. 
## keys that are the same idea need merged, key = the key I am keeping, Value = similar key(s).
keys_to_merge1 = {'Director':'Directed by', 'Country': 'Country of origin', 'Distributor(s)':'Distributed by',
                 'Editor(s)':'Edited by',  'Language':'Original language(s)', 'Producer(s)':'Produced by',
                 'Genre(s)': 'Genre', 'Composer(s)': ['Music by', 'Theme music composer'],
                 'Release date': ['Release(s)', 'Original release'], 'Distributor(s)':['Distributed by','Distributor'],
                 'Writer(s)':['Written by', 'Story by', 'Screenplay by', 'Screen story by', 'Adaptation by'],
                 'Production Comapany': ['Production company', 'Production company(s)', 'Productioncompanies ','Productioncompany ']  }


cleaning_wiki3 = [cleaning_movie(movie) for movie in cleaning_wiki]

## find common columns, add them to the dictionary and check the columns again.
columns = sorted(pd.DataFrame(cleaning_wiki3).columns.to_list())
columns



['Actor control',
 'Animation by',
 'Audio format',
 'Based on',
 'Box office',
 'Budget',
 'Cinematography',
 'Color process',
 'Composer(s)',
 'Country',
 'Created by',
 'Director',
 'Distributor(s)',
 'Editor(s)',
 'Engine(s)',
 'Executive producer(s)',
 'Followed by',
 'Format(s)',
 'Genre(s)',
 'Language',
 'Narrated by',
 'Original network',
 'Picture format',
 'Preceded by',
 'Producer(s)',
 'Production Comapany',
 'Production location(s)',
 'Release date',
 'Running time',
 'Starring',
 'Suggested by',
 'Voices of',
 'Writer(s)',
 'alternate_titles',
 'imdb_link',
 'title',
 'url',
 'year']

In [13]:
## Where I am at for this point. 
cleaning_wiki_df3 = pd.DataFrame(cleaning_wiki3)
cleaning_wiki_df3.info()
# Down to 38 columns from 193.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7099 entries, 0 to 7098
Data columns (total 38 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   url                     7099 non-null   object
 1   year                    7099 non-null   int64 
 2   imdb_link               7074 non-null   object
 3   title                   7098 non-null   object
 4   Based on                2196 non-null   object
 5   Starring                6913 non-null   object
 6   Narrated by             283 non-null    object
 7   Cinematography          6396 non-null   object
 8   Release date            7067 non-null   object
 9   Running time            6956 non-null   object
 10  Country                 6860 non-null   object
 11  Language                7014 non-null   object
 12  Budget                  4774 non-null   object
 13  Box office              5530 non-null   object
 14  Director                7099 non-null   object
 15  Dist

In [26]:
# extract the imdb id from the imdb link to link with other data sets.
cleaning_wiki_df3['imdb_link']   
pattern = r'(tt\d{7})'
cleaning_wiki_df3['imdb_id'] = cleaning_wiki_df3['imdb_link'].str.extract(pattern)

# checking for duplicate data
len(cleaning_wiki_df3['imdb_id'].unique()) # 7074 rows have imdb links, 7032 are unique. There are duplicate movies in the dataset.

7032

In [30]:
## Drop the duplicate rows
cleaning_wiki_df4 = pd.DataFrame(cleaning_wiki_df3.drop_duplicates(subset='imdb_id'))
cleaning_wiki_df4

Unnamed: 0,url,year,imdb_link,title,Based on,Starring,Narrated by,Cinematography,Release date,Running time,...,Created by,Preceded by,Suggested by,alternate_titles,Animation by,Color process,Engine(s),Actor control,Format(s),imdb_id
0,https://en.wikipedia.org/wiki/The_Adventures_o...,1990,https://www.imdb.com/title/tt0098987/,The Adventures of Ford Fairlane,"[Characters, by Rex Weiner]","[Andrew Dice Clay, Wayne Newton, Priscilla Pre...","Andrew ""Dice"" Clay",Oliver Wood,"[July 11, 1990, (, 1990-07-11, )]",102 minutes,...,,,,,,,,,,tt0098987
1,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",1990,https://www.imdb.com/title/tt0098994/,"After Dark, My Sweet","[the novel, After Dark, My Sweet, by, Jim Thom...","[Jason Patric, Rachel Ward, Bruce Dern, George...",,Mark Plummer,"[May 17, 1990, (, 1990-05-17, ), (Cannes Film ...",114 minutes,...,,,,,,,,,,tt0098994
2,https://en.wikipedia.org/wiki/Air_America_(film),1990,https://www.imdb.com/title/tt0099005/,Air America,"[Air America, by, Christopher Robbins]","[Mel Gibson, Robert Downey Jr., Nancy Travis, ...",,Roger Deakins,"[August 10, 1990, (, 1990-08-10, )]",113 minutes,...,,,,,,,,,,tt0099005
3,https://en.wikipedia.org/wiki/Alice_(1990_film),1990,https://www.imdb.com/title/tt0099012/,Alice,,"[Alec Baldwin, Blythe Danner, Judy Davis, Mia ...",,Carlo Di Palma,"[December 25, 1990, (, 1990-12-25, )]",106 minutes,...,,,,,,,,,,tt0099012
4,https://en.wikipedia.org/wiki/Almost_an_Angel,1990,https://www.imdb.com/title/tt0099018/,Almost an Angel,,"[Paul Hogan, Elias Koteas, Linda Kozlowski]",,Russell Boyd,"December 19, 1990",95 minutes,...,,,,,,,,,,tt0099018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7094,https://en.wikipedia.org/wiki/Holmes_%26_Watson,2018,https://www.imdb.com/title/tt1255919/,Holmes & Watson,"[Sherlock Holmes, and, Dr. Watson, by, Sir Art...","[Will Ferrell, John C. Reilly, Rebecca Hall, R...",,Oliver Wood,"[December 25, 2018, (, 2018-12-25, ), (United ...",90 minutes,...,,,,,,,,,,tt1255919
7095,https://en.wikipedia.org/wiki/Vice_(2018_film),2018,https://www.imdb.com/title/tt6266538/,Vice,,"[Christian Bale, Amy Adams, Steve Carell, Sam ...",,Greig Fraser,"[December 11, 2018, (, 2018-12-11, ), (, Samue...",132 minutes,...,,,,,,,,,,tt6266538
7096,https://en.wikipedia.org/wiki/On_the_Basis_of_Sex,2018,https://www.imdb.com/title/tt4669788/,On the Basis of Sex,,"[Felicity Jones, Armie Hammer, Justin Theroux,...",,Michael Grady,"[November 8, 2018, (, 2018-11-08, ), (, AFI Fe...",120 minutes,...,,,,,,,,,,tt4669788
7097,https://en.wikipedia.org/wiki/Destroyer_(2018_...,2018,https://www.imdb.com/title/tt7137380/,Destroyer,,"[Nicole Kidman, Sebastian Stan, Toby Kebbell, ...",,Julie Kirkwood,"[August 31, 2018, (, 2018-08-31, ), (, Telluri...",123 minutes,...,,,,,,,,,,tt7137380


In [37]:
# drop columns that are 90% or more null
columns_to_keep = [column for column in cleaning_wiki_df4 if cleaning_wiki_df4[column].isnull().sum() < (len(cleaning_wiki_df4['url'])*0.9)]
cleaning_wiki_df5 = cleaning_wiki_df4[columns_to_keep]
# down to 21 useful columns; from 193, where 75-80% of the cells were null Values.

Unnamed: 0,url,year,imdb_link,title,Based on,Starring,Cinematography,Release date,Running time,Country,...,Budget,Box office,Director,Distributor(s),Editor(s),Producer(s),Composer(s),Writer(s),Production Comapany,imdb_id
0,https://en.wikipedia.org/wiki/The_Adventures_o...,1990,https://www.imdb.com/title/tt0098987/,The Adventures of Ford Fairlane,"[Characters, by Rex Weiner]","[Andrew Dice Clay, Wayne Newton, Priscilla Pre...",Oliver Wood,"[July 11, 1990, (, 1990-07-11, )]",102 minutes,United States,...,$20 million,$21.4 million,Renny Harlin,20th Century Fox,Michael Tronick,"[Steve Perry, Joel Silver]","[Cliff Eidelman, Yello]","[David Arnott, James Cappe, Daniel Waters]",Silver Pictures,tt0098987
1,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",1990,https://www.imdb.com/title/tt0098994/,"After Dark, My Sweet","[the novel, After Dark, My Sweet, by, Jim Thom...","[Jason Patric, Rachel Ward, Bruce Dern, George...",Mark Plummer,"[May 17, 1990, (, 1990-05-17, ), (Cannes Film ...",114 minutes,United States,...,$6 million,$2.7 million,James Foley,Avenue Pictures,Howard E. Smith,"[Ric Kidney, Robert Redlin]",Maurice Jarre,"[James Foley, Robert Redlin]",Avenue Pictures,tt0098994
2,https://en.wikipedia.org/wiki/Air_America_(film),1990,https://www.imdb.com/title/tt0099005/,Air America,"[Air America, by, Christopher Robbins]","[Mel Gibson, Robert Downey Jr., Nancy Travis, ...",Roger Deakins,"[August 10, 1990, (, 1990-08-10, )]",113 minutes,United States,...,$35 million,"$57,718,089",Roger Spottiswoode,TriStar Pictures,"[John Bloom, Lois Freeman-Fox]",Daniel Melnick,Charles Gross,"[John Eskow, Richard Rush]","[Carolco Pictures, IndieProd Company]",tt0099005
3,https://en.wikipedia.org/wiki/Alice_(1990_film),1990,https://www.imdb.com/title/tt0099012/,Alice,,"[Alec Baldwin, Blythe Danner, Judy Davis, Mia ...",Carlo Di Palma,"[December 25, 1990, (, 1990-12-25, )]",106 minutes,United States,...,$12 million,"$7,331,647",Woody Allen,Orion Pictures,Susan E. Morse,Robert Greenhut,,Woody Allen,,tt0099012
4,https://en.wikipedia.org/wiki/Almost_an_Angel,1990,https://www.imdb.com/title/tt0099018/,Almost an Angel,,"[Paul Hogan, Elias Koteas, Linda Kozlowski]",Russell Boyd,"December 19, 1990",95 minutes,US,...,$25 million,"$6,939,946 (USA)",John Cornell,Paramount Pictures,David Stiven,John Cornell,Maurice Jarre,Paul Hogan,,tt0099018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7094,https://en.wikipedia.org/wiki/Holmes_%26_Watson,2018,https://www.imdb.com/title/tt1255919/,Holmes & Watson,"[Sherlock Holmes, and, Dr. Watson, by, Sir Art...","[Will Ferrell, John C. Reilly, Rebecca Hall, R...",Oliver Wood,"[December 25, 2018, (, 2018-12-25, ), (United ...",90 minutes,United States,...,$42 million,$41.9 million,Etan Cohen,Sony Pictures Releasing,Dean Zimmerman,"[Will Ferrell, Adam McKay, Jimmy Miller, Clayt...",Mark Mothersbaugh,Etan Cohen,"[Columbia Pictures, Gary Sanchez Productions, ...",tt1255919
7095,https://en.wikipedia.org/wiki/Vice_(2018_film),2018,https://www.imdb.com/title/tt6266538/,Vice,,"[Christian Bale, Amy Adams, Steve Carell, Sam ...",Greig Fraser,"[December 11, 2018, (, 2018-12-11, ), (, Samue...",132 minutes,United States,...,$60 million,$76.1 million,Adam McKay,Mirror Releasing,Hank Corwin,"[Brad Pitt, Dede Gardner, Jeremy Kleiner, Kevi...",Nicholas Britell,Adam McKay,"[Plan B Entertainment, Gary Sanchez Production...",tt6266538
7096,https://en.wikipedia.org/wiki/On_the_Basis_of_Sex,2018,https://www.imdb.com/title/tt4669788/,On the Basis of Sex,,"[Felicity Jones, Armie Hammer, Justin Theroux,...",Michael Grady,"[November 8, 2018, (, 2018-11-08, ), (, AFI Fe...",120 minutes,United States,...,$20 million,$38.4 million,Mimi Leder,Focus Features,Michelle Tesoro,Robert W. Cort,Mychael Danna,Daniel Stiepleman,"[Focus Features, [1], Participant Media, [1], ...",tt4669788
7097,https://en.wikipedia.org/wiki/Destroyer_(2018_...,2018,https://www.imdb.com/title/tt7137380/,Destroyer,,"[Nicole Kidman, Sebastian Stan, Toby Kebbell, ...",Julie Kirkwood,"[August 31, 2018, (, 2018-08-31, ), (, Telluri...",123 minutes,United States,...,$9 million,$5.5 million,Karyn Kusama,Mirror Releasing,Plummy Tucker,"[Fred Berger, Phil Hay, Matt Manfredi]",Theodore Shapiro,"[Phil Hay, Matt Manfredi]","[30West, Automatik Entertainment, Annapurna Pi...",tt7137380
