In [1]:
import json
import pandas as pd
import numpy as np

import re

from sqlalchemy import create_engine
import psycopg2

# from config import db_password
from config import db_password

import time

## Get the original Clean Movie Data

In [2]:
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5433/movie_data_original"
engine = create_engine(db_string)
original_df = pd.read_sql_table(table_name='movies', con=engine, schema=None, index_col=None,
                                coerce_float=True, parse_dates=None, columns=None, chunksize=None)    

In [3]:
original_df.head()

Unnamed: 0,index,imdb_id,kaggle_id,title,original_title,tagline,belongs_to_collection,wikipedia_url,imdb_link,runtime,...,production_countries,distributor,producers,director,starring,cinematography,editors,writers,composers,based_on
0,0,tt0098987,9548,The Adventures of Ford Fairlane,The Adventures of Ford Fairlane,Kojak. Columbo. Dirty Harry. Wimps.,,https://en.wikipedia.org/wiki/The_Adventures_o...,https://www.imdb.com/title/tt0098987/,104.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",20th Century Fox,"{""Steve Perry"",""Joel Silver""}",Renny Harlin,"{""Andrew Dice Clay"",""Wayne Newton"",""Priscilla ...",Oliver Wood,Michael Tronick,"{""David Arnott"",""James Cappe""}","{""Cliff Eidelman"",Yello}","{Characters,""by Rex Weiner""}"
1,1,tt0098994,25501,"After Dark, My Sweet","After Dark, My Sweet",All they risked was everything.,,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",https://www.imdb.com/title/tt0098994/,114.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Avenue Pictures,"{""Ric Kidney"",""Robert Redlin""}",James Foley,"{""Jason Patric"",""Rachel Ward"",""Bruce Dern"",""Ge...",Mark Plummer,Howard E. Smith,"{""James Foley"",""Robert Redlin""}",Maurice Jarre,"{""the novel"",""After Dark, My Sweet"",by,""Jim Th..."
2,2,tt0099005,11856,Air America,Air America,The few. The proud. The totally insane.,,https://en.wikipedia.org/wiki/Air_America_(film),https://www.imdb.com/title/tt0099005/,112.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",TriStar Pictures,Daniel Melnick,Roger Spottiswoode,"{""Mel Gibson"",""Robert Downey Jr."",""Nancy Travi...",Roger Deakins,"{""John Bloom"",""Lois Freeman-Fox""}","{""John Eskow"",""Richard Rush""}",Charles Gross,"{""Air America"",by,""Christopher Robbins""}"
3,3,tt0099012,8217,Alice,Alice,,,https://en.wikipedia.org/wiki/Alice_(1990_film),https://www.imdb.com/title/tt0099012/,102.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Orion Pictures,Robert Greenhut,Woody Allen,"{""Alec Baldwin"",""Blythe Danner"",""Judy Davis"",""...",Carlo Di Palma,Susan E. Morse,Woody Allen,,
4,4,tt0099018,25943,Almost an Angel,Almost an Angel,Who does he think he is?,,https://en.wikipedia.org/wiki/Almost_an_Angel,https://www.imdb.com/title/tt0099018/,95.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Paramount Pictures,John Cornell,John Cornell,"{""Paul Hogan"",""Elias Koteas"",""Linda Kozlowski""}",Russell Boyd,David Stiven,Paul Hogan,Maurice Jarre,


In [4]:
len(original_df)

6051

In [5]:
original_df.dtypes

index                             int64
imdb_id                          object
kaggle_id                         int64
title                            object
original_title                   object
tagline                          object
belongs_to_collection            object
wikipedia_url                    object
imdb_link                        object
runtime                         float64
budget                          float64
revenue                         float64
release_date             datetime64[ns]
popularity                      float64
vote_average                    float64
vote_count                      float64
genres                           object
original_language                object
overview                         object
spoken_languages                 object
country                          object
production_companies             object
production_countries             object
distributor                      object
producers                        object


## Get the automated Clean Movie Data

In [6]:
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5433/movie_data"
engine = create_engine(db_string)
automated_df = pd.read_sql_table(table_name='movies', con=engine, schema=None, index_col=None,
                                coerce_float=True, parse_dates=None, columns=None, chunksize=None)  

In [7]:
automated_df.head()

Unnamed: 0,index,imdb_id,kaggle_id,title,original_title,tagline,belongs_to_collection,wikipedia_url,imdb_link,runtime,...,production_countries,distributor,producers,director,starring,cinematography,editors,writers,composers,based_on
0,0,tt0098987,9548,The Adventures of Ford Fairlane,The Adventures of Ford Fairlane,Kojak. Columbo. Dirty Harry. Wimps.,,https://en.wikipedia.org/wiki/The_Adventures_o...,https://www.imdb.com/title/tt0098987/,104.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",20th Century Fox,"{""Steve Perry"",""Joel Silver""}",Renny Harlin,"{""Andrew Dice Clay"",""Wayne Newton"",""Priscilla ...",Oliver Wood,Michael Tronick,"{""David Arnott"",""James Cappe""}","{""Cliff Eidelman"",Yello}","{Characters,""by Rex Weiner""}"
1,1,tt0098994,25501,"After Dark, My Sweet","After Dark, My Sweet",All they risked was everything.,,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",https://www.imdb.com/title/tt0098994/,114.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Avenue Pictures,"{""Ric Kidney"",""Robert Redlin""}",James Foley,"{""Jason Patric"",""Rachel Ward"",""Bruce Dern"",""Ge...",Mark Plummer,Howard E. Smith,"{""James Foley"",""Robert Redlin""}",Maurice Jarre,"{""the novel"",""After Dark, My Sweet"",by,""Jim Th..."
2,2,tt0099005,11856,Air America,Air America,The few. The proud. The totally insane.,,https://en.wikipedia.org/wiki/Air_America_(film),https://www.imdb.com/title/tt0099005/,112.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",TriStar Pictures,Daniel Melnick,Roger Spottiswoode,"{""Mel Gibson"",""Robert Downey Jr."",""Nancy Travi...",Roger Deakins,"{""John Bloom"",""Lois Freeman-Fox""}","{""John Eskow"",""Richard Rush""}",Charles Gross,"{""Air America"",by,""Christopher Robbins""}"
3,3,tt0099012,8217,Alice,Alice,,,https://en.wikipedia.org/wiki/Alice_(1990_film),https://www.imdb.com/title/tt0099012/,102.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Orion Pictures,Robert Greenhut,Woody Allen,"{""Alec Baldwin"",""Blythe Danner"",""Judy Davis"",""...",Carlo Di Palma,Susan E. Morse,Woody Allen,,
4,4,tt0099018,25943,Almost an Angel,Almost an Angel,Who does he think he is?,,https://en.wikipedia.org/wiki/Almost_an_Angel,https://www.imdb.com/title/tt0099018/,95.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Paramount Pictures,John Cornell,John Cornell,"{""Paul Hogan"",""Elias Koteas"",""Linda Kozlowski""}",Russell Boyd,David Stiven,Paul Hogan,Maurice Jarre,


In [8]:
len(automated_df)

6075

In [9]:
automated_df.dtypes

index                             int64
imdb_id                          object
kaggle_id                         int64
title                            object
original_title                   object
tagline                          object
belongs_to_collection            object
wikipedia_url                    object
imdb_link                        object
runtime                         float64
budget                          float64
revenue                         float64
release_date             datetime64[ns]
popularity                      float64
vote_average                    float64
vote_count                      float64
genres                           object
original_language                object
overview                         object
spoken_languages                 object
country                          object
production_companies             object
production_countries             object
distributor                      object
producers                        object


In [10]:
# DataFrame.equals(other)[source]
original_df.equals(automated_df)

False

In [11]:
# DataFrame.join(other, on=None, how='left', lsuffix='', rsuffix='', sort=False)
automated_df.join(original_df, on=None, how='outer', lsuffix='_a', rsuffix='_o', sort=False)

Unnamed: 0,index_a,imdb_id_a,kaggle_id_a,title_a,original_title_a,tagline_a,belongs_to_collection_a,wikipedia_url_a,imdb_link_a,runtime_a,...,production_countries_o,distributor_o,producers_o,director_o,starring_o,cinematography_o,editors_o,writers_o,composers_o,based_on_o
0,0,tt0098987,9548,The Adventures of Ford Fairlane,The Adventures of Ford Fairlane,Kojak. Columbo. Dirty Harry. Wimps.,,https://en.wikipedia.org/wiki/The_Adventures_o...,https://www.imdb.com/title/tt0098987/,104.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",20th Century Fox,"{""Steve Perry"",""Joel Silver""}",Renny Harlin,"{""Andrew Dice Clay"",""Wayne Newton"",""Priscilla ...",Oliver Wood,Michael Tronick,"{""David Arnott"",""James Cappe""}","{""Cliff Eidelman"",Yello}","{Characters,""by Rex Weiner""}"
1,1,tt0098994,25501,"After Dark, My Sweet","After Dark, My Sweet",All they risked was everything.,,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",https://www.imdb.com/title/tt0098994/,114.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Avenue Pictures,"{""Ric Kidney"",""Robert Redlin""}",James Foley,"{""Jason Patric"",""Rachel Ward"",""Bruce Dern"",""Ge...",Mark Plummer,Howard E. Smith,"{""James Foley"",""Robert Redlin""}",Maurice Jarre,"{""the novel"",""After Dark, My Sweet"",by,""Jim Th..."
2,2,tt0099005,11856,Air America,Air America,The few. The proud. The totally insane.,,https://en.wikipedia.org/wiki/Air_America_(film),https://www.imdb.com/title/tt0099005/,112.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",TriStar Pictures,Daniel Melnick,Roger Spottiswoode,"{""Mel Gibson"",""Robert Downey Jr."",""Nancy Travi...",Roger Deakins,"{""John Bloom"",""Lois Freeman-Fox""}","{""John Eskow"",""Richard Rush""}",Charles Gross,"{""Air America"",by,""Christopher Robbins""}"
3,3,tt0099012,8217,Alice,Alice,,,https://en.wikipedia.org/wiki/Alice_(1990_film),https://www.imdb.com/title/tt0099012/,102.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Orion Pictures,Robert Greenhut,Woody Allen,"{""Alec Baldwin"",""Blythe Danner"",""Judy Davis"",""...",Carlo Di Palma,Susan E. Morse,Woody Allen,,
4,4,tt0099018,25943,Almost an Angel,Almost an Angel,Who does he think he is?,,https://en.wikipedia.org/wiki/Almost_an_Angel,https://www.imdb.com/title/tt0099018/,95.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Paramount Pictures,John Cornell,John Cornell,"{""Paul Hogan"",""Elias Koteas"",""Linda Kozlowski""}",Russell Boyd,David Stiven,Paul Hogan,Maurice Jarre,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6070,6068,tt4765284,353616,Pitch Perfect 3,Pitch Perfect 3,,"{'id': 306031, 'name': 'Pitch Perfect Collecti...",https://en.wikipedia.org/wiki/Pitch_Perfect_3,https://www.imdb.com/title/tt4765284/,93.0,...,,,,,,,,,,
6071,6069,tt3567666,348389,Stratton,Stratton,The enemy has a weapon. So do we.,,https://en.wikipedia.org/wiki/Stratton_(film),https://www.imdb.com/title/tt3567666/,94.0,...,,,,,,,,,,
6072,6070,tt5639354,429191,A Fantastic Woman,Una mujer fantástica,,,https://en.wikipedia.org/wiki/A_Fantastic_Woman,https://www.imdb.com/title/tt5639354/,104.0,...,,,,,,,,,,
6073,6071,tt5390066,390059,Permission,Permission,,,https://en.wikipedia.org/wiki/Permission_(film),https://www.imdb.com/title/tt5390066/,96.0,...,,,,,,,,,,


In [12]:
# DataFrame.set_index(keys, drop=True, append=False, inplace=False, verify_integrity=False)
original_df.set_index('imdb_id',drop=True, append=False, inplace=False, verify_integrity=False)

Unnamed: 0_level_0,index,kaggle_id,title,original_title,tagline,belongs_to_collection,wikipedia_url,imdb_link,runtime,budget,...,production_countries,distributor,producers,director,starring,cinematography,editors,writers,composers,based_on
imdb_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
tt0098987,0,9548,The Adventures of Ford Fairlane,The Adventures of Ford Fairlane,Kojak. Columbo. Dirty Harry. Wimps.,,https://en.wikipedia.org/wiki/The_Adventures_o...,https://www.imdb.com/title/tt0098987/,104.0,49000000.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",20th Century Fox,"{""Steve Perry"",""Joel Silver""}",Renny Harlin,"{""Andrew Dice Clay"",""Wayne Newton"",""Priscilla ...",Oliver Wood,Michael Tronick,"{""David Arnott"",""James Cappe""}","{""Cliff Eidelman"",Yello}","{Characters,""by Rex Weiner""}"
tt0098994,1,25501,"After Dark, My Sweet","After Dark, My Sweet",All they risked was everything.,,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",https://www.imdb.com/title/tt0098994/,114.0,6000000.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Avenue Pictures,"{""Ric Kidney"",""Robert Redlin""}",James Foley,"{""Jason Patric"",""Rachel Ward"",""Bruce Dern"",""Ge...",Mark Plummer,Howard E. Smith,"{""James Foley"",""Robert Redlin""}",Maurice Jarre,"{""the novel"",""After Dark, My Sweet"",by,""Jim Th..."
tt0099005,2,11856,Air America,Air America,The few. The proud. The totally insane.,,https://en.wikipedia.org/wiki/Air_America_(film),https://www.imdb.com/title/tt0099005/,112.0,35000000.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",TriStar Pictures,Daniel Melnick,Roger Spottiswoode,"{""Mel Gibson"",""Robert Downey Jr."",""Nancy Travi...",Roger Deakins,"{""John Bloom"",""Lois Freeman-Fox""}","{""John Eskow"",""Richard Rush""}",Charles Gross,"{""Air America"",by,""Christopher Robbins""}"
tt0099012,3,8217,Alice,Alice,,,https://en.wikipedia.org/wiki/Alice_(1990_film),https://www.imdb.com/title/tt0099012/,102.0,12000000.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Orion Pictures,Robert Greenhut,Woody Allen,"{""Alec Baldwin"",""Blythe Danner"",""Judy Davis"",""...",Carlo Di Palma,Susan E. Morse,Woody Allen,,
tt0099018,4,25943,Almost an Angel,Almost an Angel,Who does he think he is?,,https://en.wikipedia.org/wiki/Almost_an_Angel,https://www.imdb.com/title/tt0099018/,95.0,25000000.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Paramount Pictures,John Cornell,John Cornell,"{""Paul Hogan"",""Elias Koteas"",""Linda Kozlowski""}",Russell Boyd,David Stiven,Paul Hogan,Maurice Jarre,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
tt3567666,6046,348389,Stratton,Stratton,The enemy has a weapon. So do we.,,https://en.wikipedia.org/wiki/Stratton_(film),https://www.imdb.com/title/tt3567666/,94.0,,...,"[{'iso_3166_1': 'GB', 'name': 'United Kingdom'}]",,Matthew Jenkins,Simon West,"{""Dominic Cooper"",""Gemma Chan"",""Austin Stowell...",Felix Wiedemann,Andrew MacRitchie,"{""Duncan Falconer"",""Warren Davis II""}",Nathaniel Méchaly,"{Stratton,series,by,""Duncan Falconer""}"
tt5639354,6047,429191,A Fantastic Woman,Una mujer fantástica,,,https://en.wikipedia.org/wiki/A_Fantastic_Woman,https://www.imdb.com/title/tt5639354/,104.0,,...,"[{'iso_3166_1': 'FR', 'name': 'France'}, {'iso...","{""Participant Media (Chile)"",""Piffl Medien (Ge...","{""Juan de Dios Larraín"",""Pablo Larraín""}",Sebastián Lelio,"{""Daniela Vega"",""Francisco Reyes""}",Benjamín Echazarreta,Soledad Salfate,"{""Sebastián Lelio"",""Gonzalo Maza""}",Matthew Herbert,
tt5390066,6048,390059,Permission,Permission,,,https://en.wikipedia.org/wiki/Permission_(film),https://www.imdb.com/title/tt5390066/,96.0,,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Good Deed Entertainment,"{""Brian Crano"",""Rebecca Hall""}",Brian Crano,"{""Rebecca Hall"",""Dan Stevens"",""Morgan Spector""...",Adam Bricker,Matt Friedman,Brian Crano,,
tt6304162,6049,429174,Loveless,Нелюбовь,,,https://en.wikipedia.org/wiki/Loveless_(film),https://www.imdb.com/title/tt6304162/,128.0,,...,"[{'iso_3166_1': 'RU', 'name': 'Russia'}, {'iso...","{""Sony Pictures Releasing"",(Russia),[1]}","{""Alexander Rodnyansky"",""Sergey Melkumov"",""Gle...",Andrey Zvyagintsev,"{""Maryana Spivak"",""Aleksey Rozin"",""Matvey Novi...",Mikhail Krichman,Anna Mass,"{""Oleg Negin"",""Andrey Zvyagintsev""}","{""Evgueni Galperine"",""Sacha Galperine""}",


In [13]:
# DataFrame.set_index(keys, drop=True, append=False, inplace=False, verify_integrity=False)
automated_df.set_index('imdb_id',drop=True, append=False, inplace=False, verify_integrity=False)

Unnamed: 0_level_0,index,kaggle_id,title,original_title,tagline,belongs_to_collection,wikipedia_url,imdb_link,runtime,budget,...,production_countries,distributor,producers,director,starring,cinematography,editors,writers,composers,based_on
imdb_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
tt0098987,0,9548,The Adventures of Ford Fairlane,The Adventures of Ford Fairlane,Kojak. Columbo. Dirty Harry. Wimps.,,https://en.wikipedia.org/wiki/The_Adventures_o...,https://www.imdb.com/title/tt0098987/,104.0,49000000.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",20th Century Fox,"{""Steve Perry"",""Joel Silver""}",Renny Harlin,"{""Andrew Dice Clay"",""Wayne Newton"",""Priscilla ...",Oliver Wood,Michael Tronick,"{""David Arnott"",""James Cappe""}","{""Cliff Eidelman"",Yello}","{Characters,""by Rex Weiner""}"
tt0098994,1,25501,"After Dark, My Sweet","After Dark, My Sweet",All they risked was everything.,,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",https://www.imdb.com/title/tt0098994/,114.0,6000000.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Avenue Pictures,"{""Ric Kidney"",""Robert Redlin""}",James Foley,"{""Jason Patric"",""Rachel Ward"",""Bruce Dern"",""Ge...",Mark Plummer,Howard E. Smith,"{""James Foley"",""Robert Redlin""}",Maurice Jarre,"{""the novel"",""After Dark, My Sweet"",by,""Jim Th..."
tt0099005,2,11856,Air America,Air America,The few. The proud. The totally insane.,,https://en.wikipedia.org/wiki/Air_America_(film),https://www.imdb.com/title/tt0099005/,112.0,35000000.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",TriStar Pictures,Daniel Melnick,Roger Spottiswoode,"{""Mel Gibson"",""Robert Downey Jr."",""Nancy Travi...",Roger Deakins,"{""John Bloom"",""Lois Freeman-Fox""}","{""John Eskow"",""Richard Rush""}",Charles Gross,"{""Air America"",by,""Christopher Robbins""}"
tt0099012,3,8217,Alice,Alice,,,https://en.wikipedia.org/wiki/Alice_(1990_film),https://www.imdb.com/title/tt0099012/,102.0,12000000.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Orion Pictures,Robert Greenhut,Woody Allen,"{""Alec Baldwin"",""Blythe Danner"",""Judy Davis"",""...",Carlo Di Palma,Susan E. Morse,Woody Allen,,
tt0099018,4,25943,Almost an Angel,Almost an Angel,Who does he think he is?,,https://en.wikipedia.org/wiki/Almost_an_Angel,https://www.imdb.com/title/tt0099018/,95.0,25000000.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Paramount Pictures,John Cornell,John Cornell,"{""Paul Hogan"",""Elias Koteas"",""Linda Kozlowski""}",Russell Boyd,David Stiven,Paul Hogan,Maurice Jarre,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
tt4765284,6068,353616,Pitch Perfect 3,Pitch Perfect 3,,"{'id': 306031, 'name': 'Pitch Perfect Collecti...",https://en.wikipedia.org/wiki/Pitch_Perfect_3,https://www.imdb.com/title/tt4765284/,93.0,45000000.0,...,[],Universal Pictures,"{""Elizabeth Banks"",""Paul Brooks"",""Max Handelman""}",Trish Sie,"{""Anna Kendrick"",""Rebel Wilson"",""Hailee Steinf...",Matthew Clark,"{""Craig Alpert"",""Colin Patton""}",Kay Cannon,Christopher Lennertz,
tt3567666,6069,348389,Stratton,Stratton,The enemy has a weapon. So do we.,,https://en.wikipedia.org/wiki/Stratton_(film),https://www.imdb.com/title/tt3567666/,94.0,,...,"[{'iso_3166_1': 'GB', 'name': 'United Kingdom'}]",,Matthew Jenkins,Simon West,"{""Dominic Cooper"",""Gemma Chan"",""Austin Stowell...",Felix Wiedemann,Andrew MacRitchie,"{""Duncan Falconer"",""Warren Davis II""}",Nathaniel Méchaly,"{Stratton,series,by,""Duncan Falconer""}"
tt5639354,6070,429191,A Fantastic Woman,Una mujer fantástica,,,https://en.wikipedia.org/wiki/A_Fantastic_Woman,https://www.imdb.com/title/tt5639354/,104.0,,...,"[{'iso_3166_1': 'FR', 'name': 'France'}, {'iso...","{""Participant Media (Chile)"",""Piffl Medien (Ge...","{""Juan de Dios Larraín"",""Pablo Larraín""}",Sebastián Lelio,"{""Daniela Vega"",""Francisco Reyes""}",Benjamín Echazarreta,Soledad Salfate,"{""Sebastián Lelio"",""Gonzalo Maza""}",Matthew Herbert,
tt5390066,6071,390059,Permission,Permission,,,https://en.wikipedia.org/wiki/Permission_(film),https://www.imdb.com/title/tt5390066/,96.0,,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Good Deed Entertainment,"{""Brian Crano"",""Rebecca Hall""}",Brian Crano,"{""Rebecca Hall"",""Dan Stevens"",""Morgan Spector""...",Adam Bricker,Matt Friedman,Brian Crano,,


In [14]:
# Reference adapted from https://stackoverflow.com/questions/47131361/diff-between-two-dataframes-in-pandas
diff_df = pd.merge(original_df, automated_df, how='inner', indicator='Exist')
diff_df = diff_df.loc[diff_df['Exist'] == 'both']
diff_df

Unnamed: 0,index,imdb_id,kaggle_id,title,original_title,tagline,belongs_to_collection,wikipedia_url,imdb_link,runtime,...,distributor,producers,director,starring,cinematography,editors,writers,composers,based_on,Exist
0,0,tt0098987,9548,The Adventures of Ford Fairlane,The Adventures of Ford Fairlane,Kojak. Columbo. Dirty Harry. Wimps.,,https://en.wikipedia.org/wiki/The_Adventures_o...,https://www.imdb.com/title/tt0098987/,104.0,...,20th Century Fox,"{""Steve Perry"",""Joel Silver""}",Renny Harlin,"{""Andrew Dice Clay"",""Wayne Newton"",""Priscilla ...",Oliver Wood,Michael Tronick,"{""David Arnott"",""James Cappe""}","{""Cliff Eidelman"",Yello}","{Characters,""by Rex Weiner""}",both
1,1,tt0098994,25501,"After Dark, My Sweet","After Dark, My Sweet",All they risked was everything.,,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",https://www.imdb.com/title/tt0098994/,114.0,...,Avenue Pictures,"{""Ric Kidney"",""Robert Redlin""}",James Foley,"{""Jason Patric"",""Rachel Ward"",""Bruce Dern"",""Ge...",Mark Plummer,Howard E. Smith,"{""James Foley"",""Robert Redlin""}",Maurice Jarre,"{""the novel"",""After Dark, My Sweet"",by,""Jim Th...",both
2,2,tt0099005,11856,Air America,Air America,The few. The proud. The totally insane.,,https://en.wikipedia.org/wiki/Air_America_(film),https://www.imdb.com/title/tt0099005/,112.0,...,TriStar Pictures,Daniel Melnick,Roger Spottiswoode,"{""Mel Gibson"",""Robert Downey Jr."",""Nancy Travi...",Roger Deakins,"{""John Bloom"",""Lois Freeman-Fox""}","{""John Eskow"",""Richard Rush""}",Charles Gross,"{""Air America"",by,""Christopher Robbins""}",both
3,3,tt0099012,8217,Alice,Alice,,,https://en.wikipedia.org/wiki/Alice_(1990_film),https://www.imdb.com/title/tt0099012/,102.0,...,Orion Pictures,Robert Greenhut,Woody Allen,"{""Alec Baldwin"",""Blythe Danner"",""Judy Davis"",""...",Carlo Di Palma,Susan E. Morse,Woody Allen,,,both
4,4,tt0099018,25943,Almost an Angel,Almost an Angel,Who does he think he is?,,https://en.wikipedia.org/wiki/Almost_an_Angel,https://www.imdb.com/title/tt0099018/,95.0,...,Paramount Pictures,John Cornell,John Cornell,"{""Paul Hogan"",""Elias Koteas"",""Linda Kozlowski""}",Russell Boyd,David Stiven,Paul Hogan,Maurice Jarre,,both
5,5,tt0099026,79509,The Ambulance,The Ambulance,You'll be in perfect health before you die.,,https://en.wikipedia.org/wiki/The_Ambulance,https://www.imdb.com/title/tt0099026/,91.0,...,Triumph Releasing Corporation,"{""Larry Cohen"",""Moctesuma Esparza"",""Robert Katz""}",Larry Cohen,"{""Eric Roberts"",""James Earl Jones"",""Red Button...",Jacques Haitkin,"{""Claudia Finkle"",""Armond Leibowitz""}",Larry Cohen,Jay Chattaway,,both
6,19,tt0099180,18111,Bride of Re-Animator,Bride of Re-Animator,Date. Mate. Re-animate.,"{'id': 98036, 'name': 'Re-Animator Collection'...",https://en.wikipedia.org/wiki/Bride_of_Re-Anim...,https://www.imdb.com/title/tt0099180/,96.0,...,50th Street Films,Brian Yuzna,Brian Yuzna,"{""Jeffrey Combs"",""Bruce Abbott"",""Fabiana Udeni...",Rick Fichter,Peter Teschner,"{""Rick Fry"",""Woody Keith"",""Brian Yuzna""}",Richard Band,"{Characters,by,""H. P. Lovecraft""}",both
7,6,tt0099028,41326,American Dream,American Dream,"The award-winning film of American lives, Amer...",,https://en.wikipedia.org/wiki/American_Dream_(...,https://www.imdb.com/title/tt0099028/,98.0,...,Prestige Films,"{""Arthur Cohn"",""Barbara Kopple""}","{""Barbara Kopple"",Co-directors:,""Cathy Caplan""...",,"{""Tom Hurwitz"",""Mathieu Roberts"",""Nesya Shapiro""}","{""Cathy Caplan"",""Thomas Haneke"",""Lawrence Silk""}",,Michael Small,,both
8,7,tt0101326,25528,American Ninja 4: The Annihilation,American Ninja 4: The Annihilation,,"{'id': 91945, 'name': 'American Ninja Collecti...",https://en.wikipedia.org/wiki/American_Ninja_4...,https://www.imdb.com/title/tt0101326/,99.0,...,Cannon Group,Ovidio G Assonitis,Cedric Sundstrom,"{""Michael Dudikoff"",""David Bradley"",""James Boo...",,,David Geeves,,,both
9,8,tt0099044,11595,Another 48 Hrs.,Another 48 Hrs.,The boys are back in town.,"{'id': 93295, 'name': '48 Hrs. Collection', 'p...",https://en.wikipedia.org/wiki/Another_48_Hrs.,https://www.imdb.com/title/tt0099044/,95.0,...,Paramount Pictures,"{""Lawrence Gordon"",""Robert D. Wachs""}",Walter Hill,"{""Eddie Murphy"",""Nick Nolte"",""Brion James"",""Ed...",Matthew F. Leonetti,"{""Donn Aron"",""Carmel Davies"",""Freeman A. Davie...",Fred Braughton,James Horner,"{""Characters by"",""Roger Spottiswoode"",""Walter ...",both


In [15]:
new_df = pd.merge(original_df, automated_df, left_index=True, right_index=True)
new_df

Unnamed: 0,index_x,imdb_id_x,kaggle_id_x,title_x,original_title_x,tagline_x,belongs_to_collection_x,wikipedia_url_x,imdb_link_x,runtime_x,...,production_countries_y,distributor_y,producers_y,director_y,starring_y,cinematography_y,editors_y,writers_y,composers_y,based_on_y
0,0,tt0098987,9548,The Adventures of Ford Fairlane,The Adventures of Ford Fairlane,Kojak. Columbo. Dirty Harry. Wimps.,,https://en.wikipedia.org/wiki/The_Adventures_o...,https://www.imdb.com/title/tt0098987/,104.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",20th Century Fox,"{""Steve Perry"",""Joel Silver""}",Renny Harlin,"{""Andrew Dice Clay"",""Wayne Newton"",""Priscilla ...",Oliver Wood,Michael Tronick,"{""David Arnott"",""James Cappe""}","{""Cliff Eidelman"",Yello}","{Characters,""by Rex Weiner""}"
1,1,tt0098994,25501,"After Dark, My Sweet","After Dark, My Sweet",All they risked was everything.,,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",https://www.imdb.com/title/tt0098994/,114.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Avenue Pictures,"{""Ric Kidney"",""Robert Redlin""}",James Foley,"{""Jason Patric"",""Rachel Ward"",""Bruce Dern"",""Ge...",Mark Plummer,Howard E. Smith,"{""James Foley"",""Robert Redlin""}",Maurice Jarre,"{""the novel"",""After Dark, My Sweet"",by,""Jim Th..."
2,2,tt0099005,11856,Air America,Air America,The few. The proud. The totally insane.,,https://en.wikipedia.org/wiki/Air_America_(film),https://www.imdb.com/title/tt0099005/,112.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",TriStar Pictures,Daniel Melnick,Roger Spottiswoode,"{""Mel Gibson"",""Robert Downey Jr."",""Nancy Travi...",Roger Deakins,"{""John Bloom"",""Lois Freeman-Fox""}","{""John Eskow"",""Richard Rush""}",Charles Gross,"{""Air America"",by,""Christopher Robbins""}"
3,3,tt0099012,8217,Alice,Alice,,,https://en.wikipedia.org/wiki/Alice_(1990_film),https://www.imdb.com/title/tt0099012/,102.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Orion Pictures,Robert Greenhut,Woody Allen,"{""Alec Baldwin"",""Blythe Danner"",""Judy Davis"",""...",Carlo Di Palma,Susan E. Morse,Woody Allen,,
4,4,tt0099018,25943,Almost an Angel,Almost an Angel,Who does he think he is?,,https://en.wikipedia.org/wiki/Almost_an_Angel,https://www.imdb.com/title/tt0099018/,95.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Paramount Pictures,John Cornell,John Cornell,"{""Paul Hogan"",""Elias Koteas"",""Linda Kozlowski""}",Russell Boyd,David Stiven,Paul Hogan,Maurice Jarre,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6046,6046,tt3567666,348389,Stratton,Stratton,The enemy has a weapon. So do we.,,https://en.wikipedia.org/wiki/Stratton_(film),https://www.imdb.com/title/tt3567666/,94.0,...,"[{'iso_3166_1': 'NL', 'name': 'Netherlands'}, ...",Warner Bros. Pictures,"{""Emma Thomas"",""Christopher Nolan""}",Christopher Nolan,"{""Fionn Whitehead"",""Tom Glynn-Carney"",""Jack Lo...",Hoyte van Hoytema,Lee Smith,Christopher Nolan,Hans Zimmer,
6047,6047,tt5639354,429191,A Fantastic Woman,Una mujer fantástica,,,https://en.wikipedia.org/wiki/A_Fantastic_Woman,https://www.imdb.com/title/tt5639354/,104.0,...,"[{'iso_3166_1': 'FR', 'name': 'France'}]","{""EuropaCorp Distribution"",STXfilms,(USA),Lion...",Virginie Besson-Silla,Luc Besson,"{""Dane DeHaan"",""Cara Delevingne"",""Clive Owen"",...",Thierry Arbogast,Julien Rey,Luc Besson,Alexandre Desplat,"{""Valérian and Laureline"",by,""Pierre Christin""..."
6048,6048,tt5390066,390059,Permission,Permission,,,https://en.wikipedia.org/wiki/Permission_(film),https://www.imdb.com/title/tt5390066/,96.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Universal Pictures,"{""Will Packer"",""Malcolm D. Lee""}",Malcolm D. Lee,"{""Regina Hall"",""Tiffany Haddish"",""Jada Pinkett...",Greg Gardiner,Paul Millspaugh,"{""Erica Rivinoja"",""Kenya Barris"",""Tracy Oliver""}",David Newman,
6049,6049,tt6304162,429174,Loveless,Нелюбовь,,,https://en.wikipedia.org/wiki/Loveless_(film),https://www.imdb.com/title/tt6304162/,128.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Lionsgate Premiere,"{""Randall Emmett"",""George Furla"",""Mark Stewart""}",Steven C. Miller,"{""Hayden Christensen"",""Bruce Willis"",""Gethin A...",Brandon Cox,,Nick Gordon,"{""Ryan Franks"",""Scott Nickoley""}",


In [16]:
original_df

Unnamed: 0,index,imdb_id,kaggle_id,title,original_title,tagline,belongs_to_collection,wikipedia_url,imdb_link,runtime,...,production_countries,distributor,producers,director,starring,cinematography,editors,writers,composers,based_on
0,0,tt0098987,9548,The Adventures of Ford Fairlane,The Adventures of Ford Fairlane,Kojak. Columbo. Dirty Harry. Wimps.,,https://en.wikipedia.org/wiki/The_Adventures_o...,https://www.imdb.com/title/tt0098987/,104.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",20th Century Fox,"{""Steve Perry"",""Joel Silver""}",Renny Harlin,"{""Andrew Dice Clay"",""Wayne Newton"",""Priscilla ...",Oliver Wood,Michael Tronick,"{""David Arnott"",""James Cappe""}","{""Cliff Eidelman"",Yello}","{Characters,""by Rex Weiner""}"
1,1,tt0098994,25501,"After Dark, My Sweet","After Dark, My Sweet",All they risked was everything.,,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",https://www.imdb.com/title/tt0098994/,114.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Avenue Pictures,"{""Ric Kidney"",""Robert Redlin""}",James Foley,"{""Jason Patric"",""Rachel Ward"",""Bruce Dern"",""Ge...",Mark Plummer,Howard E. Smith,"{""James Foley"",""Robert Redlin""}",Maurice Jarre,"{""the novel"",""After Dark, My Sweet"",by,""Jim Th..."
2,2,tt0099005,11856,Air America,Air America,The few. The proud. The totally insane.,,https://en.wikipedia.org/wiki/Air_America_(film),https://www.imdb.com/title/tt0099005/,112.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",TriStar Pictures,Daniel Melnick,Roger Spottiswoode,"{""Mel Gibson"",""Robert Downey Jr."",""Nancy Travi...",Roger Deakins,"{""John Bloom"",""Lois Freeman-Fox""}","{""John Eskow"",""Richard Rush""}",Charles Gross,"{""Air America"",by,""Christopher Robbins""}"
3,3,tt0099012,8217,Alice,Alice,,,https://en.wikipedia.org/wiki/Alice_(1990_film),https://www.imdb.com/title/tt0099012/,102.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Orion Pictures,Robert Greenhut,Woody Allen,"{""Alec Baldwin"",""Blythe Danner"",""Judy Davis"",""...",Carlo Di Palma,Susan E. Morse,Woody Allen,,
4,4,tt0099018,25943,Almost an Angel,Almost an Angel,Who does he think he is?,,https://en.wikipedia.org/wiki/Almost_an_Angel,https://www.imdb.com/title/tt0099018/,95.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Paramount Pictures,John Cornell,John Cornell,"{""Paul Hogan"",""Elias Koteas"",""Linda Kozlowski""}",Russell Boyd,David Stiven,Paul Hogan,Maurice Jarre,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6046,6046,tt3567666,348389,Stratton,Stratton,The enemy has a weapon. So do we.,,https://en.wikipedia.org/wiki/Stratton_(film),https://www.imdb.com/title/tt3567666/,94.0,...,"[{'iso_3166_1': 'GB', 'name': 'United Kingdom'}]",,Matthew Jenkins,Simon West,"{""Dominic Cooper"",""Gemma Chan"",""Austin Stowell...",Felix Wiedemann,Andrew MacRitchie,"{""Duncan Falconer"",""Warren Davis II""}",Nathaniel Méchaly,"{Stratton,series,by,""Duncan Falconer""}"
6047,6047,tt5639354,429191,A Fantastic Woman,Una mujer fantástica,,,https://en.wikipedia.org/wiki/A_Fantastic_Woman,https://www.imdb.com/title/tt5639354/,104.0,...,"[{'iso_3166_1': 'FR', 'name': 'France'}, {'iso...","{""Participant Media (Chile)"",""Piffl Medien (Ge...","{""Juan de Dios Larraín"",""Pablo Larraín""}",Sebastián Lelio,"{""Daniela Vega"",""Francisco Reyes""}",Benjamín Echazarreta,Soledad Salfate,"{""Sebastián Lelio"",""Gonzalo Maza""}",Matthew Herbert,
6048,6048,tt5390066,390059,Permission,Permission,,,https://en.wikipedia.org/wiki/Permission_(film),https://www.imdb.com/title/tt5390066/,96.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Good Deed Entertainment,"{""Brian Crano"",""Rebecca Hall""}",Brian Crano,"{""Rebecca Hall"",""Dan Stevens"",""Morgan Spector""...",Adam Bricker,Matt Friedman,Brian Crano,,
6049,6049,tt6304162,429174,Loveless,Нелюбовь,,,https://en.wikipedia.org/wiki/Loveless_(film),https://www.imdb.com/title/tt6304162/,128.0,...,"[{'iso_3166_1': 'RU', 'name': 'Russia'}, {'iso...","{""Sony Pictures Releasing"",(Russia),[1]}","{""Alexander Rodnyansky"",""Sergey Melkumov"",""Gle...",Andrey Zvyagintsev,"{""Maryana Spivak"",""Aleksey Rozin"",""Matvey Novi...",Mikhail Krichman,Anna Mass,"{""Oleg Negin"",""Andrey Zvyagintsev""}","{""Evgueni Galperine"",""Sacha Galperine""}",


In [17]:
automated_df

Unnamed: 0,index,imdb_id,kaggle_id,title,original_title,tagline,belongs_to_collection,wikipedia_url,imdb_link,runtime,...,production_countries,distributor,producers,director,starring,cinematography,editors,writers,composers,based_on
0,0,tt0098987,9548,The Adventures of Ford Fairlane,The Adventures of Ford Fairlane,Kojak. Columbo. Dirty Harry. Wimps.,,https://en.wikipedia.org/wiki/The_Adventures_o...,https://www.imdb.com/title/tt0098987/,104.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",20th Century Fox,"{""Steve Perry"",""Joel Silver""}",Renny Harlin,"{""Andrew Dice Clay"",""Wayne Newton"",""Priscilla ...",Oliver Wood,Michael Tronick,"{""David Arnott"",""James Cappe""}","{""Cliff Eidelman"",Yello}","{Characters,""by Rex Weiner""}"
1,1,tt0098994,25501,"After Dark, My Sweet","After Dark, My Sweet",All they risked was everything.,,"https://en.wikipedia.org/wiki/After_Dark,_My_S...",https://www.imdb.com/title/tt0098994/,114.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Avenue Pictures,"{""Ric Kidney"",""Robert Redlin""}",James Foley,"{""Jason Patric"",""Rachel Ward"",""Bruce Dern"",""Ge...",Mark Plummer,Howard E. Smith,"{""James Foley"",""Robert Redlin""}",Maurice Jarre,"{""the novel"",""After Dark, My Sweet"",by,""Jim Th..."
2,2,tt0099005,11856,Air America,Air America,The few. The proud. The totally insane.,,https://en.wikipedia.org/wiki/Air_America_(film),https://www.imdb.com/title/tt0099005/,112.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",TriStar Pictures,Daniel Melnick,Roger Spottiswoode,"{""Mel Gibson"",""Robert Downey Jr."",""Nancy Travi...",Roger Deakins,"{""John Bloom"",""Lois Freeman-Fox""}","{""John Eskow"",""Richard Rush""}",Charles Gross,"{""Air America"",by,""Christopher Robbins""}"
3,3,tt0099012,8217,Alice,Alice,,,https://en.wikipedia.org/wiki/Alice_(1990_film),https://www.imdb.com/title/tt0099012/,102.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Orion Pictures,Robert Greenhut,Woody Allen,"{""Alec Baldwin"",""Blythe Danner"",""Judy Davis"",""...",Carlo Di Palma,Susan E. Morse,Woody Allen,,
4,4,tt0099018,25943,Almost an Angel,Almost an Angel,Who does he think he is?,,https://en.wikipedia.org/wiki/Almost_an_Angel,https://www.imdb.com/title/tt0099018/,95.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Paramount Pictures,John Cornell,John Cornell,"{""Paul Hogan"",""Elias Koteas"",""Linda Kozlowski""}",Russell Boyd,David Stiven,Paul Hogan,Maurice Jarre,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6070,6068,tt4765284,353616,Pitch Perfect 3,Pitch Perfect 3,,"{'id': 306031, 'name': 'Pitch Perfect Collecti...",https://en.wikipedia.org/wiki/Pitch_Perfect_3,https://www.imdb.com/title/tt4765284/,93.0,...,[],Universal Pictures,"{""Elizabeth Banks"",""Paul Brooks"",""Max Handelman""}",Trish Sie,"{""Anna Kendrick"",""Rebel Wilson"",""Hailee Steinf...",Matthew Clark,"{""Craig Alpert"",""Colin Patton""}",Kay Cannon,Christopher Lennertz,
6071,6069,tt3567666,348389,Stratton,Stratton,The enemy has a weapon. So do we.,,https://en.wikipedia.org/wiki/Stratton_(film),https://www.imdb.com/title/tt3567666/,94.0,...,"[{'iso_3166_1': 'GB', 'name': 'United Kingdom'}]",,Matthew Jenkins,Simon West,"{""Dominic Cooper"",""Gemma Chan"",""Austin Stowell...",Felix Wiedemann,Andrew MacRitchie,"{""Duncan Falconer"",""Warren Davis II""}",Nathaniel Méchaly,"{Stratton,series,by,""Duncan Falconer""}"
6072,6070,tt5639354,429191,A Fantastic Woman,Una mujer fantástica,,,https://en.wikipedia.org/wiki/A_Fantastic_Woman,https://www.imdb.com/title/tt5639354/,104.0,...,"[{'iso_3166_1': 'FR', 'name': 'France'}, {'iso...","{""Participant Media (Chile)"",""Piffl Medien (Ge...","{""Juan de Dios Larraín"",""Pablo Larraín""}",Sebastián Lelio,"{""Daniela Vega"",""Francisco Reyes""}",Benjamín Echazarreta,Soledad Salfate,"{""Sebastián Lelio"",""Gonzalo Maza""}",Matthew Herbert,
6073,6071,tt5390066,390059,Permission,Permission,,,https://en.wikipedia.org/wiki/Permission_(film),https://www.imdb.com/title/tt5390066/,96.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",Good Deed Entertainment,"{""Brian Crano"",""Rebecca Hall""}",Brian Crano,"{""Rebecca Hall"",""Dan Stevens"",""Morgan Spector""...",Adam Bricker,Matt Friedman,Brian Crano,,


## Check for Null imdb_id

In [18]:
automated_df[automated_df['imdb_id'].isnull()]

Unnamed: 0,index,imdb_id,kaggle_id,title,original_title,tagline,belongs_to_collection,wikipedia_url,imdb_link,runtime,...,production_countries,distributor,producers,director,starring,cinematography,editors,writers,composers,based_on
40,38,,36337,Delusion,Delusion,She only wanted love. But money's better than ...,,,,100.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",,,,,,,,,
41,39,,200796,Show,Show,,,,,,...,[],,,,,,,,,
42,40,,75015,How I Unleashed World War II Part III: Among F...,Jak rozpętałem drugą wojnę światową: Cz.3 - Wś...,,"{'id': 75014, 'name': 'How I Unleashed World W...",,,73.0,...,"[{'iso_3166_1': 'PL', 'name': 'Poland'}]",,,,,,,,,
43,41,,36663,Dreamkiller,Dreamkiller,Fear is the greatest killer.,,,,110.0,...,[],,,,,,,,,
44,42,,47116,The Winner,Víťaz,,,,,78.0,...,[],,,,,,,,,
45,43,,28500,Before The Dinosaurs - Walking With Monsters,Before The Dinosaurs - Walking With Monsters,,,,,87.0,...,"[{'iso_3166_1': 'GB', 'name': 'United Kingdom'}]",,,,,,,,,
46,44,,118013,Endeavour,Endeavour,Rookie detective Endeavor Morse faces his firs...,,,,98.0,...,[],,,,,,,,,
47,45,,15257,Hulk vs. Wolverine,Hulk vs. Wolverine,,,,,38.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",,,,,,,,,
48,46,,55576,Last Stand at Saber River,Last Stand at Saber River,,,,,,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",,,,,,,,,
49,47,,293412,Running Wild,Running Wild,,,,,,...,[],,,,,,,,,


In [19]:
original_df[original_df['imdb_id'].isnull()]

Unnamed: 0,index,imdb_id,kaggle_id,title,original_title,tagline,belongs_to_collection,wikipedia_url,imdb_link,runtime,...,production_countries,distributor,producers,director,starring,cinematography,editors,writers,composers,based_on


In [20]:
# Drop Null imdb_id
automated_noNulls_df = automated_df['imdb_id'].dropna()
len(automated_noNulls_df)

6058

In [21]:
automated_noNulls_df

0       tt0098987
1       tt0098994
2       tt0099005
3       tt0099012
4       tt0099018
          ...    
6070    tt4765284
6071    tt3567666
6072    tt5639354
6073    tt5390066
6074    tt6304162
Name: imdb_id, Length: 6058, dtype: object

In [22]:
# Drop Null imdb_id
original_noNulls_df = original_df['imdb_id'].dropna()
len(original_noNulls_df)

6051

In [23]:
original_noNulls_df

0       tt0098987
1       tt0098994
2       tt0099005
3       tt0099012
4       tt0099018
          ...    
6046    tt3567666
6047    tt5639354
6048    tt5390066
6049    tt6304162
6050    tt3859310
Name: imdb_id, Length: 6051, dtype: object

In [34]:
# Reference adapted from https://stackoverflow.com/questions/47131361/diff-between-two-dataframes-in-pandas
diff_df = pd.merge(original_noNulls_df, automated_noNulls_df, how='right', indicator='Exist')
diff_df = diff_df.loc[diff_df['Exist'] != 'both']
diff_df

Unnamed: 0,imdb_id,Exist
53,tt0100530,right_only
251,tt0101664,right_only
488,tt0144618,right_only
1335,tt0116310,right_only
1409,tt0056196,right_only
3623,tt0045793,right_only
4010,tt0491145,right_only


In [139]:
diff_df = diff_df.reset_index(drop=True)

In [140]:
diff_df

Unnamed: 0,imdb_id,Missing
0,tt0100530,right_only
1,tt0101664,right_only
2,tt0144618,right_only
3,tt0116310,right_only
4,tt0056196,right_only
5,tt0045793,right_only
6,tt0491145,right_only


In [161]:
len_diff = len(diff_df)

for i in range(len_diff):
    
    imdb_id = diff_df['imdb_id'].iloc[i]
    
    temp_df = automated_df.loc[automated_df['imdb_id'] == imdb_id]
    
    if i == 0:
        missing_df = temp_df
        
    else:
        missing_df = missing_df.append(temp_df)

missing_df        

Unnamed: 0,index,imdb_id,kaggle_id,title,original_title,tagline,belongs_to_collection,wikipedia_url,imdb_link,runtime,...,production_countries,distributor,producers,director,starring,cinematography,editors,writers,composers,based_on
70,180,tt0100530,10170,The Russia House,The Russia House,,,https://en.wikipedia.org/wiki/The_Russia_House,https://www.imdb.com/title/tt0100530/,122.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",,,,,,,,,
268,260,tt0101664,91396,The Dark Wind,The Dark Wind,,,https://en.wikipedia.org/wiki/The_Dark_Wind,https://www.imdb.com/title/tt0101664/,111.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",,,,,,,,,
505,553,tt0144618,16486,The Spirit of Christmas,The Spirit of Christmas,,,https://en.wikipedia.org/wiki/The_Spirit_of_Ch...,https://www.imdb.com/title/tt0144618/,4.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",,,,,,,,,
1352,1345,tt0116310,168283,Fire on the Mountain,Fire on the Mountain,,,https://en.wikipedia.org/wiki/Fire_on_the_Moun...,https://www.imdb.com/title/tt0116310/,72.0,...,[],,,,,,,,,
1426,1418,tt0056196,43004,Long Day's Journey Into Night,Long Day's Journey Into Night,PRIDE...POWER...PASSION...PAIN!,,https://en.wikipedia.org/wiki/Long_Day%27s_Jou...,https://www.imdb.com/title/tt0056196/,174.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",,,,,,,Eugene O'Neill,,
3636,3629,tt0045793,11426,From Here to Eternity,From Here to Eternity,Pouring out of impassioned pages...brawling th...,,https://en.wikipedia.org/wiki/The_Holiday,https://www.imdb.com/title/tt00457939/,118.0,...,"[{'iso_3166_1': 'US', 'name': 'United States o...","{""Columbia Pictures"",""(United States)"",""Univer...","{""Nancy Meyers"",""Bruce A. Block""}",Nancy Meyers,"{""Kate Winslet"",""Cameron Diaz"",""Jude Law"",""Jac...",Dean Cundey,Joe Hutshing,Nancy Meyers,Hans Zimmer,
4023,4011,tt0491145,19551,Shadow Puppets,Shadow Puppets,,,https://en.wikipedia.org/wiki/Shadow_Puppets_(...,https://www.imdb.com/title/tt0491145/,103.0,...,"[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso...",,,,"{""Jolene Blalock"",""Tony Todd"",""James Marsters""...",,,,,
