### Merging Netflix data with the IMDb datasets (title basics and ratings) for EDA

In [1]:
import pandas as pd 


In [2]:
## IMDb dumps: both files share the same parser options
## (tab-separated, the literal string \N is read as a missing value).
imdb_read_options = dict(sep='\t', na_values='\\N', low_memory=False)
basics = pd.read_csv("title.basics.tsv.gz", **imdb_read_options)
ratings = pd.read_csv("title.ratings.tsv.gz", **imdb_read_options)

## Netflix catalogue: plain comma-separated file, default parser options
netflix = pd.read_csv("netflix_titles.csv")



In [3]:
# Show the column labels of each loaded frame, separated by a block of blank lines.
loaded_frames = (("basics", basics), ("ratings", ratings), ("netflix", netflix))
for position, (label, frame) in enumerate(loaded_frames):
    if position > 0:
        print("\n\n\n\n")
    print(f"{label} -> ", frame.columns)

basics ->  Index(['tconst', 'titleType', 'primaryTitle', 'originalTitle', 'isAdult',
       'startYear', 'endYear', 'runtimeMinutes', 'genres'],
      dtype='object')





ratings ->  Index(['tconst', 'averageRating', 'numVotes'], dtype='object')





netflix ->  Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')


In [4]:
# IMDb side: keep only 'movie' and 'tvSeries' rows, then derive the join keys.
#  - startYear is coerced to a numeric dtype; values that cannot be parsed become NaN
#    (so the column ends up float, not int, whenever anything is missing).
#  - primaryTitle_clean is the lower-cased title with surrounding whitespace removed.
# .assign returns a new frame, so `basics` itself is left untouched.
imdb_movies_tv = (
    basics
    .loc[basics['titleType'].isin(['movie', 'tvSeries'])]
    .assign(
        startYear=lambda frame: pd.to_numeric(frame['startYear'], errors='coerce'),
        primaryTitle_clean=lambda frame: frame['primaryTitle'].str.lower().str.strip(),
    )
)

# Netflix side: same two keys, written onto the existing frame.
netflix['release_year'] = pd.to_numeric(netflix['release_year'], errors='coerce')
netflix['title_clean'] = netflix['title'].str.lower().str.strip()


In [5]:
# Merge on title, year AND title type.
#
# Title + year alone is ambiguous: a Netflix "Movie" can share its name and year
# with an unrelated IMDb tvSeries (and vice versa), which attaches the wrong IMDb
# record and duplicates the Netflix row. Translating Netflix's `type` into IMDb's
# `titleType` vocabulary and adding it as a third key removes those cross-type matches.
# Trade-off: a title the two sources classify differently will no longer match.
NETFLIX_TO_IMDB_TYPE = {'Movie': 'movie', 'TV Show': 'tvSeries'}

# Temporary key column on a copy of the Netflix frame; `netflix` itself is not modified.
netflix_keyed = netflix.assign(_imdb_type=netflix['type'].map(NETFLIX_TO_IMDB_TYPE))

merged = pd.merge(netflix_keyed, imdb_movies_tv,
                  left_on=['title_clean', 'release_year', '_imdb_type'],
                  right_on=['primaryTitle_clean', 'startYear', 'titleType'],
                  how='inner')

# Drop the helper key so the result has exactly the Netflix + IMDb columns.
merged = merged.drop(columns='_imdb_type')

# NOTE(review): several IMDb titles of the same type can still share a name and year,
# so a Netflix row may appear more than once in `merged` -- check
# merged['show_id'].duplicated().sum() before aggregating.

In [6]:

# Attach IMDb ratings by title id. Left join: matched titles without a ratings row
# are kept, with NaN in the rating columns.
# NOTE: this cell rebinds `merged`; re-running it on its own merges ratings a second
# time onto the already-merged frame, so re-run the previous merge cell first.
merged = merged.merge(ratings, how='left', on='tconst')

In [7]:
# Preview the first five merged rows as a quick sanity check of the join.
merged.head(n=5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,...,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,primaryTitle_clean,averageRating,numVotes
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,...,Dick Johnson Is Dead,Dick Johnson Is Dead,0.0,2020.0,,89.0,"Biography,Documentary,Drama",dick johnson is dead,7.4,7477.0
1,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,...,Ganglands,Braqueurs,0.0,2021.0,,44.0,"Action,Crime,Drama",ganglands,7.2,4827.0
2,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,...,Jailbirds New Orleans,Jailbirds New Orleans,0.0,2021.0,,,"Documentary,Reality-TV",jailbirds new orleans,6.5,329.0
3,s7,Movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha","Vanessa Hudgens, Kimiko Glenn, James Marsden, ...",,"September 24, 2021",2021,PG,91 min,...,My Little Pony: A New Generation,My Little Pony: A New Generation,0.0,2021.0,,90.0,"Adventure,Animation,Comedy",my little pony: a new generation,6.8,4859.0
4,s8,Movie,Sankofa,Haile Gerima,"Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D...","United States, Ghana, Burkina Faso, United Kin...","September 24, 2021",1993,TV-MA,125 min,...,Sankofa,Sankofa,0.0,1993.0,,125.0,Drama,sankofa,7.0,876.0


In [8]:
# Display the column labels of the merged frame to confirm which fields it carries.
merged.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description',
       'title_clean', 'tconst', 'titleType', 'primaryTitle', 'originalTitle',
       'isAdult', 'startYear', 'endYear', 'runtimeMinutes', 'genres',
       'primaryTitle_clean', 'averageRating', 'numVotes'],
      dtype='object')

In [9]:
# Persist the merged table for later analysis.
# NOTE(review): to_csv also writes the row index by default, which shows up as an
# unnamed first column when the file is read back -- pass index=False if no
# downstream reader relies on it (confirm before changing).
output_csv = 'merged_title_ratings_onNetflix.csv'
merged.to_csv(output_csv)