## Netflix 데이터에 IMDB 평점 컬럼 삽입

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import os

In [2]:
# Load the IMDb ratings table, keeping only the title id and its average
# rating. "\N" and "nan" cells are parsed as missing values.
df_ratings = pd.read_csv(
    "/content/drive/MyDrive/DATATHON/dataset/title.ratings.tsv",
    sep="\t",
    usecols=["tconst", "averageRating"],
    na_values=["\\N", "nan"],
    low_memory=False,
)
df_ratings.head()

Unnamed: 0,tconst,averageRating
0,tt0000001,5.7
1,tt0000002,6.0
2,tt0000003,6.5
3,tt0000004,5.8
4,tt0000005,6.2


In [3]:
# Print the ratings frame's row count, per-column non-null counts and dtypes.
df_ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1221991 entries, 0 to 1221990
Data columns (total 2 columns):
 #   Column         Non-Null Count    Dtype  
---  ------         --------------    -----  
 0   tconst         1221991 non-null  object 
 1   averageRating  1221991 non-null  float64
dtypes: float64(1), object(1)
memory usage: 18.6+ MB


In [4]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric
# columns of the ratings frame.
df_ratings.describe()

Unnamed: 0,averageRating
count,1221991.0
mean,6.924441
std,1.391565
min,1.0
25%,6.2
50%,7.1
75%,7.9
max,10.0


In [5]:
# Load the IMDb title metadata, keeping only the title id and the primary
# title. "\N" and "nan" cells are parsed as missing values.
df_title_basics = pd.read_csv(
    "/content/drive/MyDrive/DATATHON/dataset/title.basics.tsv",
    sep="\t",
    usecols=["tconst", "primaryTitle"],
    na_values=["\\N", "nan"],
    low_memory=False,
)
df_title_basics.head()

Unnamed: 0,tconst,primaryTitle
0,tt0000001,Carmencita
1,tt0000002,Le clown et ses chiens
2,tt0000003,Pauvre Pierrot
3,tt0000004,Un bon bock
4,tt0000005,Blacksmith Scene


In [6]:
# Print the title-metadata frame's row count, column dtypes and memory usage.
df_title_basics.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8755374 entries, 0 to 8755373
Data columns (total 2 columns):
 #   Column        Dtype 
---  ------        ----- 
 0   tconst        object
 1   primaryTitle  object
dtypes: object(2)
memory usage: 133.6+ MB


In [7]:
# Pair every rating with its primary title through the shared `tconst` id.
# Inner join: ids that appear in only one of the two frames are dropped.
rating = df_ratings.merge(df_title_basics, on="tconst", how="inner")
rating

Unnamed: 0,tconst,averageRating,primaryTitle
0,tt0000001,5.7,Carmencita
1,tt0000002,6.0,Le clown et ses chiens
2,tt0000003,6.5,Pauvre Pierrot
3,tt0000004,5.8,Un bon bock
4,tt0000005,6.2,Blacksmith Scene
...,...,...,...
1221981,tt9916690,6.6,Horrid Henry Delivers the Milk
1221982,tt9916720,5.7,The Nun 2
1221983,tt9916730,8.4,6 Gunn
1221984,tt9916766,6.7,Episode #10.15


In [8]:
# Reduce the joined frame to the two columns needed for the Netflix join:
#   primaryTitle  -> title   (same column name as the Netflix data)
#   averageRating -> Rating  (capitalised so it is distinct from Netflix's
#                             own `rating` column)
#
# `rename` silently ignores labels that are absent, so unlike rebuilding the
# frame from `rating.primaryTitle` / `rating.averageRating`, this cell can be
# re-run after `rating` has already been reshaped without raising
# AttributeError.
rating = rating.rename(
    columns={"primaryTitle": "title", "averageRating": "Rating"}
)[["title", "Rating"]]
rating

Unnamed: 0,title,Rating
0,Carmencita,5.7
1,Le clown et ses chiens,6.0
2,Pauvre Pierrot,6.5
3,Un bon bock,5.8
4,Blacksmith Scene,6.2
...,...,...
1221981,Horrid Henry Delivers the Milk,6.6
1221982,The Nun 2,5.7
1221983,6 Gunn,8.4
1221984,Episode #10.15,6.7


In [9]:
# Load the Netflix catalogue with pandas' default parsing options and
# preview its first five rows.
df = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/DATATHON/dataset/netflix_titles.csv",
)
df.head(5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [10]:
# Print the Netflix frame's row count, per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8807 non-null   object
 1   type          8807 non-null   object
 2   title         8807 non-null   object
 3   director      6173 non-null   object
 4   cast          7982 non-null   object
 5   country       7976 non-null   object
 6   date_added    8797 non-null   object
 7   release_year  8807 non-null   int64 
 8   rating        8803 non-null   object
 9   duration      8804 non-null   object
 10  listed_in     8807 non-null   object
 11  description   8807 non-null   object
dtypes: int64(1), object(11)
memory usage: 825.8+ KB


In [11]:
# Attach the IMDb `Rating` column to the Netflix rows by exact title match.
# Inner join: Netflix titles with no identically named IMDb title are dropped.
# NOTE(review): titles are not unique on the IMDb side, so a single Netflix
# row can match several ratings and appear multiple times in the result.
datajoin = df.merge(rating, on="title", how="inner")
datajoin

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,Rating
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",7.4
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",7.9
2,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",7.3
3,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",7.2
4,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",6.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...
26895,s8806,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero...",4.6
26896,s8806,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero...",6.0
26897,s8806,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero...",6.7
26898,s8807,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",India,"March 2, 2019",2015,TV-14,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...,6.1


In [12]:
# Keep one row per title. `drop_duplicates` returns a new frame, so the result
# is bound to a name here; as a bare expression it was only displayed and then
# discarded, leaving every duplicated row in `datajoin` for later cells.
# `datajoin` itself is left untouched so existing references keep working.
#
# keep="first" (the pandas default, spelled out) retains whichever match comes
# first in join order.
# NOTE(review): that first match is not guaranteed to be the IMDb entry for
# the actual Netflix title when several works share a name - confirm.
datajoin_unique = datajoin.drop_duplicates(subset=["title"], keep="first")

# Invariant for downstream cells: exactly one row (one rating) per title.
assert datajoin_unique["title"].is_unique

datajoin_unique

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,Rating
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",7.4
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",7.9
5,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,7.2
7,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",6.7
8,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,9.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
26871,s8803,Movie,Zodiac,David Fincher,"Mark Ruffalo, Jake Gyllenhaal, Robert Downey J...",United States,"November 20, 2019",2007,R,158 min,"Cult Movies, Dramas, Thrillers","A political cartoonist, a crime reporter and a...",6.7
26879,s8804,TV Show,Zombie Dumb,,,,"July 1, 2019",2018,TV-Y7,2 Seasons,"Kids' TV, Korean TV Shows, TV Comedies","While living alone in a spooky town, a young g...",5.5
26880,s8805,Movie,Zombieland,Ruben Fleischer,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ...",United States,"November 1, 2019",2009,R,88 min,"Comedies, Horror Movies",Looking to survive in a world taken over by zo...,7.3
26885,s8806,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero...",7.8
