# Project 3 : Binging Netflix Data

## Part 1 --- ETL (Extract, Transform, Load)

In [2]:
# Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import json

### Wrangling Data
- Base dataset from kaggle, some entries missing IMDb score;
- Merging with two datasets from IMDb, one contains title+id, one contains score+id, merge on title;

In [3]:
# Read the 'Netflix_Engagement_Plus.csv'data file as engagement_df
engagement_df = pd.read_csv("Resources/Netflix_Engagement_Plus.csv")
engagement_df.head()

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Number of Ratings,Rating,Genre,Key Words,Description
0,The Night Agent: Season 1,Yes,2023-03-23,812100000,7696.0,6.0,"['Biography', 'Drama', 'History']","persian empire,empire,5th century b.c.,achaeme...",
1,Ginny & Georgia: Season 2,Yes,2023-01-05,665100000,5216.0,5.7,"['Comedy', 'Drama', 'Romance']","producer,three word title,headstrong,arranged ...",The film follows headstrong Ginny who meets Su...
2,The Glory: Season 1 // 더 글로리: 시즌 1,Yes,2022-12-30,622800000,11869.0,8.4,['Short'],,
3,Wednesday: Season 1,Yes,2022-11-23,507700000,,,['Talk-Show'],youtube video,MsMojo counts down the top 10 Wednesday (2022)...
4,Queen Charlotte: A Bridgerton Story,Yes,2023-05-04,503000000,50077.0,7.4,"['Drama', 'History', 'Romance']","prequel,queen,historical,england,queen charlot...","Betrothed against her will to King George, you..."


In [4]:
# Read the 'data_title.tsv'data file as title_df
title_df = pd.read_csv("Resources/data_title.tsv",sep = '\t') 
title_df.head()

  title_df = pd.read_csv("Resources/data_title.tsv",sep = '\t')


Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0000001,short,Carmencita,Carmencita,0,1894,\N,1,"Documentary,Short"
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892,\N,5,"Animation,Short"
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892,\N,4,"Animation,Comedy,Romance"
3,tt0000004,short,Un bon bock,Un bon bock,0,1892,\N,12,"Animation,Short"
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893,\N,1,"Comedy,Short"


In [5]:
# Read the 'data.tsv'data file as score_df
score_df = pd.read_csv("Resources/data_ratings.tsv",sep = '\t') 
score_df.head()

Unnamed: 0,tconst,averageRating,numVotes
0,tt0000001,5.7,2008
1,tt0000002,5.7,270
2,tt0000003,6.5,1926
3,tt0000004,5.4,178
4,tt0000005,6.2,2701


In [6]:
# Filtering out the df with missing ratings part, name it 'missing_rating_engagement_df'
missing_rating_engagement_df = engagement_df[engagement_df['Rating'].isnull()]
missing_rating_engagement_df

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Number of Ratings,Rating,Genre,Key Words,Description
3,Wednesday: Season 1,Yes,2022-11-23,507700000,,,['Talk-Show'],youtube video,MsMojo counts down the top 10 Wednesday (2022)...
7,Outer Banks: Season 3,Yes,2023-02-23,402500000,,,,,
10,Manifest: Season 4,Yes,2022-11-04,262600000,,,"['Documentary', 'Short']",,
11,Kaleidoscope: Limited Series,Yes,2023-01-01,252500000,,,,,
12,Firefly Lane: Season 2,Yes,2022-12-02,251500000,,,,,
...,...,...,...,...,...,...,...,...,...
18325,حكايات بنات الجزء ١: Season 2,No,,100000,,,,,
18326,حكايات بنات الجزء ١: Season 3,No,,100000,,,,,
18329,두근두근 내 인생,No,,100000,,,,,
18330,라디오 스타,No,,100000,,,,,


In [7]:
# Merge title_df and score_df datasets to create score_titile_df with both title and score
score_titile_df = pd.merge(title_df,score_df,on="tconst",how="outer")
score_titile_df.head()

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,averageRating,numVotes
0,tt0000001,short,Carmencita,Carmencita,0,1894,\N,1,"Documentary,Short",5.7,2008.0
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892,\N,5,"Animation,Short",5.7,270.0
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892,\N,4,"Animation,Comedy,Romance",6.5,1926.0
3,tt0000004,short,Un bon bock,Un bon bock,0,1892,\N,12,"Animation,Short",5.4,178.0
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893,\N,1,"Comedy,Short",6.2,2701.0


In [8]:
# Merge score_titile_df with missing_rating_engagement_df
rating_engagement_df = pd.merge(missing_rating_engagement_df,score_titile_df,left_on="Title",right_on="originalTitle",how="inner")
rating_engagement_df

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Number of Ratings,Rating,Genre,Key Words,Description,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,averageRating,numVotes
0,AKA,Yes,2023-04-28,120000000,,,,,,tt0317052,movie,AKA,AKA,0,2002,\N,123,"Drama,Romance",6.3,1239.0
1,AKA,Yes,2023-04-28,120000000,,,,,,tt10730418,short,AKA,AKA,0,2018,\N,17,"Adventure,Family,Fantasy",,
2,AKA,Yes,2023-04-28,120000000,,,,,,tt1117442,tvEpisode,AKA,AKA,0,2007,\N,43,"Action,Crime,Drama",8.1,36.0
3,AKA,Yes,2023-04-28,120000000,,,,,,tt13444004,short,AKA,AKA,0,2020,\N,15,Short,,
4,AKA,Yes,2023-04-28,120000000,,,,,,tt13815300,tvEpisode,AKA,AKA,0,2015,\N,\N,"Music,Talk-Show",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2657,Zulu,No,,100000,,,,,,tt0058777,movie,Zulu,Zulu,0,1964,\N,138,"Drama,History,War",7.7,41872.0
2658,Zulu,No,,100000,,,,,,tt21418892,movie,Zulu,Zulu,0,2005,\N,50,Documentary,,
2659,Zulu,No,,100000,,,,,,tt2249221,movie,Zulu,Zulu,0,2013,\N,110,"Action,Crime,Drama",6.7,19668.0
2660,Zulu,No,,100000,,,,,,tt7232874,tvEpisode,Zulu,Zulu,0,2015,\N,18,"Comedy,Documentary,History",7.7,8.0


In [9]:
# Filtering out the entries with ratings in order to narrow down the wanted matching title
rating_engagement_df = rating_engagement_df[rating_engagement_df['averageRating'].notnull()]
rating_engagement_df

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Number of Ratings,Rating,Genre,Key Words,Description,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,averageRating,numVotes
0,AKA,Yes,2023-04-28,120000000,,,,,,tt0317052,movie,AKA,AKA,0,2002,\N,123,"Drama,Romance",6.3,1239.0
2,AKA,Yes,2023-04-28,120000000,,,,,,tt1117442,tvEpisode,AKA,AKA,0,2007,\N,43,"Action,Crime,Drama",8.1,36.0
6,AKA,Yes,2023-04-28,120000000,,,,,,tt27197387,movie,AKA,AKA,0,2023,\N,122,"Action,Crime,Thriller",6.6,12119.0
10,Stranger Things 3,Yes,2019-07-04,67000000,,,"['Short', 'Comedy']",,,tt8046346,tvEpisode,Stranger Things 3,Stranger Things 3,0,2017,\N,\N,"Action,Adventure,Comedy",9.4,16.0
11,The Gray Man,Yes,2022-07-22,58300000,,,,,,tt0360615,movie,The Gray Man,The Gray Man,0,2002,\N,87,"Action,Crime,Drama",7.9,42.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2652,Wild Venice,No,,100000,,,"['Comedy', 'Family']",,,tt27571435,movie,Wild Venice,Wild Venice,0,2014,\N,52,Documentary,6.6,8.0
2656,Zog,No,,100000,,,"['Animation', 'Short', 'Comedy']",,"Almost 250,000 years ago, Zog, the first femin...",tt9109620,tvMovie,Zog,Zog,0,2018,\N,26,"Animation,Comedy,Family",7.3,1326.0
2657,Zulu,No,,100000,,,,,,tt0058777,movie,Zulu,Zulu,0,1964,\N,138,"Drama,History,War",7.7,41872.0
2659,Zulu,No,,100000,,,,,,tt2249221,movie,Zulu,Zulu,0,2013,\N,110,"Action,Crime,Drama",6.7,19668.0


In [10]:
# Checking the data types of all columns
rating_engagement_df.dtypes

Title                   object
Available Globally?     object
Release Date            object
Hours Viewed             int64
Number of Ratings      float64
Rating                 float64
Genre                   object
Key Words               object
Description             object
tconst                  object
titleType               object
primaryTitle            object
originalTitle           object
isAdult                 object
startYear               object
endYear                 object
runtimeMinutes          object
genres                  object
averageRating          float64
numVotes               float64
dtype: object

In [11]:
# Converting the data type of 'Release Date' to datetime in order to extract the year
rating_engagement_df['Release Date'] = pd.to_datetime(rating_engagement_df['Release Date'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rating_engagement_df['Release Date'] = pd.to_datetime(rating_engagement_df['Release Date'])


In [12]:
# Extracting the year from 'Release Date' to new column called 'release_year'
rating_engagement_df['release_year'] = rating_engagement_df['Release Date'].dt.strftime('%Y')
rating_engagement_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rating_engagement_df['release_year'] = rating_engagement_df['Release Date'].dt.strftime('%Y')


Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Number of Ratings,Rating,Genre,Key Words,Description,tconst,...,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,averageRating,numVotes,release_year
0,AKA,Yes,2023-04-28,120000000,,,,,,tt0317052,...,AKA,AKA,0,2002,\N,123,"Drama,Romance",6.3,1239.0,2023
2,AKA,Yes,2023-04-28,120000000,,,,,,tt1117442,...,AKA,AKA,0,2007,\N,43,"Action,Crime,Drama",8.1,36.0,2023
6,AKA,Yes,2023-04-28,120000000,,,,,,tt27197387,...,AKA,AKA,0,2023,\N,122,"Action,Crime,Thriller",6.6,12119.0,2023
10,Stranger Things 3,Yes,2019-07-04,67000000,,,"['Short', 'Comedy']",,,tt8046346,...,Stranger Things 3,Stranger Things 3,0,2017,\N,\N,"Action,Adventure,Comedy",9.4,16.0,2019
11,The Gray Man,Yes,2022-07-22,58300000,,,,,,tt0360615,...,The Gray Man,The Gray Man,0,2002,\N,87,"Action,Crime,Drama",7.9,42.0,2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2652,Wild Venice,No,NaT,100000,,,"['Comedy', 'Family']",,,tt27571435,...,Wild Venice,Wild Venice,0,2014,\N,52,Documentary,6.6,8.0,
2656,Zog,No,NaT,100000,,,"['Animation', 'Short', 'Comedy']",,"Almost 250,000 years ago, Zog, the first femin...",tt9109620,...,Zog,Zog,0,2018,\N,26,"Animation,Comedy,Family",7.3,1326.0,
2657,Zulu,No,NaT,100000,,,,,,tt0058777,...,Zulu,Zulu,0,1964,\N,138,"Drama,History,War",7.7,41872.0,
2659,Zulu,No,NaT,100000,,,,,,tt2249221,...,Zulu,Zulu,0,2013,\N,110,"Action,Crime,Drama",6.7,19668.0,


In [13]:
# Keep the scores that has the same 'startYear' and 'release_year'
manual_rating_engagement_df = rating_engagement_df[rating_engagement_df['startYear']==rating_engagement_df['release_year']]
manual_rating_engagement_df

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Number of Ratings,Rating,Genre,Key Words,Description,tconst,...,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,averageRating,numVotes,release_year
6,AKA,Yes,2023-04-28,120000000,,,,,,tt27197387,...,AKA,AKA,0,2023,\N,122,"Action,Crime,Thriller",6.6,12119.0,2023
14,The Gray Man,Yes,2022-07-22,58300000,,,,,,tt1649418,...,The Gray Man,The Gray Man,0,2022,\N,122,"Action,Thriller",6.5,229717.0,2022
16,The Gray Man,Yes,2022-07-22,58300000,,,,,,tt21400610,...,The Gray Man,The Gray Man,0,2022,\N,\N,Comedy,7.9,14.0,2022
17,The Gray Man,Yes,2022-07-22,58300000,,,,,,tt21433764,...,The Gray Man,The Gray Man,0,2022,\N,\N,Comedy,6.9,17.0,2022
19,Blood & Gold,Yes,2023-05-26,51600000,,,,,,tt18073328,...,Blood & Gold,Blood & Gold,0,2023,\N,98,"Action,Drama,War",6.5,15493.0,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2221,Louis C.K. 2017,Yes,2017-04-04,200000,,,"['Short', 'News']","reference to louis c.k.,reference to harvey we...",Sexual allegations erupt everywhere in Hollywo...,tt6736782,...,Louis C.K. 2017,Louis C.K. 2017,0,2017,\N,74,Comedy,7.6,10738.0,2017
2446,Jen Kirkman: I'm Gonna Die Alone (And I Feel F...,Yes,2015-05-22,100000,,,,,,tt4703660,...,Jen Kirkman: I'm Gonna Die Alone (And I Feel F...,Jen Kirkman: I'm Gonna Die Alone (And I Feel F...,0,2015,\N,78,Comedy,6.8,1636.0,2015
2489,Nicole Byer: BBW (Big Beautiful Weirdo),Yes,2021-12-07,100000,,,,,,tt15204492,...,Nicole Byer: BBW (Big Beautiful Weirdo),Nicole Byer: BBW (Big Beautiful Weirdo),0,2021,\N,65,Comedy,5.9,618.0,2021
2504,Patton Oswalt: Annihilation,Yes,2017-10-17,100000,,,,,,tt7026230,...,Patton Oswalt: Annihilation,Patton Oswalt: Annihilation,0,2017,\N,66,Comedy,7.3,2490.0,2017


In [14]:
test_df = title_df[title_df["primaryTitle"] == "#NoFilter"]
test_df

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
2311652,tt12428990,tvEpisode,#NoFilter,#NoFilter,0,2017,\N,\N,Documentary
6268781,tt26368084,tvSeries,#NoFilter,Sem Filtro,0,2023,\N,30,Comedy
6917152,tt28664132,short,#NoFilter,#NoFilter,0,2023,\N,22,"Comedy,Drama,Short"
7491157,tt3411820,short,#NoFilter,#NoFilter,0,2013,\N,6,"Drama,Short"
7512525,tt3460710,tvEpisode,#NoFilter,#NoFilter,0,2013,\N,\N,Comedy
8087511,tt4767674,tvEpisode,#NoFilter,#NoFilter,0,2016,\N,23,"Drama,Romance"
8535870,tt5781880,tvSeries,#NoFilter,#NoFilter,0,2016,\N,\N,Talk-Show
9614938,tt8174472,tvEpisode,#NoFilter,#NoFilter,0,2018,\N,\N,Comedy
9640800,tt8230776,tvSeries,#NoFilter,#NoFilter,0,2018,2018,17,Comedy
9681895,tt8320494,tvShort,#NoFilter,#NoFilter,0,2018,\N,17,"Comedy,Short"


In [15]:
test_df[test_df['titleType'] == "tvSeries"]

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
6268781,tt26368084,tvSeries,#NoFilter,Sem Filtro,0,2023,\N,30,Comedy
8535870,tt5781880,tvSeries,#NoFilter,#NoFilter,0,2016,\N,\N,Talk-Show
9640800,tt8230776,tvSeries,#NoFilter,#NoFilter,0,2018,2018,17,Comedy


In [16]:
test2_df = title_df[title_df["titleType"] == "tvSeries"]
test2_df

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
34971,tt0035599,tvSeries,Voice of Firestone Televues,Voice of Firestone Televues,0,1943,1947,15,\N
35172,tt0035803,tvSeries,The German Weekly Review,Die Deutsche Wochenschau,0,1940,1945,12,"Documentary,News"
37600,tt0038276,tvSeries,You Are an Artist,You Are an Artist,0,1946,1955,15,Talk-Show
38434,tt0039120,tvSeries,Americana,Americana,0,1947,1949,30,"Family,Game-Show"
38435,tt0039121,tvSeries,Birthday Party,Birthday Party,0,1947,1949,30,Family
...,...,...,...,...,...,...,...,...,...
10415589,tt9916210,tvSeries,Rumpole of the Bailey,Rumpole of the Bailey,0,\N,\N,\N,\N
10415592,tt9916216,tvSeries,Kalyanam Mudhal Kadhal Varai,Kalyanam Mudhal Kadhal Varai,0,2014,2017,22,Romance
10415593,tt9916218,tvSeries,Lost in Food,Lost in Food,0,2016,2017,\N,Talk-Show
10415673,tt9916380,tvSeries,Meie aasta Aafrikas,Meie aasta Aafrikas,0,2019,\N,43,"Adventure,Comedy,Family"


In [17]:
test2_df = test2_df[test2_df["startYear"] == "2012"]
test2_df

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
199475,tt0207877,tvSeries,Counterpoint,Counterpoint,0,2012,\N,\N,\N
263377,tt0275126,tvSeries,The Di Palma Forum at UNLV,The Di Palma Forum at UNLV,0,2012,\N,\N,\N
285835,tt0298637,tvSeries,Escape with ET,Escape with ET,0,2012,\N,\N,Family
321427,tt0335630,tvSeries,Onkel Reje og børnenes brevkasse,Onkel Reje og børnenes brevkasse,0,2012,\N,\N,\N
467868,tt0487001,tvSeries,NFL Primetime,NFL Primetime,0,2012,\N,60,"News,Sport,Talk-Show"
...,...,...,...,...,...,...,...,...,...
10398963,tt9880204,tvSeries,Royal Rasoi,Royal Rasoi,0,2012,2013,25,Talk-Show
10399215,tt9880738,tvSeries,Night and Me,Night and Me,0,2012,\N,\N,Family
10400048,tt9882602,tvSeries,Tube Tube: Back2Work,Tube Tube: Back2Work,0,2012,2012,\N,\N
10403171,tt9889344,tvSeries,"Silver Spoon, Sterling Shackles","Silver Spoon, Sterling Shackles",0,2012,2012,\N,Drama


In [18]:
test2_df = title_df[title_df['originalTitle'].str.contains('The Glory')]
test2_df

ValueError: Cannot mask with non-boolean array containing NA / NaN values

In [19]:
title_df

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0000001,short,Carmencita,Carmencita,0,1894,\N,1,"Documentary,Short"
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892,\N,5,"Animation,Short"
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892,\N,4,"Animation,Comedy,Romance"
3,tt0000004,short,Un bon bock,Un bon bock,0,1892,\N,12,"Animation,Short"
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893,\N,1,"Comedy,Short"
...,...,...,...,...,...,...,...,...,...
10415893,tt9916848,tvEpisode,Episode #3.17,Episode #3.17,0,2009,\N,\N,"Action,Drama,Family"
10415894,tt9916850,tvEpisode,Episode #3.19,Episode #3.19,0,2010,\N,\N,"Action,Drama,Family"
10415895,tt9916852,tvEpisode,Episode #3.20,Episode #3.20,0,2010,\N,\N,"Action,Drama,Family"
10415896,tt9916856,short,The Wind,The Wind,0,2015,\N,27,Short


In [20]:
test2_df = title_df[title_df['originalTitle'].notnull()]
test2_df

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0000001,short,Carmencita,Carmencita,0,1894,\N,1,"Documentary,Short"
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892,\N,5,"Animation,Short"
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892,\N,4,"Animation,Comedy,Romance"
3,tt0000004,short,Un bon bock,Un bon bock,0,1892,\N,12,"Animation,Short"
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893,\N,1,"Comedy,Short"
...,...,...,...,...,...,...,...,...,...
10415893,tt9916848,tvEpisode,Episode #3.17,Episode #3.17,0,2009,\N,\N,"Action,Drama,Family"
10415894,tt9916850,tvEpisode,Episode #3.19,Episode #3.19,0,2010,\N,\N,"Action,Drama,Family"
10415895,tt9916852,tvEpisode,Episode #3.20,Episode #3.20,0,2010,\N,\N,"Action,Drama,Family"
10415896,tt9916856,short,The Wind,The Wind,0,2015,\N,27,Short


In [21]:
x = test2_df['titleType'].unique()
x

array(['short', 'movie', 'tvShort', 'tvMovie', 'tvSeries', 'tvEpisode',
       'tvMiniSeries', 'tvSpecial', 'video', 'videoGame', 'tvPilot'],
      dtype=object)

In [22]:
test2_df = test2_df[test2_df['primaryTitle'].str.contains('The Glory')]
test2_df

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
5331,tt0005397,short,The Glory of Clementina,The Glory of Clementina,0,1915,\N,\N,"Drama,Short"
5332,tt0005398,movie,The Glory of Youth,The Glory of Youth,0,1915,\N,\N,Drama
7906,tt0008016,movie,The Glory of Yolanda,The Glory of Yolanda,0,1917,\N,50,"Drama,Romance"
12991,tt0013184,movie,The Glory of Clementina,The Glory of Clementina,0,1922,\N,60,Drama
27193,tt0027682,movie,The Glory Trail,The Glory Trail,0,1936,\N,65,Western
...,...,...,...,...,...,...,...,...,...
9784658,tt8542786,tvSeries,The Glory Generation,The Glory Generation,0,2013,2013,29,Reality-TV
9810497,tt8600302,short,The Glory Years,The Glory Years,0,2019,\N,25,"Comedy,Short,Thriller"
9826745,tt8636564,tvEpisode,Thick Latina Deep Throats In The Gloryhole,Thick Latina Deep Throats In The Gloryhole,1,2016,\N,16,Adult
10134119,tt9308606,tvEpisode,Mazda RX-7 Spirit R: The Glory Days of Japanes...,Mazda RX-7 Spirit R: The Glory Days of Japanes...,0,2012,\N,8,Sport


In [23]:
test2_df = test2_df[test2_df["titleType"] == "tvSeries"]
test2_df

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
2695020,tt1315221,tvSeries,The Glory of Team Batista,Chîmu bachisuta no eikô,0,2008,2014,\N,Drama
3432071,tt14509642,tvSeries,The Glory of Youth,Hao shou jiu wei,0,2021,2021,\N,Drama
5469781,tt21344706,tvSeries,The Glory,Deo geullori,0,2022,\N,50,"Drama,Mystery,Thriller"
6039468,tt23985694,tvSeries,The Glory of a Life,Shokoh-e Yek Zendegi,0,2017,2017,45,"Drama,Romance"
7957415,tt4470788,tvSeries,The Glory Trail,The Glory Trail,0,1965,\N,30,Documentary
8064837,tt4714132,tvSeries,The Glory Is Gone,Der Lack ist ab,0,2015,2018,10,"Comedy,Drama,Romance"
8851327,tt6492378,tvSeries,The Glory of Tang Dynasty,Da Tang rong yao,0,2017,\N,\N,"Drama,History,Romance"
9006440,tt6840484,tvSeries,The Glory Hole,The Glory Hole,0,2017,\N,22,Comedy
9784658,tt8542786,tvSeries,The Glory Generation,The Glory Generation,0,2013,2013,29,Reality-TV


In [24]:
test2_df[test2_df['startYear'] == "2019"]

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres


In [28]:
score_df[score_df['tconst'] == 'tt0386676']

Unnamed: 0,tconst,averageRating,numVotes
217759,tt0386676,9.0,682605
