# 1. Importing Libraries

In [98]:
import pandas as pd
import numpy as np

# 2. Data Loading

In [99]:
# Read the Netflix catalogue CSV from the working directory, then preview it.
df = pd.read_csv(filepath_or_buffer='netflix1.csv')
df

Unnamed: 0,show_id,type,title,director,country,date_added,release_year,rating,duration,listed_in
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,United States,9/25/2021,2020,PG-13,90 min,Documentaries
1,s3,TV Show,Ganglands,Julien Leclercq,France,9/24/2021,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act..."
2,s6,TV Show,Midnight Mass,Mike Flanagan,United States,9/24/2021,2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries"
3,s14,Movie,Confessions of an Invisible Girl,Bruno Garotti,Brazil,9/22/2021,2021,TV-PG,91 min,"Children & Family Movies, Comedies"
4,s8,Movie,Sankofa,Haile Gerima,United States,9/24/2021,1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies"
...,...,...,...,...,...,...,...,...,...,...
8785,s8797,TV Show,Yunus Emre,Not Given,Turkey,1/17/2017,2016,TV-PG,2 Seasons,"International TV Shows, TV Dramas"
8786,s8798,TV Show,Zak Storm,Not Given,United States,9/13/2018,2016,TV-Y7,3 Seasons,Kids' TV
8787,s8801,TV Show,Zindagi Gulzar Hai,Not Given,Pakistan,12/15/2016,2012,TV-PG,1 Season,"International TV Shows, Romantic TV Shows, TV ..."
8788,s8784,TV Show,Yoko,Not Given,Pakistan,6/23/2018,2016,TV-Y,1 Season,Kids' TV


# 3. Exploring Data

In [100]:
# Show the dtype pandas inferred for each column when the CSV was read.
df.dtypes

show_id         object
type            object
title           object
director        object
country         object
date_added      object
release_year     int64
rating          object
duration        object
listed_in       object
dtype: object

In [101]:
# Per-column flag: True if the column holds at least one missing (NaN) value.
df.isnull().any(axis=0)

show_id         False
type            False
title           False
director        False
country         False
date_added      False
release_year    False
rating          False
duration        False
listed_in       False
dtype: bool

In [102]:
# Number of non-missing entries per column. When every column matches the row
# count there are no NaN values (placeholder strings are not counted as missing).
df.notna().sum()

show_id         8790
type            8790
title           8790
director        8790
country         8790
date_added      8790
release_year    8790
rating          8790
duration        8790
listed_in       8790
dtype: int64

In [103]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns.
df.describe()

Unnamed: 0,release_year
count,8790.0
mean,2014.183163
std,8.825466
min,1925.0
25%,2013.0
50%,2017.0
75%,2019.0
max,2021.0


In [104]:
# List the distinct values found in each column, one row per column.
pd.DataFrame(df.apply(pd.Series.unique))

Unnamed: 0,0
show_id,"[s1, s3, s6, s14, s8, s9, s10, s939, s13, s940..."
type,"[Movie, TV Show]"
title,"[Dick Johnson Is Dead, Ganglands, Midnight Mas..."
director,"[Kirsten Johnson, Julien Leclercq, Mike Flanag..."
country,"[United States, France, Brazil, United Kingdom..."
date_added,"[9/25/2021, 9/24/2021, 9/22/2021, 5/1/2021, 9/..."
release_year,"[2020, 2021, 1993, 2019, 2013, 2014, 2018, 199..."
rating,"[PG-13, TV-MA, TV-PG, TV-14, TV-Y7, TV-Y, PG, ..."
duration,"[90 min, 1 Season, 91 min, 125 min, 9 Seasons,..."
listed_in,"[Documentaries, Crime TV Shows, International ..."


In [105]:
# Count of distinct values per column.
df.nunique()

show_id         8790
type               2
title           8787
director        4528
country           86
date_added      1713
release_year      74
rating            14
duration         220
listed_in        513
dtype: int64

In [106]:
# True if any row is an exact copy of an earlier row (all columns compared).
df.duplicated(subset=None, keep='first').any()

False

In [107]:
# True if any show_id value appears more than once.
df.duplicated(subset='show_id').any()

False

# 4. Data Cleaning

In [108]:
# Use show_id as the row index.
# The guard keeps this cell re-runnable: once show_id has been moved into the
# index it is no longer a column, and an unguarded second set_index call would
# raise KeyError.
if 'show_id' in df.columns:
    df = df.set_index('show_id')
df

Unnamed: 0_level_0,type,title,director,country,date_added,release_year,rating,duration,listed_in
show_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,United States,9/25/2021,2020,PG-13,90 min,Documentaries
s3,TV Show,Ganglands,Julien Leclercq,France,9/24/2021,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act..."
s6,TV Show,Midnight Mass,Mike Flanagan,United States,9/24/2021,2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries"
s14,Movie,Confessions of an Invisible Girl,Bruno Garotti,Brazil,9/22/2021,2021,TV-PG,91 min,"Children & Family Movies, Comedies"
s8,Movie,Sankofa,Haile Gerima,United States,9/24/2021,1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies"
...,...,...,...,...,...,...,...,...,...
s8797,TV Show,Yunus Emre,Not Given,Turkey,1/17/2017,2016,TV-PG,2 Seasons,"International TV Shows, TV Dramas"
s8798,TV Show,Zak Storm,Not Given,United States,9/13/2018,2016,TV-Y7,3 Seasons,Kids' TV
s8801,TV Show,Zindagi Gulzar Hai,Not Given,Pakistan,12/15/2016,2012,TV-PG,1 Season,"International TV Shows, Romantic TV Shows, TV ..."
s8784,TV Show,Yoko,Not Given,Pakistan,6/23/2018,2016,TV-Y,1 Season,Kids' TV


In [109]:
# Turn the 'Not Given' placeholder string into a real missing value (NaN)
# so that pandas' null-aware methods treat it as missing.
df = df.replace(to_replace='Not Given', value=np.nan)
df

Unnamed: 0_level_0,type,title,director,country,date_added,release_year,rating,duration,listed_in
show_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,United States,9/25/2021,2020,PG-13,90 min,Documentaries
s3,TV Show,Ganglands,Julien Leclercq,France,9/24/2021,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act..."
s6,TV Show,Midnight Mass,Mike Flanagan,United States,9/24/2021,2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries"
s14,Movie,Confessions of an Invisible Girl,Bruno Garotti,Brazil,9/22/2021,2021,TV-PG,91 min,"Children & Family Movies, Comedies"
s8,Movie,Sankofa,Haile Gerima,United States,9/24/2021,1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies"
...,...,...,...,...,...,...,...,...,...
s8797,TV Show,Yunus Emre,,Turkey,1/17/2017,2016,TV-PG,2 Seasons,"International TV Shows, TV Dramas"
s8798,TV Show,Zak Storm,,United States,9/13/2018,2016,TV-Y7,3 Seasons,Kids' TV
s8801,TV Show,Zindagi Gulzar Hai,,Pakistan,12/15/2016,2012,TV-PG,1 Season,"International TV Shows, Romantic TV Shows, TV ..."
s8784,TV Show,Yoko,,Pakistan,6/23/2018,2016,TV-Y,1 Season,Kids' TV


In [110]:
# Keep only the rows that have a missing value in at least one column.
df[df.isnull().any(axis='columns')]

Unnamed: 0_level_0,type,title,director,country,date_added,release_year,rating,duration,listed_in
show_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
s852,Movie,99 Songs (Tamil),,Pakistan,5/21/2021,2021,TV-14,131 min,"Dramas, International Movies, Music & Musicals"
s4,TV Show,Jailbirds New Orleans,,Pakistan,9/24/2021,2021,TV-MA,1 Season,"Docuseries, Reality TV"
s15,TV Show,Crime Stories: India Detectives,,Pakistan,9/22/2021,2021,TV-MA,1 Season,"British TV Shows, Crime TV Shows, Docuseries"
s7,Movie,My Little Pony: A New Generation,"Robert Cullen, José Luis Ucha",,9/24/2021,2021,PG,91 min,Children & Family Movies
s12,TV Show,Bangkok Breaking,Kongkiat Komesiri,,9/23/2021,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act..."
...,...,...,...,...,...,...,...,...,...
s8797,TV Show,Yunus Emre,,Turkey,1/17/2017,2016,TV-PG,2 Seasons,"International TV Shows, TV Dramas"
s8798,TV Show,Zak Storm,,United States,9/13/2018,2016,TV-Y7,3 Seasons,Kids' TV
s8801,TV Show,Zindagi Gulzar Hai,,Pakistan,12/15/2016,2012,TV-PG,1 Season,"International TV Shows, Romantic TV Shows, TV ..."
s8784,TV Show,Yoko,,Pakistan,6/23/2018,2016,TV-Y,1 Season,Kids' TV


In [111]:
# Convert date_added from month/day/year strings (e.g. '9/25/2021') to
# datetime64. The explicit format removes any reliance on format inference,
# so day and month can never be swapped, and a value that does not match
# raises instead of being parsed differently from the rest.
df['date_added'] = pd.to_datetime(df['date_added'], format='%m/%d/%Y')
df

Unnamed: 0_level_0,type,title,director,country,date_added,release_year,rating,duration,listed_in
show_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,United States,2021-09-25,2020,PG-13,90 min,Documentaries
s3,TV Show,Ganglands,Julien Leclercq,France,2021-09-24,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act..."
s6,TV Show,Midnight Mass,Mike Flanagan,United States,2021-09-24,2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries"
s14,Movie,Confessions of an Invisible Girl,Bruno Garotti,Brazil,2021-09-22,2021,TV-PG,91 min,"Children & Family Movies, Comedies"
s8,Movie,Sankofa,Haile Gerima,United States,2021-09-24,1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies"
...,...,...,...,...,...,...,...,...,...
s8797,TV Show,Yunus Emre,,Turkey,2017-01-17,2016,TV-PG,2 Seasons,"International TV Shows, TV Dramas"
s8798,TV Show,Zak Storm,,United States,2018-09-13,2016,TV-Y7,3 Seasons,Kids' TV
s8801,TV Show,Zindagi Gulzar Hai,,Pakistan,2016-12-15,2012,TV-PG,1 Season,"International TV Shows, Romantic TV Shows, TV ..."
s8784,TV Show,Yoko,,Pakistan,2018-06-23,2016,TV-Y,1 Season,Kids' TV


# Total Content on Netflix

In [112]:
# Total number of titles = number of rows in the frame.
len(df.index)

8790

# Top 10 Countries with Netflix Content

In [113]:
# Count titles per country (groupby skips rows whose country is missing),
# then show the ten countries with the most titles.
by_cont = df.groupby('country').size().to_frame(name='amount of content')
by_cont.sort_values(by='amount of content', ascending=False).head(10)

Unnamed: 0_level_0,amount of content
country,Unnamed: 1_level_1
United States,3240
India,1057
United Kingdom,638
Pakistan,421
Canada,271
Japan,259
South Korea,214
France,213
Spain,182
Mexico,138


# Monthly Trend of Movie and TV Show Releases

In [114]:
# Derive the calendar month (1-12) in which each title was added.
df = df.assign(month_added=df['date_added'].dt.month)

In [115]:
# Preview the frame, which now carries the month_added column.
df

Unnamed: 0_level_0,type,title,director,country,date_added,release_year,rating,duration,listed_in,month_added
show_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,United States,2021-09-25,2020,PG-13,90 min,Documentaries,9
s3,TV Show,Ganglands,Julien Leclercq,France,2021-09-24,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",9
s6,TV Show,Midnight Mass,Mike Flanagan,United States,2021-09-24,2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",9
s14,Movie,Confessions of an Invisible Girl,Bruno Garotti,Brazil,2021-09-22,2021,TV-PG,91 min,"Children & Family Movies, Comedies",9
s8,Movie,Sankofa,Haile Gerima,United States,2021-09-24,1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies",9
...,...,...,...,...,...,...,...,...,...,...
s8797,TV Show,Yunus Emre,,Turkey,2017-01-17,2016,TV-PG,2 Seasons,"International TV Shows, TV Dramas",1
s8798,TV Show,Zak Storm,,United States,2018-09-13,2016,TV-Y7,3 Seasons,Kids' TV,9
s8801,TV Show,Zindagi Gulzar Hai,,Pakistan,2016-12-15,2012,TV-PG,1 Season,"International TV Shows, Romantic TV Shows, TV ...",12
s8784,TV Show,Yoko,,Pakistan,2018-06-23,2016,TV-Y,1 Season,Kids' TV,6


In [129]:
# Group titles by the month they were added and by content type.
grouped = df.groupby(by=['month_added', 'type'])

In [130]:
# Number of titles in each (month, type) group, as a one-column frame.
grouped_df = grouped.size().to_frame()

In [131]:
# Give the single count column a descriptive label.
grouped_df = grouped_df.set_axis(['amount_of_releases'], axis='columns')
grouped_df

Unnamed: 0_level_0,Unnamed: 1_level_0,amount_of_releases
month_added,type,Unnamed: 2_level_1
1,Movie,545
1,TV Show,192
2,Movie,382
2,TV Show,180
3,Movie,528
3,TV Show,213
4,Movie,549
4,TV Show,214
5,Movie,439
5,TV Show,193


In [132]:
# Sanity check: the group counts should add up to the total number of titles.
grouped_df.loc[:, 'amount_of_releases'].sum()

8790

In [133]:
# Share of the whole catalogue (in percent) contributed by each
# (month, type) group.
movie_tv_trends_per_month = (
    grouped_df['amount_of_releases']
    .div(grouped_df['amount_of_releases'].sum())
    .mul(100)
    .to_frame(name='percentage released')
)
movie_tv_trends_per_month

Unnamed: 0_level_0,Unnamed: 1_level_0,percentage released
month_added,type,Unnamed: 2_level_1
1,Movie,6.200228
1,TV Show,2.1843
2,Movie,4.345848
2,TV Show,2.047782
3,Movie,6.006826
3,TV Show,2.423208
4,Movie,6.245734
4,TV Show,2.434585
5,Movie,4.994312
5,TV Show,2.195677


# Yearly Trend of Movie and TV Show Releases

In [121]:
# Derive the calendar year in which each title was added.
df = df.assign(year_added=df['date_added'].dt.year)
df

Unnamed: 0_level_0,type,title,director,country,date_added,release_year,rating,duration,listed_in,month_added,year_added
show_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,United States,2021-09-25,2020,PG-13,90 min,Documentaries,9,2021
s3,TV Show,Ganglands,Julien Leclercq,France,2021-09-24,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",9,2021
s6,TV Show,Midnight Mass,Mike Flanagan,United States,2021-09-24,2021,TV-MA,1 Season,"TV Dramas, TV Horror, TV Mysteries",9,2021
s14,Movie,Confessions of an Invisible Girl,Bruno Garotti,Brazil,2021-09-22,2021,TV-PG,91 min,"Children & Family Movies, Comedies",9,2021
s8,Movie,Sankofa,Haile Gerima,United States,2021-09-24,1993,TV-MA,125 min,"Dramas, Independent Movies, International Movies",9,2021
...,...,...,...,...,...,...,...,...,...,...,...
s8797,TV Show,Yunus Emre,,Turkey,2017-01-17,2016,TV-PG,2 Seasons,"International TV Shows, TV Dramas",1,2017
s8798,TV Show,Zak Storm,,United States,2018-09-13,2016,TV-Y7,3 Seasons,Kids' TV,9,2018
s8801,TV Show,Zindagi Gulzar Hai,,Pakistan,2016-12-15,2012,TV-PG,1 Season,"International TV Shows, Romantic TV Shows, TV ...",12,2016
s8784,TV Show,Yoko,,Pakistan,2018-06-23,2016,TV-Y,1 Season,Kids' TV,6,2018


In [122]:
# Number of titles added per (year, type) pair, as a one-column frame.
by_year_type = df.groupby(by=['year_added', 'type']).size().to_frame()
by_year_type

Unnamed: 0_level_0,Unnamed: 1_level_0,0
year_added,type,Unnamed: 2_level_1
2008,Movie,1
2008,TV Show,1
2009,Movie,2
2010,Movie,1
2011,Movie,13
2012,Movie,3
2013,Movie,6
2013,TV Show,5
2014,Movie,19
2014,TV Show,5


In [123]:
# Label the count column, then express each (year, type) count as a
# percentage of all titles.
by_year_type = by_year_type.set_axis(['amount_of_releases'], axis='columns')
yearly_trend = (
    by_year_type['amount_of_releases']
    .div(by_year_type['amount_of_releases'].sum())
    .mul(100)
    .to_frame(name='percentage_of_releases')
)
yearly_trend

Unnamed: 0_level_0,Unnamed: 1_level_0,percentage_of_releases
year_added,type,Unnamed: 2_level_1
2008,Movie,0.011377
2008,TV Show,0.011377
2009,Movie,0.022753
2010,Movie,0.011377
2011,Movie,0.147895
2012,Movie,0.03413
2013,Movie,0.068259
2013,TV Show,0.056883
2014,Movie,0.216155
2014,TV Show,0.056883


# Top 10 Movies

In [124]:
# Select the movies and list the first ten in ascending title order.
# NOTE: the ordering is alphabetical by title, not a popularity ranking.
filt = df['type'].eq('Movie')
df[filt].sort_values('title', ascending=True).iloc[:10]

Unnamed: 0_level_0,type,title,director,country,date_added,release_year,rating,duration,listed_in,month_added,year_added
show_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
s2037,Movie,#Alive,Cho Il,South Korea,2020-09-08,2020,TV-MA,99 min,"Horror Movies, International Movies, Thrillers",9,2020
s2305,Movie,#AnneFrank - Parallel Stories,"Sabina Fedeli, Anna Migotto",Italy,2020-07-01,2019,TV-14,95 min,"Documentaries, International Movies",7,2020
s2482,Movie,#FriendButMarried,Rako Prijanto,Indonesia,2020-05-21,2018,TV-G,102 min,"Dramas, International Movies, Romantic Movies",5,2020
s2325,Movie,#FriendButMarried 2,Rako Prijanto,Indonesia,2020-06-28,2020,TV-G,104 min,"Dramas, International Movies, Romantic Movies",6,2020
s5974,Movie,#Roxy,Michael Kennedy,Canada,2019-04-10,2018,TV-14,105 min,"Comedies, Romantic Movies",4,2019
s5696,Movie,#Rucker50,Robert McCullough Jr.,United States,2016-12-01,2016,TV-PG,56 min,"Documentaries, Sports Movies",12,2016
s655,Movie,#Selfie,Cristina Jacob,Romania,2021-06-21,2014,TV-MA,125 min,"Comedies, Dramas, International Movies",6,2021
s656,Movie,#Selfie 69,Cristina Jacob,Romania,2021-06-21,2016,TV-MA,119 min,"Comedies, Dramas, International Movies",6,2021
s5973,Movie,#cats_the_mewvie,Michael Margolis,Canada,2020-02-05,2020,TV-14,90 min,"Documentaries, International Movies",2,2020
s5278,Movie,#realityhigh,Fernando Lebrija,United States,2017-09-08,2017,TV-14,99 min,Comedies,9,2017


# Top 10 TV Shows

In [125]:
# Select the TV shows and list the first ten in ascending title order.
# NOTE: the ordering is alphabetical by title, not a popularity ranking.
filt = df['type'].eq('TV Show')
df[filt].sort_values('title', ascending=True).iloc[:10]

Unnamed: 0_level_0,type,title,director,country,date_added,release_year,rating,duration,listed_in,month_added,year_added
show_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
s2667,TV Show,#blackAF,,United States,2020-04-17,2020,TV-MA,1 Season,TV Comedies,4,2020
s2141,TV Show,(Un)Well,,United States,2020-08-12,2020,TV-MA,1 Season,Reality TV,8,2020
s1584,TV Show,100 Days My Prince,,South Korea,2020-12-07,2018,TV-14,1 Season,"International TV Shows, Romantic TV Shows, TV ...",12,2020
s2817,TV Show,100 Humans,,United States,2020-03-13,2020,TV-14,1 Season,"Docuseries, Science & Nature TV",3,2020
s5985,TV Show,100% Hotter,,United Kingdom,2019-11-01,2017,TV-14,1 Season,"British TV Shows, International TV Shows, Real...",11,2019
s5987,TV Show,12 Years Promise,,South Korea,2017-05-22,2014,TV-14,1 Season,"International TV Shows, Korean TV Shows, Roman...",5,2017
s2418,TV Show,13 Reasons Why,,United States,2020-06-05,2020,TV-MA,4 Seasons,"Crime TV Shows, TV Dramas, TV Mysteries",6,2020
s3562,TV Show,13 Reasons Why: Beyond the Reasons,,United States,2019-08-23,2019,TV-MA,3 Seasons,"Crime TV Shows, Docuseries",8,2019
s4351,TV Show,1983,,Poland,2018-11-30,2018,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Dramas",11,2018
s3809,TV Show,1994,Diego Enrique Osorno,Mexico,2019-05-17,2019,TV-MA,1 Season,"Crime TV Shows, Docuseries, International TV S...",5,2019


# Top 10 Directors

In [126]:
# First ten rows when the catalogue is ordered alphabetically by director.
df.sort_values('director', ascending=True).iloc[:10]

Unnamed: 0_level_0,type,title,director,country,date_added,release_year,rating,duration,listed_in,month_added,year_added
show_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
s6079,Movie,Abhinetri,A. L. Vijay,India,2018-05-01,2016,TV-14,131 min,"Comedies, International Movies, Sci-Fi & Fantasy",5,2018
s3538,Movie,Watchman,A. L. Vijay,India,2019-09-04,2019,TV-14,93 min,"Comedies, Dramas, International Movies",9,2019
s2390,Movie,Asura Guru,A. Raajdheep,India,2020-06-13,2020,TV-14,117 min,"Dramas, International Movies",6,2020
s5550,Movie,Salaakhen,A. Salaam,India,2017-04-01,1975,TV-14,134 min,"Action & Adventure, International Movies, Musi...",4,2017
s4682,Movie,Spyder,A.R. Murugadoss,India,2018-08-28,2017,TV-14,140 min,"Action & Adventure, International Movies",8,2018
s4050,Movie,Sarkar,A.R. Murugadoss,India,2019-03-02,2018,TV-MA,162 min,"Action & Adventure, Dramas, International Movies",3,2019
s3603,Movie,Jaoon Kahan Bata Ae Dil,Aadish Keluskar,India,2019-08-09,2018,TV-MA,107 min,"Dramas, Independent Movies, International Movies",8,2019
s5768,Movie,Harud,Aamir Bashir,India,2016-10-01,2010,TV-MA,100 min,"Dramas, International Movies",10,2016
s1023,Movie,Taare Zameen Par,Aamir Khan,India,2021-04-17,2007,PG,162 min,"Dramas, International Movies",4,2021
s2291,Movie,Tanu Weds Manu,Aanand Rai,India,2020-07-05,2011,TV-14,114 min,"Comedies, Dramas, International Movies",7,2020


In [127]:
# directors with the most contributions
by_direct = df.groupby('director')
by_direct = pd.DataFrame(by_direct.size())
by_direct.columns = ['contributions']
by_direct.sort_values(by = 'contributions',ascending = False).head(10)

Unnamed: 0_level_0,contributions
director,Unnamed: 1_level_1
Rajiv Chilaka,20
Alastair Fothergill,18
"Raúl Campos, Jan Suter",18
Marcus Raboy,16
Suhas Kadav,16
Jay Karas,14
Cathy Garcia-Molina,13
Martin Scorsese,12
Youssef Chahine,12
Jay Chapman,12
