In [50]:
import pandas as pd
import numpy as np
import statistics as st
from datetime import datetime as dt

In [25]:
df = pd.read_csv('netflix.csv')

# Overall 

In [26]:
df.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6234 entries, 0 to 6233
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       6234 non-null   int64 
 1   type          6234 non-null   object
 2   title         6234 non-null   object
 3   director      4265 non-null   object
 4   cast          5664 non-null   object
 5   country       5758 non-null   object
 6   date_added    6223 non-null   object
 7   release_year  6234 non-null   int64 
 8   rating        6224 non-null   object
 9   duration      6234 non-null   object
 10  listed_in     6234 non-null   object
 11  description   6234 non-null   object
dtypes: int64(2), object(10)
memory usage: 584.6+ KB


In [28]:
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,81145628,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","United States, India, South Korea, China","September 9, 2019",2019,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...
1,80117401,Movie,Jandino: Whatever it Takes,,Jandino Asporaat,United Kingdom,"September 9, 2016",2016,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...
2,70234439,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",United States,"September 8, 2018",2013,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob..."
3,80058654,TV Show,Transformers: Robots in Disguise,,"Will Friedle, Darren Criss, Constance Zimmer, ...",United States,"September 8, 2018",2016,TV-Y7,1 Season,Kids' TV,When a prison ship crash unleashes hundreds of...
4,80125979,Movie,#realityhigh,Fernando Lebrija,"Nesta Cooper, Kate Walsh, John Michael Higgins...",United States,"September 8, 2017",2017,TV-14,99 min,Comedies,When nerdy high schooler Dani finally attracts...


# DATA CLEANING

In [29]:
#Check duplicate
df.duplicated().sum()

0

In [30]:
#Check null
df.isna().sum()

show_id            0
type               0
title              0
director        1969
cast             570
country          476
date_added        11
release_year       0
rating            10
duration           0
listed_in          0
description        0
dtype: int64

In [31]:
null_columns = ['director','cast','country','date_added','rating']

In [32]:
for x in null_columns:
    df[x].fillna(st.mode(df[x].dropna(how = 'any', axis = 0)), inplace =True)

In [33]:
df.isna().sum()

show_id         0
type            0
title           0
director        0
cast            0
country         0
date_added      0
release_year    0
rating          0
duration        0
listed_in       0
description     0
dtype: int64

In [34]:
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,81145628,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","United States, India, South Korea, China","September 9, 2019",2019,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...
1,80117401,Movie,Jandino: Whatever it Takes,"Raúl Campos, Jan Suter",Jandino Asporaat,United Kingdom,"September 9, 2016",2016,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...
2,70234439,TV Show,Transformers Prime,"Raúl Campos, Jan Suter","Peter Cullen, Sumalee Montano, Frank Welker, J...",United States,"September 8, 2018",2013,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob..."
3,80058654,TV Show,Transformers: Robots in Disguise,"Raúl Campos, Jan Suter","Will Friedle, Darren Criss, Constance Zimmer, ...",United States,"September 8, 2018",2016,TV-Y7,1 Season,Kids' TV,When a prison ship crash unleashes hundreds of...
4,80125979,Movie,#realityhigh,Fernando Lebrija,"Nesta Cooper, Kate Walsh, John Michael Higgins...",United States,"September 8, 2017",2017,TV-14,99 min,Comedies,When nerdy high schooler Dani finally attracts...


In [48]:
df.loc[df['release_year'] == 2016].groupby('type', as_index= False).agg(sum = ('title','count'))

Unnamed: 0,type,sum
0,Movie,593
1,TV Show,237


In [55]:
df.date_added = [x.strip() for x in df.date_added]
df.date_added = [dt.strptime(x,'%B %d, %Y') for x in df.date_added]

In [70]:
df.date_added = [dt.strftime(x,'%d/%m/%Y') for x in df.date_added]

In [67]:
df.date_added = df.date_added.astype('datetime64')

  df.date_added = df.date_added.astype('datetime64')


In [68]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6234 entries, 0 to 6233
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   show_id       6234 non-null   int64         
 1   type          6234 non-null   object        
 2   title         6234 non-null   object        
 3   director      6234 non-null   object        
 4   cast          6234 non-null   object        
 5   country       6234 non-null   object        
 6   date_added    6234 non-null   datetime64[ns]
 7   release_year  6234 non-null   int64         
 8   rating        6234 non-null   object        
 9   duration      6234 non-null   object        
 10  listed_in     6234 non-null   object        
 11  description   6234 non-null   object        
dtypes: datetime64[ns](1), int64(2), object(9)
memory usage: 584.6+ KB


In [71]:
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,81145628,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","United States, India, South Korea, China",09/09/2019,2019,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...
1,80117401,Movie,Jandino: Whatever it Takes,"Raúl Campos, Jan Suter",Jandino Asporaat,United Kingdom,09/09/2016,2016,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...
2,70234439,TV Show,Transformers Prime,"Raúl Campos, Jan Suter","Peter Cullen, Sumalee Montano, Frank Welker, J...",United States,09/08/2018,2013,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob..."
3,80058654,TV Show,Transformers: Robots in Disguise,"Raúl Campos, Jan Suter","Will Friedle, Darren Criss, Constance Zimmer, ...",United States,09/08/2018,2016,TV-Y7,1 Season,Kids' TV,When a prison ship crash unleashes hundreds of...
4,80125979,Movie,#realityhigh,Fernando Lebrija,"Nesta Cooper, Kate Walsh, John Michael Higgins...",United States,09/08/2017,2017,TV-14,99 min,Comedies,When nerdy high schooler Dani finally attracts...


In [76]:
df_gb = df.groupby(['type','listed_in'], as_index= False).agg(count = ('listed_in','count'))

In [77]:
df_gb

Unnamed: 0,type,listed_in,count
0,Movie,Action & Adventure,68
1,Movie,"Action & Adventure, Anime Features, Children &...",1
2,Movie,"Action & Adventure, Anime Features, Classic Mo...",1
3,Movie,"Action & Adventure, Anime Features, Horror Movies",1
4,Movie,"Action & Adventure, Anime Features, Internatio...",25
...,...,...,...
456,TV Show,"TV Horror, TV Mysteries, TV Thrillers",3
457,TV Show,"TV Horror, TV Mysteries, Teen TV Shows",1
458,TV Show,"TV Horror, Teen TV Shows",1
459,TV Show,"TV Sci-Fi & Fantasy, TV Thrillers",1


In [106]:
df_pivot = df_gb.pivot(index = 'type', columns = 'listed_in', values = 'count')
df_pivot

listed_in,Action & Adventure,"Action & Adventure, Anime Features, Children & Family Movies","Action & Adventure, Anime Features, Classic Movies","Action & Adventure, Anime Features, Horror Movies","Action & Adventure, Anime Features, International Movies","Action & Adventure, Anime Features, Sci-Fi & Fantasy","Action & Adventure, Children & Family Movies","Action & Adventure, Children & Family Movies, Classic Movies","Action & Adventure, Children & Family Movies, Comedies","Action & Adventure, Children & Family Movies, Dramas",...,"TV Dramas, TV Sci-Fi & Fantasy, Teen TV Shows","TV Dramas, TV Thrillers","TV Dramas, Teen TV Shows","TV Horror, TV Mysteries, TV Sci-Fi & Fantasy","TV Horror, TV Mysteries, TV Thrillers","TV Horror, TV Mysteries, Teen TV Shows","TV Horror, Teen TV Shows","TV Sci-Fi & Fantasy, TV Thrillers",TV Shows,Thrillers
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Movie,68.0,1.0,1.0,1.0,25.0,5.0,2.0,3.0,2.0,1.0,...,,,,,,,,,,40.0
TV Show,,,,,,,,,,,...,1.0,5.0,4.0,4.0,3.0,1.0,1.0,1.0,10.0,


In [107]:
df_pivot.rename_axis(mapper = None, axis = 1, inplace = True)
df_pivot

Unnamed: 0_level_0,Action & Adventure,"Action & Adventure, Anime Features, Children & Family Movies","Action & Adventure, Anime Features, Classic Movies","Action & Adventure, Anime Features, Horror Movies","Action & Adventure, Anime Features, International Movies","Action & Adventure, Anime Features, Sci-Fi & Fantasy","Action & Adventure, Children & Family Movies","Action & Adventure, Children & Family Movies, Classic Movies","Action & Adventure, Children & Family Movies, Comedies","Action & Adventure, Children & Family Movies, Dramas",...,"TV Dramas, TV Sci-Fi & Fantasy, Teen TV Shows","TV Dramas, TV Thrillers","TV Dramas, Teen TV Shows","TV Horror, TV Mysteries, TV Sci-Fi & Fantasy","TV Horror, TV Mysteries, TV Thrillers","TV Horror, TV Mysteries, Teen TV Shows","TV Horror, Teen TV Shows","TV Sci-Fi & Fantasy, TV Thrillers",TV Shows,Thrillers
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Movie,68.0,1.0,1.0,1.0,25.0,5.0,2.0,3.0,2.0,1.0,...,,,,,,,,,,40.0
TV Show,,,,,,,,,,,...,1.0,5.0,4.0,4.0,3.0,1.0,1.0,1.0,10.0,


In [108]:
df_pivot.reset_index(inplace = True)

In [110]:
df_melt = df_pivot.melt(id_vars = 'type', var_name = 'listed_in', value_name = 'count')
df_melt

Unnamed: 0,type,listed_in,count
0,Movie,Action & Adventure,68.0
1,TV Show,Action & Adventure,
2,Movie,"Action & Adventure, Anime Features, Children &...",1.0
3,TV Show,"Action & Adventure, Anime Features, Children &...",
4,Movie,"Action & Adventure, Anime Features, Classic Mo...",1.0
...,...,...,...
917,TV Show,"TV Sci-Fi & Fantasy, TV Thrillers",1.0
918,Movie,TV Shows,
919,TV Show,TV Shows,10.0
920,Movie,Thrillers,40.0
