In [40]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from textblob import TextBlob # For sentiment analysis


In [41]:
data = pd.read_csv('netflix_titles.csv')

In [42]:
data.shape

(8807, 12)

In [43]:
data.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [44]:
data.head(2)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thabang Molaba, Dillon Windvogel, Natasha Thahane, Arno Greeff, Xolile Tshabalala, Getmore Sithole, Cindy Mahlangu, Ryle De Morny, Greteli Fincham, Sello Maake Ka-Ncube, Odwa Gwanya, Mekaila Mathys, Sandi Schultz, Duane Williams, Shamilla Miller, Patrick Mofokeng",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town teen sets out to prove whether a private-school swimming star is her sister who was abducted at birth."


#### Lokking at the distribution of content rating on netflix

In [45]:
d = data.groupby(['rating']).size().reset_index(name='counts')

In [46]:
piechart = px.pie(d, values = 'counts', names='rating', title='Distribution of rating', color_discrete_sequence=px.colors.qualitative.Set3)
piechart.show()

#### Top 5 Actors and Directors

In [47]:
data['director']=data['director'].fillna('No Director Available')

In [48]:
filtered_director = pd.DataFrame()

In [49]:
filtered_director = data['director'].str.split(',', expand=True).stack()
filtered_director.head(9)

0  0          Kirsten Johnson
1  0    No Director Available
2  0          Julien Leclercq
3  0    No Director Available
4  0    No Director Available
5  0            Mike Flanagan
6  0            Robert Cullen
   1           José Luis Ucha
7  0             Haile Gerima
dtype: object

In [50]:
filtered_director = filtered_director.to_frame()

In [51]:
filtered_director.columns = ['Director']

In [52]:
directors = filtered_director.groupby(['Director']).size().reset_index(name='Total Content')
directors.head()

Unnamed: 0,Director,Total Content
0,Aaron Moorhead,2
1,Aaron Woolf,1
2,Abbas Alibhai Burmawalla,1
3,Abdullah Al Noor,1
4,Abhinav Shiv Tiwari,1


In [53]:
directors=directors[directors.Director != 'No Director Available']

In [54]:
directors = directors.sort_values(by=['Total Content'], ascending=False)

In [55]:
directors_top_5 = directors.head()
directors_top_5

Unnamed: 0,Director,Total Content
4021,Rajiv Chilaka,22
4068,Raúl Campos,18
261,Jan Suter,18
4652,Suhas Kadav,16
3235,Marcus Raboy,16


In [56]:
directors_top_5 = directors_top_5.sort_values(by=['Total Content'])
directors_top_5

Unnamed: 0,Director,Total Content
4652,Suhas Kadav,16
3235,Marcus Raboy,16
4068,Raúl Campos,18
261,Jan Suter,18
4021,Rajiv Chilaka,22


#### List of the Top 5 Directors on Netflix

In [57]:
fig_1 = px.bar(directors_top_5, x='Total Content', y='Director', title='Top 5 Directors on Netflix')
fig_1.show()

#### Check the top 5 Actors

In [58]:
data['cast']=data['cast'].fillna('No Cast Present')

In [59]:
filtered_cast = pd.DataFrame()

In [60]:
filtered_cast = data['cast'].str.split(',', expand=True).stack()

In [61]:
filtered_cast.head()

0  0    No Cast Present
1  0         Ama Qamata
   1        Khosi Ngema
   2      Gail Mabalane
   3     Thabang Molaba
dtype: object

In [62]:
filtered_cast = filtered_cast.to_frame()

In [63]:
filtered_cast.columns = ['Actor']

In [64]:
actors = filtered_cast.groupby(by=['Actor']).size().reset_index(name='Total Content')
actors.head()

Unnamed: 0,Actor,Total Content
0,Jr.,2
1,"""Riley"" Lakdhar Dridi",1
2,'Najite Dede,1
3,2 Chainz,1
4,2Mex,1


In [65]:
actors = actors[actors.Actor != 'No Cast Present']

In [66]:
actors=actors.sort_values(by=['Total Content'], ascending=False)
actors.head()

Unnamed: 0,Actor,Total Content
2612,Anupam Kher,39
26941,Rupa Bhimani,31
30303,Takahiro Sakurai,30
15541,Julie Tejwani,28
23624,Om Puri,27


In [67]:
actors_top_5 = actors.head()

In [68]:
actors_top_5 = actors_top_5.sort_values(by=['Total Content'], ascending=True)
actors_top_5

Unnamed: 0,Actor,Total Content
23624,Om Puri,27
15541,Julie Tejwani,28
30303,Takahiro Sakurai,30
26941,Rupa Bhimani,31
2612,Anupam Kher,39


#### These are the Top 5 Actors on Netflix

In [69]:
fig_2 = px.bar(actors_top_5, x='Total Content', y='Actor', title='Top 5 Actors on Netflix')
fig_2

#### Analyzing the Content on Netflix over the Years

In [70]:
df1= data[['type', 'release_year']]

In [71]:
df1.isnull().sum()

type            0
release_year    0
dtype: int64

In [72]:
df2 = df1.groupby(['release_year', 'type']).size().reset_index(name='Total Content')
df2.head()

Unnamed: 0,release_year,type,Total Content
0,1925,TV Show,1
1,1942,Movie,2
2,1943,Movie,3
3,1944,Movie,3
4,1945,Movie,3


In [73]:
df2 = df2[df2['release_year'] > 2010]

#### Trend of Content Produced over the Years on Netflix

In [74]:
fig_3 = px.line(df2, x='release_year', y='Total Content', title='Trend of Content Produced in the Years', color='type')
fig_3

#### Sentiment analysis of the content on netflix

In [75]:
df_1 = data[['release_year', 'description']]

In [76]:
df_1 = df_1.rename(columns={'release_year':'Release Year'})

In [77]:
for index, row in df_1.iterrows():
    d=row['description']
    testimonial=TextBlob(d)
    s=testimonial.sentiment.polarity
    if s==0:
        sent = 'Neutral'
    elif s>0:
        sent = 'Positive'
    else:
        sent = 'Negative'
    df_1.loc[[index, 2], 'Sentiment']=sent

df_1 = df_1.groupby(['Release Year', 'Sentiment']).size().reset_index(name='Total Content')

df_1 = df_1[df_1['Release Year'] >= 2010]
fig_4 = px.bar(df_1, x='Release Year', y='Total Content', color='Sentiment', title='Sentiment Content on Netflix')
fig_4

In [78]:
!jupyter nbconvert --to html DA_portfolio_project_11.ipynb

[NbConvertApp] Converting notebook DA_portfolio_project_11.ipynb to html
[NbConvertApp] Writing 4384366 bytes to DA_portfolio_project_11.html
