In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.express as px
from textblob import TextBlob, Word, Blobber
from textblob.classifiers import NaiveBayesClassifier
from textblob.taggers import NLTKTagger

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file under the input directory,
# then print them one per line in the order os.walk yields them.
input_files = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_file in input_files:
    print(input_file)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the Netflix titles CSV into the working frame and preview the first rows.
netflix_titles_csv = '/kaggle/input/netflix-shows/netflix_titles.csv'
df = pd.read_csv(netflix_titles_csv)
df.head()

# Checking for attributes

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Column labels available in the loaded frame.
df.columns

# Counting the titles in each content rating


In [None]:
# Number of rows per rating value, as a two-column frame: 'rating', 'counts'.
rows_per_rating = df.groupby('rating').size()
x = rows_per_rating.reset_index(name='counts')
print(x)

# Data Cleaning

In [None]:
# Replace missing director values with a sentinel string so the column can be
# split with string methods; the sentinel is filtered out again after counting.
director_col = df['director']
df['director'] = director_col.fillna(value='Director not specified')
df.head()

In [None]:
# Start with an empty frame; the name is rebound to the split director
# credits in the following cell.
directors_list = pd.DataFrame(data=None)
print(directors_list)

In [None]:
# Split each comma-separated director credit into one entry per director.
# Splitting on ',' keeps whatever whitespace follows the comma
# (e.g. 'A, B' -> ['A', ' B']), so every piece is stripped; otherwise 'B' and
# ' B' would be treated as different directors when the names are counted.
directors_list = (
    df['director']
    .str.split(',', expand=True)  # one column per credited director
    .stack()                      # long format; empty slots are dropped
    .str.strip()                  # normalise surrounding whitespace
)
print(directors_list)

In [None]:
# Convert the stacked Series of names into a single-column DataFrame so the
# column can be given a label and grouped on.
directors_list = directors_list.to_frame()
print(directors_list)

In [None]:
# Label the single column 'Directors'; the group-by below refers to it by this name.
directors_list.columns = ['Directors']
print(directors_list)

In [None]:
# Count how many times each director name occurs and expose the result as a
# two-column frame: 'Directors', 'Total Count'.
credits_per_director = directors_list.groupby('Directors').size()
directors = credits_per_director.reset_index(name='Total Count')
print(directors)

In [None]:
# Remove the sentinel row that stands in for titles with no director listed.
has_named_director = directors['Directors'].ne('Director not specified')
directors = directors.loc[has_named_director]
print(directors)

In [None]:
# Order directors from most to fewest credits.
directors = directors.sort_values(by='Total Count', ascending=False)
print(directors)

# TOP 5 DIRECTORS

In [None]:
# Keep the first five rows of the already-sorted frame.
top5_Directors = directors.head(n=5)
print(top5_Directors)

# Visualization of TOP 5 Directors

In [None]:
# Re-sort ascending by count, then draw one bar per director.
top5_Directors = top5_Directors.sort_values(by='Total Count')
barChart = px.bar(
    data_frame=top5_Directors,
    x='Total Count',
    y='Directors',
    title='Top 5 Directors on Netflix',
)

barChart.show(renderer='iframe')

# DISTRIBUTION OF CONTENT RATINGS ON NETFLIX

In [None]:
# One slice per rating, sized by the number of titles carrying that rating.
pieChart = px.pie(
    data_frame=x,
    names='rating',
    values='counts',
    title='Distribution of content ratings on Netflix',
)

pieChart.show(renderer='iframe')

In [None]:
# Replace missing cast lists with a sentinel so the column can be split with
# string methods; the sentinel is filtered out again after counting.
df['cast'] = df['cast'].fillna("No cast specified")

# One row per credited actor. Splitting on ',' keeps whatever whitespace
# follows the comma (e.g. 'A, B' -> ['A', ' B']), so every piece is stripped;
# otherwise 'B' and ' B' would be counted as two different actors.
cast_df = (
    df['cast']
    .str.split(',', expand=True)
    .stack()
    .str.strip()
    .to_frame(name='Actor')
)

# Count appearances per actor, drop the sentinel, and rank by count.
actors = cast_df.groupby(['Actor']).size().reset_index(name='Total Count')
actors = actors[actors.Actor != "No cast specified"]
actors = actors.sort_values(by=['Total Count'], ascending=False)

# Take the five most-credited actors and re-sort ascending before plotting.
top5_Actors = actors.head()
top5_Actors = top5_Actors.sort_values(by=['Total Count'])

Actors_barChart = px.bar(top5_Actors, x='Total Count', y='Actor', title='Top 5 Actors on Netflix')
Actors_barChart.show(renderer='iframe')

# Analyzing the content produced on Netflix by release year

In [None]:
# Keep only the two columns needed for the yearly trend and give them
# display-friendly names. The rename is applied to the selected columns
# (not to the full frame), so df1 really is the two-column subset.
df1 = df[['type', 'release_year']].rename(
    columns={"release_year": "Release Year", "type": "Type"}
)

# Number of titles for each (release year, type) pair.
df2 = df1.groupby(['Release Year', 'Type']).size().reset_index(name='Total Count')

In [None]:
# Show the per-year, per-type title counts.
print(df2)

In [None]:
# Keep only rows whose release year is 2000 or later.
released_since_2000 = df2['Release Year'].ge(2000)
df2 = df2.loc[released_since_2000]

In [None]:
# One line per content type showing the title count for each release year.
graph = px.line(
    data_frame=df2,
    x="Release Year",
    y="Total Count",
    color="Type",
    title="Trend of Content Produced on Netflix Every Year",
)
graph.show(renderer='iframe')

# Sentiment Analysis of Netflix Content

In [None]:
def _sentiment_label(text):
    """Return "Positive", "Negative" or "Neutral" from the sign of TextBlob's polarity for `text`."""
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return "Positive"
    if polarity < 0:
        return "Negative"
    return "Neutral"


df3 = df[['release_year', 'description']]
df3 = df3.rename(columns = {'release_year':'Release Year', 'description':'Description'})

# Label every description in a single pass. Each row receives only its own
# label (a row-by-row `.loc[[index, 2], ...]` assignment would also overwrite
# row 2 on every iteration). Missing descriptions are skipped and stay NaN, so
# they are left out of the group-by counts below instead of raising.
df3['Sentiment'] = df3['Description'].map(_sentiment_label, na_action='ignore')

# Titles per (release year, sentiment), restricted to years after 2005.
df3 = df3.groupby(['Release Year','Sentiment']).size().reset_index(name = 'Total Count')
df3 = df3[df3['Release Year']>2005]

barGraph = px.bar(df3, x = 'Release Year', y = 'Total Count', color = 'Sentiment', title = 'Sentiment Analysis of Netflix Content')
barGraph.show(renderer='iframe')