In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from collections import Counter
from plotly.subplots import make_subplots
import plotly.graph_objs as go

## Netflix

In [None]:
# Load the Netflix titles CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer='../input/netflix-shows/netflix_titles.csv')

In [None]:
# Preview the first five rows.
df.head(n=5)

In [None]:
# Fraction of missing values in each column.
df.isna().sum() / len(df)

**Dropping unnecessary columns**

In [None]:
# Remove two columns that no later cell reads. Rebinding `df` (instead of
# `inplace=True`) together with `errors='ignore'` keeps this cell re-runnable:
# a second execution no longer raises KeyError for already-dropped columns.
df = df.drop(columns=['description', 'show_id'], errors='ignore')

In [None]:
# Discard every row that has at least one missing value (modifies df in place).
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Re-check the per-column fraction of missing values.
df.isna().sum() / len(df)

## EDA

**Top Directors and Countries**

In [None]:
# Tally how many rows carry each distinct value of the director column.
directors = Counter(df['director'])

In [None]:
# Ten most frequent director values as (value, count) pairs.
topdirectors = directors.most_common(n=10)

In [None]:
# Bar chart of the entries in `topdirectors`; bar colour encodes the count.
td = px.bar(
    x=[name for name, _ in topdirectors],
    y=[count for _, count in topdirectors],
    color=[count for _, count in topdirectors],
)
td.update_layout(
    title='TOP DIRECTORS',
    xaxis_title='Directors',
    yaxis_title='No. of Shows/Movies',
)


In [None]:
# Tally how many rows carry each distinct value of the country column.
countries = Counter(df['country'])

In [None]:
# Ten most frequent country values as (value, count) pairs.
topcountries = countries.most_common(n=10)

In [None]:
# Bar chart of the entries in `topcountries`; bar colour encodes the count.
tc = px.bar(
    x=[name for name, _ in topcountries],
    y=[count for _, count in topcountries],
    color=[count for _, count in topcountries],
)
tc.update_layout(
    title='TOP COUNTRIES',
    xaxis_title='COUNTRIES',
    yaxis_title='Count',
)


**Top Cast**

In [None]:
# Tally how many rows carry each distinct value of the cast column.
# NOTE(review): each cell value is counted as a whole; if the column stores
# several names in one string, individual actors are not tallied separately
# -- confirm against the data.
cast = Counter(df['cast'])

In [None]:
# Ten most frequent cast values as (value, count) pairs.
topcasts = cast.most_common(n=10)

In [None]:
# Bar chart of the entries in `topcasts`; bar colour encodes the count.
tcast = px.bar(
    x=[name for name, _ in topcasts],
    y=[count for _, count in topcasts],
    color=[count for _, count in topcasts],
)
tcast.update_layout(
    title='TOP CASTS',
    xaxis_title='Name of Cast',
    yaxis_title='Count',
)


**Top Years**

In [None]:
# Tally how many rows fall in each release year.
ryear = Counter(df['release_year'])

In [None]:
# Ten most frequent release years as (year, count) pairs.
topry = ryear.most_common(n=10)

In [None]:
# Bar chart of the entries in `topry`; bar colour encodes the count.
tyear = px.bar(
    x=[year for year, _ in topry],
    y=[count for _, count in topry],
    color=[count for _, count in topry],
    width=1000,
)
tyear.update_layout(
    title='TOP YEARS',
    xaxis_title='Years',
    yaxis_title='Count',
)


**Top Movies Duration**

In [None]:
# Boolean mask selecting the rows whose type is 'Movie'.
movies = df['type'].eq('Movie')

In [None]:
# Tally the duration values of the rows selected by the `movies` mask.
md = Counter(df.loc[movies, 'duration'])

In [None]:
# Ten most frequent movie durations as (duration, count) pairs.
topmd = md.most_common(n=10)

In [None]:
# Bar chart of the entries in `topmd`; bar colour encodes the count.
mdt = px.bar(
    x=[duration for duration, _ in topmd],
    y=[count for _, count in topmd],
    color=[count for _, count in topmd],
    width=1000,
)
mdt.update_layout(
    title='TOP MOVIES DURATION',
    xaxis_title='Duration',
    yaxis_title='Count',
)


**Top Shows Duration**

In [None]:
# Boolean mask selecting the rows whose type is 'TV Show'.
shows = df['type'].eq('TV Show')

In [None]:
# Tally the duration values of the rows selected by the `shows` mask.
sd = Counter(df.loc[shows, 'duration'])

In [None]:
# Ten most frequent show durations as (duration, count) pairs.
topsd = sd.most_common(n=10)

In [None]:
# Bar chart of the entries in `topsd`; bar colour encodes the count.
sdt = px.bar(
    x=[duration for duration, _ in topsd],
    y=[count for _, count in topsd],
    color=[count for _, count in topsd],
    width=1000,
)
sdt.update_layout(
    title='TOP SHOWS DURATION',
    xaxis_title='Duration',
    yaxis_title='Count',
)


**Top Genres**

In [None]:
# Tally how many rows carry each distinct value of the listed_in column.
# NOTE(review): each cell value is counted as a whole; if one cell lists
# several genres, they are not tallied individually -- confirm against the data.
tl = Counter(df['listed_in'])

In [None]:
# Ten most frequent listed_in values as (value, count) pairs.
topl = tl.most_common(n=10)

In [None]:
# Bar chart of the entries in `topl`; bar colour encodes the count.
toplisted = px.bar(
    x=[genre for genre, _ in topl],
    y=[count for _, count in topl],
    color=[count for _, count in topl],
    width=1000,
)
toplisted.update_layout(
    title='TOP Genres',
    xaxis_title='Genres',
    yaxis_title='Count',
)


**Top Ratings**

In [None]:
# Tally how many rows carry each distinct rating value.
tr = Counter(df['rating'])

In [None]:
# Ten most frequent rating values as (rating, count) pairs.
topr = tr.most_common(n=10)

In [None]:
# Bar chart of the entries in `topr`; bar colour encodes the count.
toprating = px.bar(
    x=[rating for rating, _ in topr],
    y=[count for _, count in topr],
    color=[count for _, count in topr],
    width=1000,
)
toprating.update_layout(
    title='TOP Ratings',
    xaxis_title='Rating Name',
    yaxis_title='Count',
)


## Amazon Prime

In [None]:
# Load the Amazon Prime titles CSV into a DataFrame.
df1 = pd.read_csv(filepath_or_buffer='../input/amazon-prime-movies-and-tv-shows/amazon_prime_titles.csv')

In [None]:
# Preview the first five rows.
df1.head(n=5)

**Top Directors and Countries**

In [None]:
# Tally how many rows carry each distinct value of the director column.
# df1 is never cleaned of missing values, so NaN entries are removed first;
# otherwise "missing" could be counted and ranked as if it were a director.
adirectors = Counter(df1['director'].dropna())

In [None]:
# Ten most frequent director values as (value, count) pairs.
atopdirectors = adirectors.most_common(n=10)

In [None]:
# Bar chart of the entries in `atopdirectors`; bar colour encodes the count
# and the y axis is fixed to the range 0-300.
atd = px.bar(
    x=[name for name, _ in atopdirectors],
    y=[count for _, count in atopdirectors],
    range_y=[0, 300],
    color=[count for _, count in atopdirectors],
)
atd.update_layout(
    title='TOP DIRECTORS',
    xaxis_title='Directors',
    yaxis_title='No. of Shows/Movies',
)


In [None]:
# Tally how many rows carry each distinct value of the country column.
# df1 is never cleaned of missing values, so NaN entries are removed first;
# otherwise "missing" could be counted and ranked as if it were a country.
acountry = Counter(df1['country'].dropna())

In [None]:
# Ten most frequent country values as (value, count) pairs.
atopcountry = acountry.most_common(n=10)

In [None]:
# Bar chart of the entries in `atopcountry` with the y axis fixed to 0-400.
# `color` is passed so the bars are count-coloured like every other bar chart
# in this notebook (this call was the only one without it).
atco = px.bar(
    x=[name for name, _ in atopcountry],
    y=[count for _, count in atopcountry],
    color=[count for _, count in atopcountry],
    range_y=[0, 400],
)
atco.update_layout(
    title='TOP COUNTRY',
    xaxis_title='Country Name',
    yaxis_title='No. of Movies/Shows',
)


**Top Cast**

In [None]:
# Tally how many rows carry each distinct value of the cast column.
# df1 is never cleaned of missing values, so NaN entries are removed first;
# otherwise "missing" could be counted and ranked as if it were a cast entry.
# NOTE(review): each cell value is still counted as a whole string; if a cell
# holds several names, individual actors are not tallied separately -- confirm.
acast = Counter(df1['cast'].dropna())

In [None]:
# Ten most frequent cast values as (value, count) pairs.
atopcast = acast.most_common(n=10)

In [None]:
# Bar chart of the entries in `atopcast`; bar colour encodes the count
# and the y axis is fixed to the range 0-200.
atc = px.bar(
    x=[name for name, _ in atopcast],
    y=[count for _, count in atopcast],
    color=[count for _, count in atopcast],
    range_y=[0, 200],
)
atc.update_layout(
    title='TOP CAST',
    xaxis_title='Name of Cast',
    yaxis_title='No. of Movies/Shows',
)


**Top Year**

In [None]:
# Tally rows per release year, keep the eight most frequent years and chart
# them; bar colour encodes the count.
ayear = Counter(df1['release_year'])
toprya = ayear.most_common(n=8)
tyeara = px.bar(
    x=[year for year, _ in toprya],
    y=[count for _, count in toprya],
    color=[count for _, count in toprya],
    width=1000,
)
tyeara.update_layout(
    title='TOP YEARS',
    xaxis_title='Years',
    yaxis_title='Count',
)


**Top Movies Duration**

In [None]:
# Boolean mask selecting the rows whose type is 'Movie'.
amovies = df1['type'].eq('Movie')

In [None]:
# Tally the duration values of Amazon rows whose type is 'Movie', keep the
# eight most frequent and chart them; bar colour encodes the count.
# The row filter is computed inline instead of being read from `amovies`:
# this cell rebinds `amovies` to the Counter, so on a second execution the
# name would no longer hold a boolean mask and indexing df1 with it would break.
amovies = Counter(df1.loc[df1['type'] == 'Movie', 'duration'])
topmovies = amovies.most_common(8)
tmovies = px.bar(
    x=[duration for duration, _ in topmovies],
    y=[count for _, count in topmovies],
    color=[count for _, count in topmovies],
    width=1000,
)
tmovies.update_layout(
    title='TOP Movie Duration',
    xaxis_title='Duration(Min)',
    yaxis_title='Count',
)


**Top Shows Duration**

In [None]:
# Boolean mask selecting the rows whose type is 'TV Show'.
ashows = df1['type'].eq('TV Show')

In [None]:
# Tally the duration values of Amazon rows whose type is 'TV Show', keep the
# eight most frequent and chart them; bar colour encodes the count.
# The row filter is computed inline instead of being read from `ashows`:
# this cell rebinds `ashows` to the Counter, so on a second execution the
# name would no longer hold a boolean mask and indexing df1 with it would break.
ashows = Counter(df1.loc[df1['type'] == 'TV Show', 'duration'])
atopshows = ashows.most_common(8)
atshows = px.bar(
    x=[duration for duration, _ in atopshows],
    y=[count for _, count in atopshows],
    color=[count for _, count in atopshows],
    width=1000,
)
atshows.update_layout(
    title='TOP Shows Duration',
    xaxis_title='Duration',
    yaxis_title='Count',
)


**Top Genres**

In [None]:
# Tally rows per listed_in value, keep the ten most frequent and chart them;
# bar colour encodes the count.
atl = Counter(df1['listed_in'])
atopl = atl.most_common(n=10)
atoplisted = px.bar(
    x=[genre for genre, _ in atopl],
    y=[count for _, count in atopl],
    color=[count for _, count in atopl],
    width=1000,
)
atoplisted.update_layout(
    title='TOP Genres',
    xaxis_title='Genres',
    yaxis_title='Count',
)


**Top Ratings**

In [None]:
# Tally rows per rating value, keep the ten most frequent and chart them;
# bar colour encodes the count.
# df1 is never cleaned of missing values, so NaN entries are removed first;
# otherwise "missing" could be counted and ranked as if it were a rating.
atr = Counter(df1['rating'].dropna())
atopr = atr.most_common(10)
atoprating = px.bar(
    x=[rating for rating, _ in atopr],
    y=[count for _, count in atopr],
    color=[count for _, count in atopr],
    width=1000,
)
atoprating.update_layout(
    title='TOP Ratings',
    xaxis_title='Rating Name',
    yaxis_title='Count',
)


## Comparing Netflix & Amazon Prime

**Top Directors**

In [None]:
# Plot the Netflix and Amazon top-director counts as two bar traces on a
# single one-cell figure.
fig = make_subplots(rows=1, cols=1)
fig.add_trace(
    go.Bar(
        x=[name for name, _ in topdirectors],
        y=[count for _, count in topdirectors],
        name='Netflix',
        width=.5,
    )
)
fig.add_trace(
    go.Bar(
        x=[name for name, _ in atopdirectors],
        y=[count for _, count in atopdirectors],
        name='Amazon',
        width=.5,
    )
)
fig.update_layout(height=600, width=800, title_text="Top Directors Comparision")

**Top Countries**

In [None]:
# Plot the Netflix and Amazon top-country counts as two bar traces on a
# single one-cell figure.
fig = make_subplots(rows=1, cols=1)
fig.add_trace(
    go.Bar(
        x=[name for name, _ in topcountries],
        y=[count for _, count in topcountries],
        name='Netflix',
        width=.5,
    )
)
fig.add_trace(
    go.Bar(
        x=[name for name, _ in atopcountry],
        y=[count for _, count in atopcountry],
        name='Amazon',
        width=.5,
    )
)
fig.update_layout(height=600, width=800, title_text="Top Countries Comparision")

**Top Cast Comparison**

In [None]:
# Plot the Netflix and Amazon top-cast counts as two bar traces on a
# single one-cell figure.
fig = make_subplots(rows=1, cols=1)
fig.add_trace(
    go.Bar(
        x=[name for name, _ in topcasts],
        y=[count for _, count in topcasts],
        name='Netflix',
        width=.5,
    )
)
fig.add_trace(
    go.Bar(
        x=[name for name, _ in atopcast],
        y=[count for _, count in atopcast],
        name='Amazon',
        width=.5,
    )
)
fig.update_layout(height=600, width=800, title_text="Top Cast Comparision")

**Top Years**

In [None]:
# Plot the Netflix and Amazon top-release-year counts as two bar traces on a
# single one-cell figure.
fig = make_subplots(rows=1, cols=1)
fig.add_trace(
    go.Bar(
        x=[year for year, _ in topry],
        y=[count for _, count in topry],
        name='Netflix',
        width=.5,
    )
)
fig.add_trace(
    go.Bar(
        x=[year for year, _ in toprya],
        y=[count for _, count in toprya],
        name='Amazon',
        width=.5,
    )
)
fig.update_layout(height=600, width=800, title_text="Top Years Comparision")

**Top Movie Duration**

In [None]:
# Plot the Netflix and Amazon top movie-duration counts as two bar traces on
# a single one-cell figure.
fig = make_subplots(rows=1, cols=1)
fig.add_trace(
    go.Bar(
        x=[duration for duration, _ in topmd],
        y=[count for _, count in topmd],
        name='Netflix',
        width=.5,
    )
)
fig.add_trace(
    go.Bar(
        x=[duration for duration, _ in topmovies],
        y=[count for _, count in topmovies],
        name='Amazon',
        width=.5,
    )
)
fig.update_layout(height=600, width=800, title_text="Top Movie Duration Comparision")

**Top Show Duration**

In [None]:
# Plot the Netflix and Amazon top show-duration counts as two bar traces on
# a single one-cell figure.
fig = make_subplots(rows=1, cols=1)
fig.add_trace(
    go.Bar(
        x=[duration for duration, _ in topsd],
        y=[count for _, count in topsd],
        name='Netflix',
        width=.5,
    )
)
fig.add_trace(
    go.Bar(
        x=[duration for duration, _ in atopshows],
        y=[count for _, count in atopshows],
        name='Amazon',
        width=.5,
    )
)
fig.update_layout(height=600, width=800, title_text="Top Show Duration Comparision")

**Top Listed-In**

In [None]:
# Plot the Netflix and Amazon top listed_in counts as two bar traces on a
# single one-cell figure.
fig = make_subplots(rows=1, cols=1)
fig.add_trace(
    go.Bar(
        x=[genre for genre, _ in topl],
        y=[count for _, count in topl],
        name='Netflix',
        width=.5,
    )
)
fig.add_trace(
    go.Bar(
        x=[genre for genre, _ in atopl],
        y=[count for _, count in atopl],
        name='Amazon',
        width=.5,
    )
)
fig.update_layout(height=600, width=800, title_text="Top Listed-In Comparision")

**Top Ratings**

In [None]:
# Plot the Netflix and Amazon top rating counts as two bar traces on a
# single one-cell figure.
fig = make_subplots(rows=1, cols=1)
fig.add_trace(
    go.Bar(
        x=[rating for rating, _ in topr],
        y=[count for _, count in topr],
        name='Netflix',
        width=.5,
    )
)
fig.add_trace(
    go.Bar(
        x=[rating for rating, _ in atopr],
        y=[count for _, count in atopr],
        name='Amazon',
        width=.5,
    )
)
fig.update_layout(height=600, width=800, title_text="Top Rating Comparision")

## I hope this notebook has helped you gain meaningful insights. If so, please upvote! Thank you!
