# Final Deliverables: Amazon Prime vs Netflix

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from collections import Counter
from plotly.subplots import make_subplots
import plotly.graph_objs as go

## Netflix

In [2]:
# Colab-only step: opens the browser upload dialog. The cell output shows the file being
# saved as netflix_titles.csv, which the read_csv cell then loads by that name.
from google.colab import files
uploaded = files.upload()  # `uploaded` is not referenced again in the visible notebook

Saving netflix_titles.csv to netflix_titles.csv


In [4]:
# Read the uploaded Netflix catalogue from the working directory.
df = pd.read_csv("netflix_titles.csv")

In [None]:
# Preview the first five rows to check that the columns parsed as expected.
df.head(n=5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [7]:
# Fraction of missing values per column: the mean of a boolean mask equals
# (number of NaNs) / (number of rows).
df.isna().mean()

show_id         0.000000
type            0.000000
title           0.000000
director        0.299080
cast            0.093675
country         0.094357
date_added      0.001135
release_year    0.000000
rating          0.000454
duration        0.000341
listed_in       0.000000
description     0.000000
dtype: float64

**Dropping unnecessary columns**

In [8]:
# Drop the free-text and identifier columns, which no later cell reads.
# Rebinding `df` (instead of inplace=True) together with errors='ignore' keeps the
# cell idempotent: re-running it no longer raises KeyError once the columns are gone.
df = df.drop(columns=['description', 'show_id'], errors='ignore')

In [9]:
# Keep only rows with no missing value in any remaining column.
# Rebinding `df` avoids in-place mutation, which pandas discourages and which makes
# out-of-order cell execution harder to reason about.
df = df.dropna()  # Dropping NaN rows

In [10]:
# Re-check the per-column share of missing values after the row drop.
df.isna().mean()  # Cleaned: every column should now report 0.0

type            0.0
title           0.0
director        0.0
cast            0.0
country         0.0
date_added      0.0
release_year    0.0
rating          0.0
duration        0.0
listed_in       0.0
dtype: float64

**Top Directors and Countries**

In [11]:
# Count titles per individual director.
# NOTE(review): assumes co-directed titles store their directors as one
# comma-separated string (as the cast and listed_in columns visibly do) — confirm.
# Splitting credits each person instead of treating the joined string as a
# distinct "director"; dropna() keeps the cell safe if missing values are present.
directors = Counter(
    name.strip()
    for credit in df['director'].dropna()
    for name in credit.split(',')
)

In [12]:
# Ten most frequent directors as (name, count) pairs, highest count first.
topdirectors = directors.most_common(n=10)

In [13]:
# Unpack the (director, count) pairs into parallel lists for the bar chart;
# the counts drive both bar height and bar colour.
td_labels = [name for name, _ in topdirectors]
td_counts = [count for _, count in topdirectors]
td = px.bar(x=td_labels, y=td_counts, color=td_counts)
td.update_layout(
    title='TOP DIRECTORS',
    xaxis_title='Directors',
    yaxis_title='No. of Shows/Movies',
)


In [14]:
# Count titles per individual production country.
# NOTE(review): assumes co-productions list their countries as one comma-separated
# string — confirm against the data. Splitting credits each country on its own
# rather than counting every combination as a separate "country".
countries = Counter(
    name.strip()
    for entry in df['country'].dropna()
    for name in entry.split(',')
    if name.strip()  # ignore empty fragments left by stray commas
)

In [15]:
# Ten most frequent countries as (country, count) pairs, highest count first.
topcountries = countries.most_common(n=10)

In [16]:
# Parallel label/count lists for the country chart; counts also set the colour scale.
tc_labels = [country for country, _ in topcountries]
tc_counts = [count for _, count in topcountries]
tc = px.bar(x=tc_labels, y=tc_counts, color=tc_counts)
tc.update_layout(
    title='TOP COUNTRIES',
    xaxis_title='COUNTRIES',
    yaxis_title='Count',
)


**Top Cast**

In [17]:
# Count appearances per individual cast member.
# The cast column holds a whole comma-separated cast list per title (see the
# df.head() output), so counting the raw strings ranks identical cast *lists*, not
# people. Split each list and tally every name on its own.
cast = Counter(
    name.strip()
    for members in df['cast'].dropna()
    for name in members.split(',')
    if name.strip()  # ignore empty fragments left by stray commas
)

In [18]:
# Ten most frequent cast entries as (name, count) pairs, highest count first.
topcasts = cast.most_common(n=10)

In [19]:
# Parallel label/count lists for the cast chart; counts also set the colour scale.
tcast_labels = [name for name, _ in topcasts]
tcast_counts = [count for _, count in topcasts]
tcast = px.bar(x=tcast_labels, y=tcast_counts, color=tcast_counts)
tcast.update_layout(
    title='TOP CASTS',
    xaxis_title='Name of Cast',
    yaxis_title='Count',
)


**Top Years**

In [20]:
# Number of titles per release year.
ryear = Counter(df['release_year'])

In [21]:
# Ten release years with the most titles, as (year, count) pairs.
topry = ryear.most_common(n=10)

In [22]:
# Parallel year/count lists for the release-year chart.
tyear_labels = [year for year, _ in topry]
tyear_counts = [count for _, count in topry]
tyear = px.bar(x=tyear_labels, y=tyear_counts, color=tyear_counts, width=1000)
tyear.update_layout(
    title='TOP YEARS',
    xaxis_title='Years',
    yaxis_title='Count',
)


**Top Movies Duration**

In [23]:
# Boolean mask selecting the rows whose type is 'Movie'.
movies = df['type'].eq('Movie')

In [24]:
# Frequency of each duration value among the movie rows.
md = Counter(df.loc[movies, 'duration'])

In [25]:
# Ten most common movie durations as (duration, count) pairs.
topmd = md.most_common(n=10)

In [26]:
# Parallel duration/count lists for the movie-duration chart.
mdt_labels = [duration for duration, _ in topmd]
mdt_counts = [count for _, count in topmd]
mdt = px.bar(x=mdt_labels, y=mdt_counts, color=mdt_counts, width=1000)
mdt.update_layout(
    title='TOP MOVIES DURATION',
    xaxis_title='Duration',
    yaxis_title='Count',
)


**Top Shows Duration**

In [27]:
# Boolean mask selecting the rows whose type is 'TV Show'.
shows = df['type'].eq('TV Show')

In [28]:
# Frequency of each duration value among the TV-show rows.
sd = Counter(df.loc[shows, 'duration'])

In [29]:
# Ten most common show durations as (duration, count) pairs.
topsd = sd.most_common(n=10)

In [30]:
# Parallel duration/count lists for the show-duration chart.
sdt_labels = [duration for duration, _ in topsd]
sdt_counts = [count for _, count in topsd]
sdt = px.bar(x=sdt_labels, y=sdt_counts, color=sdt_counts, width=1000)
sdt.update_layout(
    title='TOP SHOWS DURATION',
    xaxis_title='Duration',
    yaxis_title='Count',
)


**Top Genres**

In [31]:
# Count titles per individual genre.
# listed_in stores several genres in one comma-separated string (see the df.head()
# output), so counting raw strings ranks genre *combinations*. Split each entry and
# tally every genre separately.
tl = Counter(
    genre.strip()
    for entry in df['listed_in'].dropna()
    for genre in entry.split(',')
    if genre.strip()  # ignore empty fragments left by stray commas
)

In [32]:
# Ten most frequent genre entries as (genre, count) pairs.
topl = tl.most_common(n=10)

In [33]:
# Parallel genre/count lists for the genre chart.
toplisted_labels = [genre for genre, _ in topl]
toplisted_counts = [count for _, count in topl]
toplisted = px.bar(x=toplisted_labels, y=toplisted_counts, color=toplisted_counts, width=1000)
toplisted.update_layout(
    title='TOP Genres',
    xaxis_title='Genres',
    yaxis_title='Count',
)


**Top Ratings**

In [34]:
# Number of titles per rating label.
tr = Counter(df['rating'])

In [35]:
# Ten most frequent ratings as (rating, count) pairs.
topr = tr.most_common(n=10)

In [36]:
# Parallel rating/count lists for the ratings chart.
toprating_labels = [rating for rating, _ in topr]
toprating_counts = [count for _, count in topr]
toprating = px.bar(x=toprating_labels, y=toprating_counts, color=toprating_counts, width=1000)
toprating.update_layout(
    title='TOP Ratings',
    xaxis_title='Rating Name',
    yaxis_title='Count',
)


## Amazon Prime

In [3]:
# Colab-only step: opens the browser upload dialog. The cell output shows the file being
# saved as amazon_prime_titles.csv, which the read_csv cell then loads by that name.
# NOTE(review): this repeats the google.colab import from the Netflix upload cell and
# rebinds `uploaded`, discarding the earlier upload result.
from google.colab import files
uploaded = files.upload()

Saving amazon_prime_titles.csv to amazon_prime_titles.csv


In [5]:
# Read the uploaded Amazon Prime catalogue from the working directory.
df1 = pd.read_csv("amazon_prime_titles.csv")

In [6]:
# Preview the first five rows of the Amazon Prime frame.
df1.head(n=5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,The Grand Seduction,Don McKellar,"Brendan Gleeson, Taylor Kitsch, Gordon Pinsent",Canada,"March 30, 2021",2014,,113 min,"Comedy, Drama",A small fishing village must procure a local d...
1,s2,Movie,Take Care Good Night,Girish Joshi,"Mahesh Manjrekar, Abhay Mahajan, Sachin Khedekar",India,"March 30, 2021",2018,13+,110 min,"Drama, International",A Metro Family decides to fight a Cyber Crimin...
2,s3,Movie,Secrets of Deception,Josh Webber,"Tom Sizemore, Lorenzo Lamas, Robert LaSardo, R...",United States,"March 30, 2021",2017,,74 min,"Action, Drama, Suspense",After a man discovers his wife is cheating on ...
3,s4,Movie,Pink: Staying True,Sonia Anderson,"Interviews with: Pink, Adele, Beyoncé, Britney...",United States,"March 30, 2021",2014,,69 min,Documentary,"Pink breaks the mold once again, bringing her ..."
4,s5,Movie,Monster Maker,Giles Foster,"Harry Dean Stanton, Kieran O'Brien, George Cos...",United Kingdom,"March 30, 2021",1989,,45 min,"Drama, Fantasy",Teenage Matt Banting wants to work with a famo...


**Top Directors and Countries**

In [37]:
# Count Amazon titles per individual director.
# Unlike the Netflix frame, df1 is never passed through dropna(), so missing
# directors would be counted as their own bucket; drop them before counting.
# NOTE(review): assumes co-directed titles use one comma-separated string — confirm.
adirectors = Counter(
    name.strip()
    for credit in df1['director'].dropna()
    for name in credit.split(',')
    if name.strip()  # ignore empty fragments left by stray commas
)

In [38]:
# Ten most frequent Amazon directors as (name, count) pairs.
atopdirectors = adirectors.most_common(n=10)

In [39]:
# Parallel label/count lists for the Amazon director chart.
atd_labels = [name for name, _ in atopdirectors]
atd_counts = [count for _, count in atopdirectors]
# NOTE(review): the fixed y-range clips any bar taller than 300 — presumably to stop
# one dominant bucket from flattening the rest; confirm it is still wanted.
atd = px.bar(x=atd_labels, y=atd_counts, range_y=[0, 300], color=atd_counts)
atd.update_layout(
    title='TOP DIRECTORS',
    xaxis_title='Directors',
    yaxis_title='No. of Shows/Movies',
)


In [40]:
# Count Amazon titles per individual country.
# df1 is never cleaned with dropna(), so missing countries would otherwise be
# tallied as a bucket of their own; exclude them.
# NOTE(review): assumes co-productions use one comma-separated string — confirm.
acountry = Counter(
    name.strip()
    for entry in df1['country'].dropna()
    for name in entry.split(',')
    if name.strip()  # ignore empty fragments left by stray commas
)

In [41]:
# Ten most frequent Amazon countries as (country, count) pairs.
atopcountry = acountry.most_common(n=10)

In [42]:
# Parallel label/count lists for the Amazon country chart.
atco_labels = [country for country, _ in atopcountry]
atco_counts = [count for _, count in atopcountry]
# Colour the bars by count, as every other "top N" chart in this notebook does;
# this was the only chart drawn without the colour scale.
# NOTE(review): the fixed y-range clips any bar taller than 400 — confirm it is wanted.
atco = px.bar(x=atco_labels, y=atco_counts, color=atco_counts, range_y=[0, 400])
atco.update_layout(
    title='TOP COUNTRY',
    xaxis_title='Country Name',
    yaxis_title='No. of Movies/Shows',
)


**Top Cast**

In [43]:
# Count appearances per individual Amazon cast member.
# The cast column holds a whole comma-separated cast list per title (see the
# df1.head() output), so counting raw strings ranks identical cast *lists*. Split
# each list and tally every name; dropna() excludes titles with no cast recorded,
# since df1 is never cleaned.
acast = Counter(
    name.strip()
    for members in df1['cast'].dropna()
    for name in members.split(',')
    if name.strip()  # ignore empty fragments left by stray commas
)

In [44]:
# Ten most frequent Amazon cast entries as (name, count) pairs.
atopcast = acast.most_common(n=10)

In [45]:
# Parallel label/count lists for the Amazon cast chart.
atc_labels = [name for name, _ in atopcast]
atc_counts = [count for _, count in atopcast]
# NOTE(review): the fixed y-range clips any bar taller than 200 — confirm it is wanted.
atc = px.bar(x=atc_labels, y=atc_counts, color=atc_counts, range_y=[0, 200])
atc.update_layout(
    title='TOP CAST',
    xaxis_title='Name of Cast',
    yaxis_title='No. of Movies/Shows',
)


**Top Year**

In [46]:
# Number of Amazon titles per release year.
ayear = Counter(df1['release_year'])
# NOTE(review): keeps 8 entries while the Netflix counterpart keeps 10 — confirm intent,
# since both lists are plotted side by side in the comparison section.
toprya = ayear.most_common(n=8)
tyeara_labels = [year for year, _ in toprya]
tyeara_counts = [count for _, count in toprya]
tyeara = px.bar(x=tyeara_labels, y=tyeara_counts, color=tyeara_counts, width=1000)
tyeara.update_layout(
    title='TOP YEARS',
    xaxis_title='Years',
    yaxis_title='Count',
)


**Top Movies Duration**

In [47]:
# Boolean mask selecting the Amazon rows whose type is 'Movie'.
amovies = df1['type'].eq('Movie')

In [48]:
# Frequency of each duration value among Amazon movies.
# The mask is built inline instead of reading `amovies`: this cell rebinds `amovies`
# from a boolean mask to a Counter, so indexing df1 with it broke on any re-run.
# The name `amovies` still ends up bound to the Counter, as before.
amovies = Counter(df1.loc[df1['type'] == 'Movie', 'duration'])
topmovies = amovies.most_common(8)
tmovies_labels = [duration for duration, _ in topmovies]
tmovies_counts = [count for _, count in topmovies]
tmovies = px.bar(x=tmovies_labels, y=tmovies_counts, color=tmovies_counts, width=1000)
tmovies.update_layout(
    title='TOP Movie Duration',
    xaxis_title='Duration(Min)',
    yaxis_title='Count',
)


**Top Shows Duration**

In [49]:
# Boolean mask selecting the Amazon rows whose type is 'TV Show'.
ashows = df1['type'].eq('TV Show')

In [50]:
# Frequency of each duration value among Amazon TV shows.
# The mask is built inline instead of reading `ashows`: this cell rebinds `ashows`
# from a boolean mask to a Counter, so indexing df1 with it broke on any re-run.
# The name `ashows` still ends up bound to the Counter, as before.
ashows = Counter(df1.loc[df1['type'] == 'TV Show', 'duration'])
atopshows = ashows.most_common(8)
atshows_labels = [duration for duration, _ in atopshows]
atshows_counts = [count for _, count in atopshows]
atshows = px.bar(x=atshows_labels, y=atshows_counts, color=atshows_counts, width=1000)
atshows.update_layout(
    title='TOP Shows Duration',
    xaxis_title='Duration',
    yaxis_title='Count',
)


**Top Genres**

In [51]:
# Count Amazon titles per individual genre.
# listed_in stores several genres in one comma-separated string (see the df1.head()
# output), so counting raw strings ranks genre *combinations*. Split each entry and
# tally every genre; dropna() guards against missing values, since df1 is never cleaned.
atl = Counter(
    genre.strip()
    for entry in df1['listed_in'].dropna()
    for genre in entry.split(',')
    if genre.strip()  # ignore empty fragments left by stray commas
)
atopl = atl.most_common(10)
atoplisted_labels = [genre for genre, _ in atopl]
atoplisted_counts = [count for _, count in atopl]
atoplisted = px.bar(x=atoplisted_labels, y=atoplisted_counts, color=atoplisted_counts, width=1000)
atoplisted.update_layout(
    title='TOP Genres',
    xaxis_title='Genres',
    yaxis_title='Count',
)


**Top Ratings**

In [52]:
# Number of Amazon titles per rating label.
# Several rows have no rating (visible in the df1.head() output) and df1 is never
# cleaned, so missing values would be counted as a "rating" of their own and shown
# as a bar. Exclude them before counting.
atr = Counter(df1['rating'].dropna())
atopr = atr.most_common(10)
atoprating_labels = [rating for rating, _ in atopr]
atoprating_counts = [count for _, count in atopr]
atoprating = px.bar(x=atoprating_labels, y=atoprating_counts, color=atoprating_counts, width=1000)
atoprating.update_layout(
    title='TOP Ratings',
    xaxis_title='Rating Name',
    yaxis_title='Count',
)


## Comparing Netflix & Amazon Prime

**Top Directors**

In [53]:
# Netflix vs Amazon top directors on one pair of axes.
# A single plot needs no subplot grid, so the figure is created directly.
fig = go.Figure()
fig.add_trace(go.Bar(
    x=[name for name, _ in topdirectors],
    y=[count for _, count in topdirectors],
    name='Netflix', width=.5))
fig.add_trace(go.Bar(
    x=[name for name, _ in atopdirectors],
    y=[count for _, count in atopdirectors],
    name='Amazon', width=.5))
# Title typo ("Comparision") fixed; axis titles added so the chart stands alone.
fig.update_layout(
    height=600, width=800,
    title_text="Top Directors Comparison",
    xaxis_title='Directors',
    yaxis_title='No. of Shows/Movies',
)

**Top Countries**

In [54]:
# Netflix vs Amazon top countries on one pair of axes.
# A single plot needs no subplot grid, so the figure is created directly.
fig = go.Figure()
fig.add_trace(go.Bar(
    x=[country for country, _ in topcountries],
    y=[count for _, count in topcountries],
    name='Netflix', width=.5))
fig.add_trace(go.Bar(
    x=[country for country, _ in atopcountry],
    y=[count for _, count in atopcountry],
    name='Amazon', width=.5))
# Title typo ("Comparision") fixed; axis titles added so the chart stands alone.
fig.update_layout(
    height=600, width=800,
    title_text="Top Countries Comparison",
    xaxis_title='Country',
    yaxis_title='No. of Shows/Movies',
)

**Top Cast Comparison**

In [55]:
# Netflix vs Amazon top cast entries on one pair of axes.
# A single plot needs no subplot grid, so the figure is created directly.
fig = go.Figure()
fig.add_trace(go.Bar(
    x=[name for name, _ in topcasts],
    y=[count for _, count in topcasts],
    name='Netflix', width=.5))
fig.add_trace(go.Bar(
    x=[name for name, _ in atopcast],
    y=[count for _, count in atopcast],
    name='Amazon', width=.5))
# Title typo ("Comparision") fixed; axis titles added so the chart stands alone.
fig.update_layout(
    height=600, width=800,
    title_text="Top Cast Comparison",
    xaxis_title='Name of Cast',
    yaxis_title='No. of Shows/Movies',
)

**Top Years**

In [56]:
# Netflix vs Amazon busiest release years on one pair of axes.
# A single plot needs no subplot grid, so the figure is created directly.
fig = go.Figure()
fig.add_trace(go.Bar(
    x=[year for year, _ in topry],
    y=[count for _, count in topry],
    name='Netflix', width=.5))
fig.add_trace(go.Bar(
    x=[year for year, _ in toprya],
    y=[count for _, count in toprya],
    name='Amazon', width=.5))
# Title typo ("Comparision") fixed; axis titles added so the chart stands alone.
fig.update_layout(
    height=600, width=800,
    title_text="Top Years Comparison",
    xaxis_title='Release Year',
    yaxis_title='Count',
)

**Top Movie Duration**

In [57]:
# Netflix vs Amazon most common movie durations on one pair of axes.
# A single plot needs no subplot grid, so the figure is created directly.
fig = go.Figure()
fig.add_trace(go.Bar(
    x=[duration for duration, _ in topmd],
    y=[count for _, count in topmd],
    name='Netflix', width=.5))
fig.add_trace(go.Bar(
    x=[duration for duration, _ in topmovies],
    y=[count for _, count in topmovies],
    name='Amazon', width=.5))
# Title typo ("Comparision") fixed; axis titles added so the chart stands alone.
fig.update_layout(
    height=600, width=800,
    title_text="Top Movie Duration Comparison",
    xaxis_title='Duration',
    yaxis_title='Count',
)

**Top Show Duration**

In [58]:
# Netflix vs Amazon most common show durations on one pair of axes.
# A single plot needs no subplot grid, so the figure is created directly.
fig = go.Figure()
fig.add_trace(go.Bar(
    x=[duration for duration, _ in topsd],
    y=[count for _, count in topsd],
    name='Netflix', width=.5))
fig.add_trace(go.Bar(
    x=[duration for duration, _ in atopshows],
    y=[count for _, count in atopshows],
    name='Amazon', width=.5))
# Title typo ("Comparision") fixed; axis titles added so the chart stands alone.
fig.update_layout(
    height=600, width=800,
    title_text="Top Show Duration Comparison",
    xaxis_title='Duration',
    yaxis_title='Count',
)

**Top Listed-In**

In [59]:
# Netflix vs Amazon most frequent listed_in (genre) entries on one pair of axes.
# A single plot needs no subplot grid, so the figure is created directly.
fig = go.Figure()
fig.add_trace(go.Bar(
    x=[genre for genre, _ in topl],
    y=[count for _, count in topl],
    name='Netflix', width=.5))
fig.add_trace(go.Bar(
    x=[genre for genre, _ in atopl],
    y=[count for _, count in atopl],
    name='Amazon', width=.5))
# Title typo ("Comparision") fixed; axis titles added so the chart stands alone.
fig.update_layout(
    height=600, width=800,
    title_text="Top Listed-In Comparison",
    xaxis_title='Genres',
    yaxis_title='Count',
)

**Top Ratings**

In [60]:
# Netflix vs Amazon most frequent ratings on one pair of axes.
# A single plot needs no subplot grid, so the figure is created directly.
fig = go.Figure()
fig.add_trace(go.Bar(
    x=[rating for rating, _ in topr],
    y=[count for _, count in topr],
    name='Netflix', width=.5))
fig.add_trace(go.Bar(
    x=[rating for rating, _ in atopr],
    y=[count for _, count in atopr],
    name='Amazon', width=.5))
# Title typo ("Comparision") fixed; axis titles added so the chart stands alone.
fig.update_layout(
    height=600, width=800,
    title_text="Top Rating Comparison",
    xaxis_title='Rating Name',
    yaxis_title='Count',
)