In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.image as mpimg
import math
import plotly.express as px
import plotly.graph_objects as go

from matplotlib.offsetbox import (TextArea, DrawingArea, OffsetImage,
                                  AnnotationBbox)
from plotly.colors import n_colors
from plotly.subplots import make_subplots
from IPython.display import Image
from colorama import Fore, Back, Style
# Short aliases for colorama escape codes: five foreground colours plus the
# reset-all style code, kept under brief names for use inside strings.
y_ = Fore.YELLOW
r_ = Fore.RED
g_ = Fore.GREEN
b_ = Fore.BLUE
m_ = Fore.MAGENTA
sr_ = Style.RESET_ALL

In [None]:
# One hex colour per platform; later cells index this list in the order
# Netflix (0), Hulu (1), Prime Video (2), Disney+ (3).
custom_colors = ["#ff6b6b","#95d5b2","#a2d2ff","#72efdd"]
# sns.set_palette() returns None, so build the palette object first and keep
# that in customPalette, then install it as the default colour cycle.
customPalette = sns.color_palette(custom_colors)
sns.set_palette(customPalette)

In [None]:
# Preview the four platform colours as a row of swatches.
sns.palplot(sns.color_palette(custom_colors),size=1)

In [None]:
# Reversed light palette built from custom_colors[0]; shown as swatches below.
netflix_p = sns.light_palette(custom_colors[0], reverse=True)
sns.palplot(sns.color_palette(netflix_p),size=1)

In [None]:
# Reversed light palette built from custom_colors[1]; shown as swatches below.
hulu_p = sns.light_palette(custom_colors[1], reverse=True)
sns.palplot(sns.color_palette(hulu_p),size=1)

In [None]:

# Reversed light palette built from custom_colors[2]; shown as swatches below.
prime_p = sns.light_palette(custom_colors[2], reverse=True)
sns.palplot(sns.color_palette(prime_p),size=1)

In [None]:
# Reversed palette built from custom_colors[3]; shown as swatches below.
# NOTE(review): this one uses dark_palette while the other three platform
# palettes use light_palette -- confirm the difference is intentional.
disney_p = sns.dark_palette(custom_colors[3], reverse=True)
sns.palplot(sns.color_palette(disney_p),size=1)

In [None]:
# Load the two source tables from CSV files in the working directory.
# NOTE(review): later cells read per-platform columns ('Netflix', 'Hulu',
# 'Prime Video', 'Disney+') from df_movies -- confirm 'netflix_titles.csv'
# is the intended multi-platform movies file.
df_tv = pd.read_csv('tv_shows.csv')
df_movies = pd.read_csv('netflix_titles.csv')

In [None]:
# Drop the leftover index column written by the CSV export.
# errors='ignore' keeps the cell re-runnable: on a second execution the
# column is already gone and a plain drop() would raise KeyError.
df_tv = df_tv.drop(columns=['Unnamed: 0'], errors='ignore')
df_tv.head(5)

In [None]:
# Drop the leftover index column and the 'ID' column.
# errors='ignore' keeps the cell re-runnable: on a second execution the
# columns are already gone and a plain drop() would raise KeyError.
df_movies = df_movies.drop(columns=['Unnamed: 0', 'ID'], errors='ignore')
df_movies.head(5)

In [None]:
# Number of distinct 'Directors' entries (a missing value counts as one entry).
df_movies['Directors'].nunique(dropna=False)

In [None]:
# Number of distinct 'Genres' entries (a missing value counts as one entry).
df_movies['Genres'].nunique(dropna=False)

In [None]:
def splitting(dataframe,col):
    result = dataframe[col].str.get_dummies(',')
    print('Done!')
    return result

In [None]:
# Indicator frames for the 'Genres' and 'Language' columns of df_movies,
# built in that order (splitting() prints 'Done!' once per column).
m_genres, m_lang = (
    splitting(df_movies, source_col) for source_col in ('Genres', 'Language')
)

In [None]:
def val_sum(df,c):
    """Return the row-wise total of ``df[c]``.

    ``c`` is passed straight to ``df[...]``; the selection is then summed
    along axis 0.
    """
    selected = df[c]
    return selected.sum(axis=0)

In [None]:
# Per-platform title totals, flattened in the order
# [movies x 4 platforms, TV shows x 4 platforms].
dfs = [df_movies,df_tv]
cols = ['Netflix','Hulu','Prime Video','Disney+']
val_counts = []

for frame in dfs:
    val_counts.extend(val_sum(frame, platform) for platform in cols)

In [None]:
# Display the eight totals (movies first, then TV shows; platform order as in cols).
val_counts

In [None]:
def donut(i,df,sizes,title,wedge_labels=None,wedge_colors=None,wedge_explode=None):
    """Draw one donut chart (a pie with a white disc in the middle).

    Parameters
    ----------
    i : int
        Subplot position code passed to ``plt.subplot`` (e.g. ``121``).
    df : object
        Unused; kept so existing calls keep working.
    sizes : sequence of numbers
        Wedge sizes.
    title : str
        Title shown above the chart.
    wedge_labels, wedge_colors, wedge_explode : optional
        Per-wedge labels, colours and explode offsets. When left as ``None``
        the notebook-level ``labels``, ``colors`` and ``explode`` variables
        are used, which is what the original implementation always did.
    """
    # Fall back to the notebook globals only when the caller gave nothing,
    # so the function can also be used before/without those globals.
    if wedge_labels is None:
        wedge_labels = labels
    if wedge_colors is None:
        wedge_colors = colors
    if wedge_explode is None:
        wedge_explode = explode

    ax = plt.subplot(i)
    ax.pie(sizes, explode=wedge_explode, labels=wedge_labels, colors=wedge_colors,
           autopct='%1.1f%%', shadow=True)

    # The white disc on top of the pie is what turns it into a donut.
    centre_circle = plt.Circle((0,0),0.5,color='black', fc='white',linewidth=1.25)
    ax.add_artist(centre_circle)
    ax.set_title(title)
    ax.axis('equal')

In [None]:
# A bare figure is enough here: donut() adds its own axes through
# plt.subplot(). plt.subplots() would return a (figure, axes) tuple and
# create an extra full-size axes underneath the two donuts.
fig = plt.figure(figsize=(16, 8))

# Settings read by donut() for both charts.
labels = 'Netflix', 'Hulu','Prime','Disney+'
colors = custom_colors
explode = (0, 0, 0, 0)

# val_counts holds the four movie totals followed by the four TV totals.
sizes1 = val_counts[0:4]
sizes2 = val_counts[4:8]

donut(121,df_movies,sizes1,'Movies')
donut(122,df_tv,sizes2,'TV shows')
plt.show()

In [None]:
# Two headline numbers taken from val_counts instead of hard-coded literals,
# so they always match the loaded data.
# val_counts order: movies on [Netflix, Hulu, Prime Video, Disney+], then
# TV shows on the same four platforms.
movies_on_prime = int(val_counts[2])
tv_on_netflix = int(val_counts[4])

fig = make_subplots(rows=1, cols=2)

fig.add_trace(go.Indicator(
    mode = "number",
    value = movies_on_prime,
    # custom_colors[2] is the Prime Video colour.
    number={'font':{'color': custom_colors[2],'size':100}},
    title = {"text": "Movies<br><span style='font-size:0.8em;color:gray'>On Prime Video</span>"},
    domain = {'x': [0, 0.5], 'y': [0.6, 1]}))

fig.add_trace(go.Indicator(
    mode = "number",
    value = tv_on_netflix,
    # custom_colors[0] is the Netflix colour (this indicator previously
    # reused the Prime Video colour).
    number={'font':{'color': custom_colors[0],'size':100}},
    title = {"text": "TV Shows<br><span style='font-size:0.8em;color:gray'>On Netflix</span>"},
    domain = {'x': [0.5, 1], 'y': [0, 0.4]}))

fig.show()

In [None]:
# Keep only the text before the first '/' in both rating columns of both
# tables (the values stay strings).
for table in (df_movies, df_tv):
    for rating_col in ('IMDb', 'Rotten Tomatoes'):
        table[rating_col] = table[rating_col].str.split('/').str[0]

In [None]:
def sunburst(dataframe,platform,c):
    """Show a sunburst chart of the ten top-sorted titles of one platform.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table with 'Title', 'Genres', 'IMDb' and the platform flag column.
    platform : str
        Flag column name; only rows where it equals 1 are kept.
    c : str
        Name of the continuous colour scale handed to plotly.

    Rows are sorted by 'IMDb' in descending order and the first ten are
    plotted with the path Title -> Genres, sized and coloured by 'IMDb'.
    """
    on_platform = dataframe.loc[dataframe[platform] == 1]
    top_ten = on_platform.sort_values(by='IMDb', ascending=False).head(10)
    chart = px.sunburst(
        top_ten,
        path=['Title','Genres'],
        values='IMDb',
        color='IMDb',
        color_continuous_scale=c,
    )
    chart.show()

In [None]:
# Ten top-sorted df_movies rows flagged Netflix, drawn with the 'amp' colour scale.
sunburst(df_movies,'Netflix','amp')

In [None]:
# Ten top-sorted df_movies rows flagged Hulu, drawn with the 'Blugrn' colour scale.
sunburst(df_movies,'Hulu','Blugrn')

In [None]:
# Ten top-sorted df_movies rows flagged Prime Video, drawn with the 'haline' colour scale.
sunburst(df_movies,'Prime Video','haline')

In [None]:
# Ten top-sorted df_movies rows flagged Disney+, drawn with the 'dense' colour scale.
sunburst(df_movies,'Disney+','dense')

In [None]:
def kde(i,dataframe,platform,c):
    """Draw two density plots for one platform on adjacent subplots.

    Parameters
    ----------
    i : int
        Subplot position code for the 'Runtime' panel; the 'Year' panel is
        drawn at ``i + 1``.
    dataframe : pandas.DataFrame
        Table with 'Runtime', 'Year' and the platform flag column.
    platform : str
        Flag column name; only rows where it equals 1 are plotted.
    c : int
        Index into ``custom_colors`` selecting the curve colour.
    """
    on_platform = dataframe.loc[dataframe[platform] == 1]
    curve_colour = custom_colors[c]

    # (subplot code, column to plot, x-axis caption) for the two panels.
    panels = (
        (i, 'Runtime', 'Runtime in minutes'),
        (i + 1, 'Year', 'Release Year'),
    )
    for position, column, caption in panels:
        plt.subplot(position)
        sns.kdeplot(data=on_platform[column], color=curve_colour, shade=True)
        plt.xlabel(caption, fontsize=15)
        plt.legend(fontsize=15)

In [None]:
plt.figure(figsize=(16, 8))

# One row of the 4x2 grid per platform: the left panel sits at 421, 423,
# 425, 427 and the colour index matches the platform's position.
for slot, service in enumerate(('Netflix', 'Hulu', 'Prime Video', 'Disney+')):
    kde(421 + 2 * slot, df_movies, service, slot)

In [None]:
# Titles per release year (1980 onwards) on each platform.
post_1980 = df_movies[df_movies.Year >= 1980]
year_movies = post_1980.groupby('Year')[['Netflix','Hulu','Prime Video','Disney+']].sum()

plt.figure(figsize = (16, 8))
# One labelled line per platform; without labels and a legend the four
# curves cannot be told apart. Colours are taken explicitly from
# custom_colors (same order as the platform columns) rather than relying on
# the current default colour cycle.
for platform, line_colour in zip(year_movies.columns, custom_colors):
    sns.lineplot(x=year_movies.index, y=year_movies[platform],
                 label=platform, color=line_colour)
plt.xlabel('Release Year', fontsize = 15)
plt.ylabel('Count', fontsize = 15)
plt.legend(fontsize = 15)
plt.show()

In [None]:
# Independent copy of the TV table restricted to rows that have an 'Age' value.
has_age = df_tv['Age'].notna()
df_t = df_tv[has_age].copy()

In [None]:
# Turn age labels such as '16+' or 'all' into integers.
# - regex=False makes the '+' replacement an explicit literal match instead
#   of depending on the pandas-version-specific default for `regex`.
# - Casting to str first keeps the cell re-runnable: once 'Age' holds
#   integers the .str accessor would otherwise fail.
df_t['Age'] = (
    df_t['Age']
    .astype(str)
    .str.replace('+', '', regex=False)
    .str.replace('all', '0', regex=False)
    .astype(int)
)

In [None]:
def barplot(i,dataframe,platform,p):
    """Bar chart of how many rows of one platform fall into each 'Age' value.

    Parameters
    ----------
    i : int
        Subplot position code passed to ``plt.subplot``.
    dataframe : pandas.DataFrame
        Table with an 'Age' column and the platform flag column.
    platform : str
        Flag column name; only rows where it equals 1 are counted. Also used
        as the y-axis label.
    p : palette
        Passed to ``sns.barplot`` as ``palette``.
    """
    ax = plt.subplot(i)
    on_platform = dataframe.loc[dataframe[platform] == 1]

    # Two-column table: the age value and how often it occurs.
    age_counts = on_platform['Age'].value_counts().reset_index()
    age_counts.columns = ['age', 'count']

    sns.barplot(data=age_counts, x="age", y="count", palette=p, ax=ax)
    ax.set_xlabel('Age', fontsize=15)
    ax.set_ylabel(platform, fontsize=15)

In [None]:
# Heatmap of per-platform title totals for every 'Age' value in df_t.
platform_flags = ['Netflix','Hulu','Prime Video','Disney+']
tv_age = df_t.groupby('Age')[platform_flags].sum()

plt.figure(figsize=(16, 8))
sns.heatmap(tv_age, cmap="YlGnBu")
plt.show()

In [None]:
# First twenty TV rows after sorting by 'IMDb' (descending), reduced to the
# title, the rating and the four platform flags; the rating is then cast
# to float.
shown_columns = ['Title','IMDb','Netflix','Hulu','Prime Video','Disney+']
r = df_tv.sort_values(by='IMDb', ascending=False).head(20)[shown_columns]
r = r.astype({'IMDb': float})

In [None]:
# Styled view of the top-20 table: an in-cell bar per platform flag in that
# platform's colour, plus a 'Purples' background gradient.
# The former `.bar(subset=["IMDb"], color='#')` step was dropped: '#' is not
# a valid colour, so it could not draw a bar; the IMDb column is still shaded
# by the gradient.
(
    r.style
    .bar(subset=["Netflix"], color='#ff6b6b')
    .bar(subset=["Hulu"], color='#95d5b2')
    .bar(subset=["Prime Video"], color='#a2d2ff')
    .bar(subset=["Disney+"], color='#72efdd')
    .background_gradient(cmap='Purples')
)

In [None]:
# Treemap of the first ten movie rows after sorting by 'IMDb' (descending).
# The constant 'Movies' column provides a single root node for the path.
r1 = df_movies.sort_values(by='IMDb', ascending=False).head(10)
r1['Movies']='Movies'
fig = px.treemap(
    r1,
    path=['Movies','Title', 'Genres','Language'],
    values='IMDb',
    color='IMDb',
    color_continuous_scale='Purp',
)
fig.show()

In [None]:
# Working copy of the movies table without any row that has a missing value.
df_m = df_movies.copy().dropna()

# 'Rotten Tomatoes': strip '%' and cast to int; 'Directors': force to str.
df_m = df_m.assign(**{
    'Rotten Tomatoes': df_m['Rotten Tomatoes'].str.replace('%','').astype(str).astype(int),
    'Directors': df_m['Directors'].astype('str'),
})

In [None]:
# Point plot of 'Directors' against 'Rotten Tomatoes' for the first twenty
# rows of df_m after sorting by that score (descending).
df_m = df_m.sort_values(by='Rotten Tomatoes', ascending=False)
rating = df_m.head(20)
sns.catplot(data=rating, x="Rotten Tomatoes", y="Directors",
            kind="point", palette=netflix_p, height=7);

In [None]:
def dist(i,dataframe,platform,c):
    """Draw vertical distribution plots of both rating columns for a platform.

    Parameters
    ----------
    i : int
        Subplot position code for the 'IMDb' panel; the 'Rotten Tomatoes'
        panel is drawn at ``i + 1``.
    dataframe : pandas.DataFrame
        Table with 'IMDb', 'Rotten Tomatoes' and the platform flag column.
    platform : str
        Flag column name; only rows where it equals 1 are plotted. Appended
        to both y-axis labels.
    c : int
        Index into ``custom_colors`` selecting the plot colour.
    """
    on_platform = dataframe.loc[dataframe[platform] == 1]
    plot_colour = custom_colors[c]

    # (subplot code, column to plot, y-label prefix) for the two panels.
    panels = (
        (i, 'IMDb', 'IMDb rating, '),
        (i + 1, 'Rotten Tomatoes', 'Rotten Tomatoes, '),
    )
    for position, column, label_prefix in panels:
        plt.subplot(position)
        sns.distplot(on_platform[column], color=plot_colour, vertical=True)
        plt.ylabel(label_prefix + platform, fontsize=15)

In [None]:
plt.figure(figsize=(20, 20))

# One row of the 4x2 grid per platform: the left panel sits at 421, 423,
# 425, 427 and the colour index matches the platform's position.
for slot, service in enumerate(('Netflix', 'Hulu', 'Prime Video', 'Disney+')):
    dist(421 + 2 * slot, df_m, service, slot)

In [None]:
# Cast both rating columns to float, then plot IMDb against Rotten Tomatoes
# as a binned regression plot.
for score_col in ('Rotten Tomatoes', 'IMDb'):
    df_m[score_col] = df_m[score_col].astype(float)

plt.figure(figsize=(16, 8))
sns.regplot(
    x=df_m['Rotten Tomatoes'],
    y=df_m['IMDb'],
    x_bins=10,
    marker="+",
    color="#FDE74C",
)
plt.show()

In [None]:
# Movies table with the language / genre indicator columns appended on the right.
df_l_merged = pd.concat((df_movies, m_lang), axis='columns', sort=False)
df_g_merged = pd.concat((df_movies, m_genres), axis='columns', sort=False)

In [None]:
def bar(dataframe,platform,c):
    dataframe=dataframe.loc[dataframe[platform] == 1]
    val_counts = dataframe.iloc[:,15:].sum(axis=0).sort_values(ascending=False)
    val_counts2 = pd.DataFrame(val_counts,columns=['Number of movies'])
    return val_counts2[0:20].style.bar(subset=["Number of movies",], color=c)

In [None]:
# Top-20 column totals of the language-merged table for rows flagged Netflix.
bar(df_l_merged,'Netflix','#ff6b6b')

In [None]:
# Top-20 column totals of the genre-merged table for rows flagged Netflix.
bar(df_g_merged,'Netflix','#ff6b6b')

In [None]:
# Top-20 column totals of the language-merged table for rows flagged Hulu.
bar(df_l_merged,'Hulu','#95d5b2')

In [None]:
# Top-20 column totals of the genre-merged table for rows flagged Hulu.
bar(df_g_merged,'Hulu','#95d5b2')

In [None]:
# Top-20 column totals of the language-merged table for rows flagged Prime Video.
bar(df_l_merged,'Prime Video','#a2d2ff')

In [None]:
# Top-20 column totals of the genre-merged table for rows flagged Prime Video.
bar(df_g_merged,'Prime Video','#a2d2ff')

In [None]:
# Top-20 column totals of the language-merged table for rows flagged Disney+.
bar(df_l_merged,'Disney+','#72efdd')

In [None]:
# Top-20 column totals of the genre-merged table for rows flagged Disney+.
bar(df_g_merged,'Disney+','#72efdd')