## Explore and Preprocess Data

In [1]:
# import libraries
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import dash_daq as daq

import plotly.express as px

import plotly.io as pio
import plotly.graph_objs as go

from collections import Counter
from pycountry_convert import country_name_to_country_alpha3
import pycountry

In [2]:
# Load the Disney+ titles dataset from a CSV file given by a relative path.
df = pd.read_csv('disney_plus_titles.csv')

In [3]:
# (rows, columns) of the loaded frame.
df.shape

(1450, 12)

In [4]:
# Preview the first rows of the frame.
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson","Chris Diamantopoulos, Tony Anselmo, Tress MacN...",,"November 26, 2021",2016,TV-G,23 min,"Animation, Family",Join Mickey and the gang as they duck the halls!
1,s2,Movie,Ernest Saves Christmas,John Cherry,"Jim Varney, Noelle Parker, Douglas Seale",,"November 26, 2021",1988,PG,91 min,Comedy,Santa Claus passes his magic bag to a new St. ...
2,s3,Movie,Ice Age: A Mammoth Christmas,Karen Disher,"Raymond Albert Romano, John Leguizamo, Denis L...",United States,"November 26, 2021",2011,TV-G,23 min,"Animation, Comedy, Family",Sid the Sloth is on Santa's naughty list.
3,s4,Movie,The Queen Family Singalong,Hamish Hamilton,"Darren Criss, Adam Lambert, Derek Hough, Alexa...",,"November 26, 2021",2021,TV-PG,41 min,Musical,"This is real life, not just fantasy!"
4,s5,TV Show,The Beatles: Get Back,,"John Lennon, Paul McCartney, George Harrison, ...",,"November 25, 2021",2021,,1 Season,"Docuseries, Historical, Music",A three-part documentary from Peter Jackson ca...


In [5]:
# Column dtypes and non-null counts (shows which columns have missing values).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1450 entries, 0 to 1449
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       1450 non-null   object
 1   type          1450 non-null   object
 2   title         1450 non-null   object
 3   director      977 non-null    object
 4   cast          1260 non-null   object
 5   country       1231 non-null   object
 6   date_added    1447 non-null   object
 7   release_year  1450 non-null   int64 
 8   rating        1447 non-null   object
 9   duration      1450 non-null   object
 10  listed_in     1450 non-null   object
 11  description   1450 non-null   object
dtypes: int64(1), object(11)
memory usage: 136.1+ KB


In [6]:
# NOTE: the result is only displayed; it is not assigned back, so `df` itself
# still contains 'director' and 'date_added' after this cell.
df.drop(['director','date_added'],axis=1)

Unnamed: 0,show_id,type,title,cast,country,release_year,rating,duration,listed_in,description
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Chris Diamantopoulos, Tony Anselmo, Tress MacN...",,2016,TV-G,23 min,"Animation, Family",Join Mickey and the gang as they duck the halls!
1,s2,Movie,Ernest Saves Christmas,"Jim Varney, Noelle Parker, Douglas Seale",,1988,PG,91 min,Comedy,Santa Claus passes his magic bag to a new St. ...
2,s3,Movie,Ice Age: A Mammoth Christmas,"Raymond Albert Romano, John Leguizamo, Denis L...",United States,2011,TV-G,23 min,"Animation, Comedy, Family",Sid the Sloth is on Santa's naughty list.
3,s4,Movie,The Queen Family Singalong,"Darren Criss, Adam Lambert, Derek Hough, Alexa...",,2021,TV-PG,41 min,Musical,"This is real life, not just fantasy!"
4,s5,TV Show,The Beatles: Get Back,"John Lennon, Paul McCartney, George Harrison, ...",,2021,,1 Season,"Docuseries, Historical, Music",A three-part documentary from Peter Jackson ca...
...,...,...,...,...,...,...,...,...,...,...
1445,s1446,Movie,X-Men Origins: Wolverine,"Hugh Jackman, Liev Schreiber, Danny Huston, wi...","United States, United Kingdom",2009,PG-13,108 min,"Action-Adventure, Family, Science Fiction",Wolverine unites with legendary X-Men to fight...
1446,s1447,Movie,Night at the Museum: Battle of the Smithsonian,"Ben Stiller, Amy Adams, Owen Wilson, Hank Azar...","United States, Canada",2009,PG,106 min,"Action-Adventure, Comedy, Family",Larry Daley returns to rescue some old friends...
1447,s1448,Movie,Eddie the Eagle,"Tom Costello, Jo Hartley, Keith Allen, Dickon ...","United Kingdom, Germany, United States",2016,PG-13,107 min,"Biographical, Comedy, Drama","True story of Eddie Edwards, a British ski-jum..."
1448,s1449,Movie,Bend It Like Beckham,"Parminder Nagra, Keira Knightley, Jonathan Rhy...","United Kingdom, Germany, United States",2003,PG-13,112 min,"Buddy, Comedy, Coming of Age",Despite the wishes of their traditional famili...


In [7]:
# Titles per type, plus the two headline counts used later in the notebook.
Types = df['type'].value_counts()
# Summing a boolean mask counts the rows where the comparison holds.
movies_count = (df['type'] == 'Movie').sum()
tv_shows_count = (df['type'] == 'TV Show').sum()

### Helper Functions

In [8]:
def split_data_by_space(x):
    """Split a string on single spaces.

    Parameters
    ----------
    x : str
        Text to split, e.g. ``"23 min"``.

    Returns
    -------
    list of str or float
        The space-separated pieces, or ``np.nan`` when ``x`` cannot be split
        as a string (for example a missing value that pandas reads as NaN).
    """
    try:
        return x.split(' ')
    except (AttributeError, TypeError):
        # Only "this is not a string" is treated as missing data; any other
        # exception (including KeyboardInterrupt) is allowed to propagate.
        return np.nan

In [9]:
# Keep only the leading number of each duration ("23 min" -> 23, "1 Season" -> 1).
# Casting to str first makes this cell safe to re-run: once the column already
# holds integers, "23" still splits to ["23"] and converts back to 23.
df['duration'] = df['duration'].astype(str).str.split(' ').str[0].astype(int)
df

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Duck the Halls: A Mickey Mouse Christmas Special,"Alonso Ramirez Ramos, Dave Wasson","Chris Diamantopoulos, Tony Anselmo, Tress MacN...",,"November 26, 2021",2016,TV-G,23,"Animation, Family",Join Mickey and the gang as they duck the halls!
1,s2,Movie,Ernest Saves Christmas,John Cherry,"Jim Varney, Noelle Parker, Douglas Seale",,"November 26, 2021",1988,PG,91,Comedy,Santa Claus passes his magic bag to a new St. ...
2,s3,Movie,Ice Age: A Mammoth Christmas,Karen Disher,"Raymond Albert Romano, John Leguizamo, Denis L...",United States,"November 26, 2021",2011,TV-G,23,"Animation, Comedy, Family",Sid the Sloth is on Santa's naughty list.
3,s4,Movie,The Queen Family Singalong,Hamish Hamilton,"Darren Criss, Adam Lambert, Derek Hough, Alexa...",,"November 26, 2021",2021,TV-PG,41,Musical,"This is real life, not just fantasy!"
4,s5,TV Show,The Beatles: Get Back,,"John Lennon, Paul McCartney, George Harrison, ...",,"November 25, 2021",2021,,1,"Docuseries, Historical, Music",A three-part documentary from Peter Jackson ca...
...,...,...,...,...,...,...,...,...,...,...,...,...
1445,s1446,Movie,X-Men Origins: Wolverine,Gavin Hood,"Hugh Jackman, Liev Schreiber, Danny Huston, wi...","United States, United Kingdom","June 4, 2021",2009,PG-13,108,"Action-Adventure, Family, Science Fiction",Wolverine unites with legendary X-Men to fight...
1446,s1447,Movie,Night at the Museum: Battle of the Smithsonian,Shawn Levy,"Ben Stiller, Amy Adams, Owen Wilson, Hank Azar...","United States, Canada","April 2, 2021",2009,PG,106,"Action-Adventure, Comedy, Family",Larry Daley returns to rescue some old friends...
1447,s1448,Movie,Eddie the Eagle,Dexter Fletcher,"Tom Costello, Jo Hartley, Keith Allen, Dickon ...","United Kingdom, Germany, United States","December 18, 2020",2016,PG-13,107,"Biographical, Comedy, Drama","True story of Eddie Edwards, a British ski-jum..."
1448,s1449,Movie,Bend It Like Beckham,Gurinder Chadha,"Parminder Nagra, Keira Knightley, Jonathan Rhy...","United Kingdom, Germany, United States","September 18, 2020",2003,PG-13,112,"Buddy, Comedy, Coming of Age",Despite the wishes of their traditional famili...


In [10]:
def split_data(x):
    """Split a comma-separated string into its pieces.

    The pieces are returned exactly as they appear between the commas, so
    surrounding whitespace is kept (``"a, b"`` gives ``["a", " b"]``).

    Parameters
    ----------
    x : str
        Text to split, e.g. ``"United States, Canada"``.

    Returns
    -------
    list of str or float
        The comma-separated pieces, or ``np.nan`` when ``x`` cannot be split
        as a string (for example a missing value that pandas reads as NaN).
    """
    try:
        return x.split(',')
    except (AttributeError, TypeError):
        # Only "this is not a string" is treated as missing data; any other
        # exception (including KeyboardInterrupt) is allowed to propagate.
        return np.nan

In [11]:
def scatter_graph(dataframe,x,y,title):
    """Return a Plotly Express scatter figure of column ``y`` against column ``x``.

    ``title`` is used as the figure title.
    """
    return px.scatter(dataframe, title=title, x=x, y=y)
    

In [12]:
def bar_graph(dataframe,x,y,title, xtitles, ytitles):
    """Return a grouped Plotly Express bar figure of column ``y`` per ``x``.

    ``xtitles`` and ``ytitles`` replace the column names as axis labels;
    ``title`` is used as the figure title.
    """
    axis_labels = {x: xtitles, y: ytitles}
    return px.bar(
        dataframe,
        x=x,
        y=y,
        title=title,
        labels=axis_labels,
        barmode='group',
    )

In [13]:
def map_graph(dataframe):
    """Return a choropleth figure coloured by the ``count`` column.

    Locations are read from the ``iso`` column and hover names from the
    ``country`` column of ``dataframe``.
    """
    choropleth_options = dict(
        locations='iso',
        color="count",
        hover_name='country',
        title='Movies/TV Shows release in countries',
        height=600,
    )
    return px.choropleth(dataframe, **choropleth_options)

In [14]:
def celebrities_table(dataframe):
    """Render actor counts as a bordered Bootstrap table.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with ``Actor`` and ``Count`` columns (the layout returned by
        ``top10_actors``); one table row is produced per frame row.

    Returns
    -------
    dbc.Table
        Table with a "Celebrity Name" / "Number of Movies" header and one
        body row per actor.
    """
    table_header = [
        html.Thead(html.Tr([html.Th("Celebrity Name"), html.Th("Number of Movies")]))
    ]

    # Cells are read by column label rather than by position, so the table
    # does not depend on column order or on a leading 'index' column.
    body_rows = [
        html.Tr([html.Td(actor), html.Td(count)])
        for actor, count in zip(dataframe['Actor'], dataframe['Count'])
    ]
    table_body = [html.Tbody(body_rows)]

    return dbc.Table(table_header + table_body, bordered=True)

In [15]:
# Distinct raw values of the country column; a single value can list several
# comma-separated countries, and missing values show up as nan.
df['country'].unique()

array([nan, 'United States', 'United States, Canada',
       'United States, Australia', 'Canada',
       'United States, United Kingdom', 'United States, South Korea',
       'Ireland, United States, Canada, United Kingdom, Denmark, Spain, Poland, Hungary',
       'France, United Kingdom', 'United Kingdom, Australia',
       'Ireland, United States', 'Canada, United States, France',
       'France, South Korea, Japan, United States', 'France',
       'United States, United Kingdom, Hungary', 'United States, Germany',
       'United States, United Kingdom, Australia', 'United States, India',
       'United States, Canada, United Kingdom, Singapore, Australia, Thailand',
       'Canada, United States',
       'South Korea, United States, China, Japan',
       'Australia, United Kingdom', 'United Kingdom',
       'United States, United Kingdom, South Korea',
       'United States, United Kingdom, Canada',
       'United States, Germany, United Kingdom',
       'United States, Canada, Ire

In [16]:
def year_country(df):
    """Count titles per country and attach ISO alpha-3 codes.

    Each non-missing ``country`` value is split on commas, so a title that
    lists several countries is counted once for each of them.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``country`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``country``, ``count`` and ``iso``. Rows whose country name
        is not an exact ``pycountry`` country name are dropped. The three
        columns are present even when no row remains.
    """
    with_country = df[df['country'].notna()]

    names = []
    for entry in with_country['country']:
        names.extend(split_data(entry))
    names = [name.strip() for name in names]

    # Explicit column names keep 'country' and 'count' present even when the
    # selection is empty, so downstream plotting can still find 'count'.
    countries_df = pd.DataFrame(
        list(Counter(names).items()), columns=['country', 'count']
    )

    # Map the exact pycountry names to their alpha-3 codes.
    iso_by_name = {country.name: country.alpha_3 for country in pycountry.countries}
    countries_df['iso'] = [
        iso_by_name.get(name, 'Unknown code') for name in countries_df['country']
    ]

    return countries_df[countries_df['iso'] != 'Unknown code']

In [17]:
def year_category(dataframe):
    """Count how many titles are listed in each category.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with a ``listed_in`` column of comma-separated category names.

    Returns
    -------
    pandas.DataFrame
        Columns ``Category`` and ``Count``, one row per distinct category in
        order of first appearance. Missing ``listed_in`` values are skipped.
    """
    # Strip each name: the values are separated by ", ", so without stripping
    # "Family" and " Family" would be counted as two different categories.
    categories = [
        name.strip()
        for entry in dataframe['listed_in'].dropna()
        for name in entry.split(',')
    ]

    categories_count = Counter(categories)
    return pd.DataFrame(list(categories_count.items()), columns=['Category', 'Count'])

In [18]:
def top10_actors(dataframe):
    """Return the ten cast members that appear in the most titles.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with a ``cast`` column of comma-separated names; rows with a
        missing cast are ignored.

    Returns
    -------
    pandas.DataFrame
        At most ten rows with columns ``index``, ``Actor`` and ``Count``,
        sorted by ``Count`` in descending order.
    """
    # Strip each name: the values are separated by ", ", so without stripping
    # "Tom Hanks" and " Tom Hanks" would be counted as two different actors.
    actors = [
        name.strip()
        for entry in dataframe['cast'].dropna()
        for name in entry.split(',')
    ]

    actors_df = pd.DataFrame(list(Counter(actors).items()), columns=['Actor', 'Count'])

    # A stable sort keeps actors with equal counts in first-appearance order,
    # which makes the cut-off at ten rows deterministic.
    ranked = actors_df.sort_values(by='Count', ascending=False, kind='mergesort')

    # reset_index() without drop=True keeps the pre-sort position as a leading
    # 'index' column, so the column layout callers see is unchanged.
    return ranked.reset_index().head(10)

# Dashboard

In [19]:
import dash
from jupyter_dash import JupyterDash
from dash import html
from dash import dcc
import plotly.express as px
from dash.dependencies import Input, Output, State

import dash_bootstrap_components as dbc

In [20]:
# Create the Dash application object, styled with the Bootstrap LUX theme.
app = JupyterDash(external_stylesheets=[dbc.themes.LUX])

In [21]:
# Page layout, top to bottom: a year slider and a navbar holding the
# Movies / TV Shows dropdown, two summary cards, two rows of three panels
# (five charts plus the celebrities table), and a footer.
app.layout = html.Div([
    html.Div([
        html.Div([
                html.Div([
                    # Year selector; its range and marks come from the release years present in df.
                    daq.Slider(
                        id='slider_year',
                        min=df['release_year'].min(),
                        max=df['release_year'].max(),
                        step=None,
                        value=2015,
                        color='#9b21bd',
                        marks = { str(i):str(i) for i in df['release_year'].unique() },
                        handleLabel={"label": "Year", "showCurrentValue": True, 'style':{'height':'33px', 'top':'20px', 'font-size':'10px'}},
                        labelPosition='top'
                    )
                    ],style = {'display': 'flex', 'align-items': 'center', 'justify-content': 'center','padding-top':'10px','background-color':'#1b1f36'}),
                dbc.Navbar(
                        dbc.Container([
                    html.A(
                        # Use row and col to control vertical alignment of logo / brand
                        dbc.Row(
                        [
                            dbc.Col(dbc.NavbarBrand("Disney Plus!",style={'color':'white'})),
                        ],
                        ),
                # NOTE(review): hard-coded local URL; presumably the address the app is served on. Confirm before hosting elsewhere.
                href="http://127.0.0.1:8050/",
                        ),
                    # Content-type selector; its value ('movies' or 'tv') is an input of both callbacks.
                    html.Div([dcc.Dropdown(id='my-dropdown',
                        options=[{'label': 'Movies', 'value': 'movies'},
                                {'label': 'TV Shows', 'value': 'tv'}], value = 'movies',
                        style={'textAlign': 'center', 'width': '300px', 'font-family': 'Sans-Serif', 'color': '#46454f'})],
                        className='dropdown'),
        
                    ]
                ,style = {'height': '10px'}),
                color="#1b1f36"
            )
        ], style={'background-color': '#dbd9ce'}),
        # Summary cards; the two counts are formatted once, when the layout is built.
        html.Div([
                    dbc.Row([
                        dbc.Col([
                            dbc.Card(
                            [
                                dbc.CardImg(src=app.get_asset_url('images.jpg'),style={'height':'2%','width':'25%','padding-top':'5px'},className = 'align-self-center', top=True),
                                dbc.CardBody([
                                    html.H6("Number of Movies", className="card-text",style={'font-size': '20px','color': '#0c1533', 'font-family':'Sans-serif'}),
                                    html.H1("{:d}".format(movies_count), id='num_movies', style={'font-size': '30px','color': '#0c1533'})
                                ])
                            ],
                            style={"width": "25rem",'box-shadow': '0 4px 8px 0 rgba(0,0,0,0.2)','border-radius': '5px'},
                        )
                        ], style={"height": "180px", 'font-family': 'Cursive', "background-color": "transparent", 'text-align':'center','padding-left':'220px'}),

                        dbc.Col([
                            dbc.Card(
                            [
                                dbc.CardImg(src=app.get_asset_url('tv-icon-21.jpg'),style={'height':'2%','width':'25%','padding-top':'5px'},className = 'align-self-center', top=True),
                                dbc.CardBody([
                                    html.H6("Number of TV Shows", className="card-text",style={'font-size': '20px','color': '#91003f', 'font-family':'Sans-serif'}),
                                    html.H1("{:d}".format(tv_shows_count), id='num_tv_shows', style={'font-size': '30px','color': '#91003f'})
                                ])
                            ],
                            style={"width": "25rem",'box-shadow': '0 4px 8px 0 rgba(0,0,0,0.2)','border-radius': '5px'},
                        )
                        ], style={"height": "180px", 'font-family': 'Cursive', "background-color": "transparent", 'text-align':'center','padding-left':'100px'}),

                    ]),
        ] ,style = {'margin-top':'30px','margin-bottom':'30px'}),

        # Chart placeholders: each graph starts empty and receives its figure from the update_my_Div callback.
        html.Div([
            dbc.Row([
                 dbc.Col([
                    html.Div(dcc.Graph(id='my-graph',figure={}, responsive = True, style={'height':'350px'}),style= {'background-color' : '#f7e9e9', 'width':'100%', 'height': '400px','margin': '10px','box-shadow': '0 2px 4px 0 rgba(0,0,0,0.2)'}),   
                 ]),
                dbc.Col([
                    html.Div(dcc.Graph(id='scatter',figure={}, responsive = True, style={'height':'350px'}),style= {'background-color' : '#f7f1e9', 'width':'100%', 'height': '400px','margin': '10px','box-shadow': '0 2px 4px 0 rgba(0,0,0,0.2)'}),
                 ]),
                dbc.Col([
                    html.Div(dcc.Graph(id='bar',figure={}, responsive = True, style={'height':'350px'}),style= {'background-color' : '#f4f7e6', 'width':'100%', 'height': '400px','margin': '10px','box-shadow': '0 2px 4px 0 rgba(0,0,0,0.2)'}),
                 ])
            ],style={'padding-top': '25px'}),
            dbc.Row([
                dbc.Col([
                    html.Div(dcc.Graph(id='bar2',figure={}, responsive = True, style={'height':'350px'}),style= {'background-color' : '#f0fcf6', 'width':'100%', 'height': '400px','margin': '10px','box-shadow': '0 2px 4px 0 rgba(0,0,0,0.2)'}) 
                ],style = {'margin-top':'17px'}),
                dbc.Col([
                    html.Div(dcc.Graph(id='map',figure={}, responsive = True, style={'height':'350px'}),style= {'background-color' : '#fbf7ff', 'width':'100%', 'height': '400px','margin': '10px','box-shadow': '0 2px 4px 0 rgba(0,0,0,0.2)'}) 
                ],style = {'margin-top':'17px'}),
                dbc.Col([
                    html.H5("Top 10 Celebrities this year",style = {'text-align' : 'center'}),
                    # Container whose children are supplied by the update_table callback.
                    html.Div(id ='table',style= {'background-color' : '#f0f0fc', 'width':'100%', 'height': '400px','margin': '10px','box-shadow': '0 2px 4px 0 rgba(0,0,0,0.2)', "maxHeight": "600px", "overflow": "scroll"}) 
                ],style ={'width': '40px'})
            ],style={'padding-top': '15px'})
            
            
        ],style = {'margin': '0px 50px'})
        
    ]),
    # Static footer.
    html.Div(id ='footer', children = [
        dbc.Row([html.P("Disney Plus!",style={'color':'white', 'font-size':'100%','padding-left':'20px'})]),
        dbc.Row([
        dbc.Col([html.P("Privacy Policy    |    Terms of Services",style={'text-align':'left','color':'white', 'font-size':'70%','padding-left':'10px'})]),
        dbc.Col([html.P("Ⓒ2022 All Rights Reserved", style={'text-align' : 'right', 'color':'white', 'font-size':'70%','padding-right':'10px'})])])
    ], style={'background-color':'#1b1f36', 'color':'white','margin-top': '30px'})
])

In [22]:
# Titles and labels that differ between the two dropdown choices handled by
# update_my_Div; everything else about the figures is shared.
_FIGURE_SETTINGS = {
    'movies': {
        'type': 'Movie',
        'scatter_title': "Total Movies",
        'duration_title': "Movies duration",
        'duration_x_label': 'movies',
        'duration_y_label': 'duration (mins)',
        'count_label': 'Number of Movies',
    },
    'tv': {
        'type': 'TV Show',
        'scatter_title': "Total TV Shows",
        'duration_title': "TV Show duration",
        'duration_x_label': 'TV Show',
        'duration_y_label': 'duration (season)',
        'count_label': 'Number of TV Shows',
    },
}


@app.callback(

    Output(component_id='my-graph', component_property='figure'),
    Output(component_id='scatter', component_property='figure'),
    Output(component_id='bar', component_property='figure'),
    Output(component_id='bar2', component_property='figure'),
    Output(component_id='map', component_property='figure'),

    Input(component_id='my-dropdown', component_property='value'),
    Input(component_id='slider_year', component_property='value')
)
def update_my_Div(input_dropdown,input_slider):
    """Build the five dashboard figures for the selected type and year.

    Parameters
    ----------
    input_dropdown : str
        ``'movies'`` or ``'tv'``; any other value yields five empty figures.
    input_slider : int
        Release year used for the duration, category and map figures.

    Returns
    -------
    list
        Five figures in output order: type share pie, releases-per-year
        scatter, per-title duration bars, category count bars, country map.
    """
    settings = _FIGURE_SETTINGS.get(input_dropdown)
    if settings is None:
        return [{},{},{},{},{}]

    # assign() adds the helper column to a copy, so the shared global df is
    # not modified every time the callback fires.
    fig = px.pie(
        df.assign(Ones=1),
        names='type',
        values='Ones',
        color='type',
        title='Number of Movies/TV Shows',
        color_discrete_map={'Movie': '#0c1533', 'TV Show': '#91003f'},
    )

    # All titles of the selected type, across every year.
    of_type = df[df['type'] == settings['type']]
    releases_df = (
        of_type.groupby('release_year')
        .size()
        .sort_values(ascending=True)
        .reset_index(name='counts')
    )
    fig2 = scatter_graph(releases_df, 'release_year', 'counts', settings['scatter_title'])

    # Titles of the selected type released in the selected year.
    filtered_year = of_type[of_type['release_year'] == input_slider]
    filtered_year_duration = filtered_year.sort_values(by='duration', ascending=True).reset_index()
    fig3 = bar_graph(
        filtered_year_duration,
        'title',
        'duration',
        settings['duration_title'],
        settings['duration_x_label'],
        settings['duration_y_label'],
    )

    categories_df = year_category(filtered_year)
    fig4 = bar_graph(
        categories_df, 'Category', 'Count', 'Categories Count', 'Category', settings['count_label']
    )

    countries_df = year_country(filtered_year)
    fig5 = map_graph(countries_df)

    figures = [fig, fig2, fig3, fig4, fig5]
    # Shared styling: fully transparent backgrounds and one font colour.
    for graph in figures:
        graph.update_layout(
            plot_bgcolor = 'rgba(255, 255, 0, 0)',
            paper_bgcolor= 'rgba(255, 255, 0, 0)',
            font_color="#0c1533",
            title_font_color="#0c1533",
            legend_title_font_color="#0c1533"
        )
    return figures

In [23]:
@app.callback(

    Output(component_id='table', component_property='children'),

    Input(component_id='my-dropdown', component_property='value'),
    Input(component_id='slider_year', component_property='value')
)
def update_table(input_dropdown,input_slider):
    """Build the top-actors table for the selected type and year.

    Parameters
    ----------
    input_dropdown : str
        ``'movies'`` or ``'tv'``.
    input_slider : int
        Release year to restrict the titles to.

    Returns
    -------
    dbc.Table or list
        The table built by ``celebrities_table``, or an empty list when the
        dropdown holds any other value (for example when it is cleared).
    """
    type_by_choice = {'movies': 'Movie', 'tv': 'TV Show'}
    title_type = type_by_choice.get(input_dropdown)
    if title_type is None:
        # Nothing to show for an unknown or cleared selection; returning empty
        # children avoids raising on a table that was never built.
        return []

    filtered_year = df[(df['release_year'] == input_slider) & (df['type'] == title_type)]
    celebrities_df = top10_actors(filtered_year)
    return celebrities_table(celebrities_df)

In [None]:
# Start the Dash server when this code runs as the main module.
if __name__ == "__main__":
    app.run_server()