# Setup - Running Code in Colab

STEP 1 - Mount Google Drive in Colab using the "Mount Drive" button.

In [1]:
# create symbolic link on colab folder "/content" to "Data" folder which contains the data file from the repo Introductory_Data_Analytics_Course
!ln -s "/content/drive/MyDrive/course_material_related_docs/Introductory_Data_Analytics_Course/Data/" "/content"

In [2]:
# install dash and jupyter dash
!pip install jupyter-dash
!pip install dash

Collecting jupyter-dash
  Downloading jupyter_dash-0.4.0-py3-none-any.whl (20 kB)
Collecting dash
  Downloading dash-1.21.0.tar.gz (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 15.1 MB/s 
[?25hCollecting ansi2html
  Downloading ansi2html-1.6.0-py3-none-any.whl (14 kB)
Collecting flask-compress
  Downloading Flask_Compress-1.10.1-py3-none-any.whl (7.9 kB)
Collecting dash-core-components==1.17.1
  Downloading dash_core_components-1.17.1.tar.gz (3.7 MB)
[K     |████████████████████████████████| 3.7 MB 46.7 MB/s 
[?25hCollecting dash-html-components==1.1.4
  Downloading dash_html_components-1.1.4.tar.gz (83 kB)
[K     |████████████████████████████████| 83 kB 1.8 MB/s 
[?25hCollecting dash-table==4.12.0
  Downloading dash_table-4.12.0.tar.gz (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 64.4 MB/s 
Collecting brotli
  Downloading Brotli-1.0.9-cp37-cp37m-manylinux1_x86_64.whl (357 kB)
[K     |████████████████████████████████| 357 kB 66.6 MB/s 
Building wheels f

# Dashboard code

In [3]:
# import the libraries
import pandas as pd
from jupyter_dash import JupyterDash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px


In [5]:
# Load the combined movie dataset used by the dashboard.
# The path is relative; if running from the repo on a local machine, point it at the
# Data folder location in the repo.
DATA_FILE = 'Data/combined_deployment_data.csv'
dashboard_data = pd.read_csv(DATA_FILE)

# Preview the first rows as the cell output
dashboard_data.head()

Unnamed: 0,original_title,year,genre,duration,country,language,imdb_score,worldwide_gross_income,tomatometer_rating,imdb_scaled
0,The Kid,1921,"Comedy, Drama, Family",68,USA,"English, None",8.3,0.026916,100.0,83.0
1,A Woman of Paris: A Drama of Fate,1923,"Drama, Romance",82,USA,"None, English",7.0,0.011233,92.0,70.0
2,The Gold Rush,1925,"Adventure, Comedy, Drama",95,USA,"English, None",8.2,0.026916,100.0,82.0
3,Metropolis,1927,"Drama, Sci-Fi",153,Germany,German,8.3,1.349711,97.0,83.0
4,Sunrise: A Song of Two Humans,1927,"Drama, Romance",94,USA,English,8.1,0.121107,98.0,81.0


In [6]:
# Print the column names, non-null counts and dtypes of the loaded data
dashboard_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7148 entries, 0 to 7147
Data columns (total 10 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   original_title          7148 non-null   object 
 1   year                    7148 non-null   int64  
 2   genre                   7148 non-null   object 
 3   duration                7148 non-null   int64  
 4   country                 7148 non-null   object 
 5   language                7142 non-null   object 
 6   imdb_score              7148 non-null   float64
 7   worldwide_gross_income  7148 non-null   float64
 8   tomatometer_rating      7143 non-null   float64
 9   imdb_scaled             7148 non-null   float64
dtypes: float64(4), int64(2), object(4)
memory usage: 558.6+ KB


In [8]:
# create genre list
def convert_genre_list(genre):
    """Turn a comma-separated genre string into a list of genre names.

    Surrounding whitespace is stripped from every name, e.g.
    "Comedy, Drama, Family" -> ['Comedy', 'Drama', 'Family'].
    """
    return [name.strip() for name in genre.split(',')]

# Build the options for the genre dropdown:
# 1. one row per movie holding its list of genre names
list_genre = dashboard_data['genre'].map(convert_genre_list).to_frame('genre_list')
# 2. one row per (movie, genre name) pair
list_genre_explode = list_genre.explode('genre_list')
# 3. one row per distinct genre name (groupby returns the names in sorted order)
list_genre_groupby = list_genre_explode.groupby('genre_list').size().reset_index()[['genre_list']]

# final genre list, with the catch-all "All Genre" entry appended at the end
genre = [*list_genre_groupby['genre_list'].unique(), 'All Genre']

In [10]:
# Create the Dash app
app = JupyterDash(__name__)

# Slider bounds come straight from the data, so both sliders start with the full range selected
year_min, year_max = dashboard_data.year.min(), dashboard_data.year.max()
income_min = dashboard_data.worldwide_gross_income.min()
income_max = dashboard_data.worldwide_gross_income.max()

# Tick labels: one every 5 years, and one every 200 on the box office slider
year_marks = {str(y): str(y) for y in range(int(year_min), int(year_max), 5)}
income_marks = {str(y): str(y) for y in range(int(income_min), int(income_max), 200)}

year_slider = dcc.RangeSlider(
    id='year-released-range-slider',
    min=year_min,
    max=year_max,
    marks=year_marks,
    value=[year_min, year_max]
)

box_office_slider = dcc.RangeSlider(
    id='box-office-range-slider',
    min=income_min,
    max=income_max,
    marks=income_marks,
    value=[income_min, income_max]
)

genre_dropdown = dcc.Dropdown(
    id='genre-dropdown',
    options=[{'label': name, 'value': name} for name in genre],
    value='All Genre'
)

# Set up the app layout: title, the three filter controls, then the scatter plot
app.layout = html.Div(children=[
    html.H1(children='Imdb vs Rotten tomatoes Ratings Dashboard'),
    html.H2(children='Year Released'),
    year_slider,
    html.Br(),
    html.H2(children='Box Office Earnings (in millions)'),
    box_office_slider,
    html.Br(),
    html.H2(children='Genre'),
    genre_dropdown,
    html.Br(),
    dcc.Graph(id='rating-graph')
])


# Set up the callback function
@app.callback(
    Output(component_id='rating-graph', component_property='figure'),
    [
     Input(component_id='year-released-range-slider', component_property='value'),
     Input(component_id='box-office-range-slider',component_property='value'),
     Input(component_id='genre-dropdown',component_property='value')
    ]
)
def update_graph(selected_year,gross_income,genre_name):
    """Rebuild the rating scatter plot for the current filter selection.

    Parameters
    ----------
    selected_year : [start, end] pair from the year range slider (both ends inclusive).
    gross_income : [start, end] pair from the box office range slider (both ends inclusive).
    genre_name : value of the genre dropdown; 'All Genre' or None (cleared dropdown)
        disables the genre filter.

    Returns
    -------
    A plotly express scatter figure of 'imdb_scaled' against 'tomatometer_rating'
    for the movies that pass all three filters.
    """
    year_released_start, year_released_end = selected_year
    gross_income_start, gross_income_end = gross_income

    # A cleared dropdown sends None; treat it as "no genre filter" instead of
    # passing None on to the string matching, which would raise.
    if genre_name is None:
        genre_name = 'All Genre'

    keep = (
        (dashboard_data['year'] >= year_released_start)
        & (dashboard_data['year'] <= year_released_end)
        & (dashboard_data['worldwide_gross_income'] >= gross_income_start)
        & (dashboard_data['worldwide_gross_income'] <= gross_income_end)
    )

    if genre_name != 'All Genre':
        # Match whole genre names from the comma-separated 'genre' column. A plain
        # substring search would also return e.g. "Musical" movies for "Music".
        has_genre = dashboard_data['genre'].map(
            lambda genres: isinstance(genres, str)
            and genre_name in [name.strip() for name in genres.split(',')]
        )
        keep = keep & has_genre

    filtered_final = dashboard_data.loc[keep]
    scatter_fig = px.scatter(filtered_final,
                       x='imdb_scaled', y='tomatometer_rating',hover_name='original_title',
                       hover_data=['genre','worldwide_gross_income','year'],
                       range_x = [0,100],range_y=[-10,110],
                       title=f'Rating comparison - years selected {selected_year} - box office range {gross_income} - genre {genre_name}')
    return scatter_fig


# Run local server; mode='inline' asks JupyterDash to show the app in this cell's output
app.run_server(mode='inline')

<IPython.core.display.Javascript object>