In [1]:
from jupyter_dash import JupyterDash
from dash import dcc
from dash import html 
import dash
import pandas as pd 
from dash.dependencies import Input,Output
import dash_bootstrap_components as dbc
import plotly.graph_objs as go
import numpy as np
import plotly.express as px


## Read and Explore Data

In [2]:
df = pd.read_csv("vgsales.csv")
df.head()
#print (genre)


Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16598 entries, 0 to 16597
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Rank          16598 non-null  int64  
 1   Name          16598 non-null  object 
 2   Platform      16598 non-null  object 
 3   Year          16327 non-null  float64
 4   Genre         16598 non-null  object 
 5   Publisher     16540 non-null  object 
 6   NA_Sales      16598 non-null  float64
 7   EU_Sales      16598 non-null  float64
 8   JP_Sales      16598 non-null  float64
 9   Other_Sales   16598 non-null  float64
 10  Global_Sales  16598 non-null  float64
dtypes: float64(6), int64(1), object(4)
memory usage: 1.4+ MB


In [4]:
df.isna().sum()

Rank              0
Name              0
Platform          0
Year            271
Genre             0
Publisher        58
NA_Sales          0
EU_Sales          0
JP_Sales          0
Other_Sales       0
Global_Sales      0
dtype: int64

In [5]:
np.sort(df['Year'].unique())

array([1980., 1981., 1982., 1983., 1984., 1985., 1986., 1987., 1988.,
       1989., 1990., 1991., 1992., 1993., 1994., 1995., 1996., 1997.,
       1998., 1999., 2000., 2001., 2002., 2003., 2004., 2005., 2006.,
       2007., 2008., 2009., 2010., 2011., 2012., 2013., 2014., 2015.,
       2016., 2017., 2020.,   nan])

In [6]:
df2 = df.dropna(axis=0)
df2.isna().sum()

Rank            0
Name            0
Platform        0
Year            0
Genre           0
Publisher       0
NA_Sales        0
EU_Sales        0
JP_Sales        0
Other_Sales     0
Global_Sales    0
dtype: int64

In [7]:
# Create a new column for total sales
df2["total_sales"] = df2["NA_Sales"]+df2["EU_Sales"]+df2["JP_Sales"]+df2["Other_Sales"]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["total_sales"] = df2["NA_Sales"]+df2["EU_Sales"]+df2["JP_Sales"]+df2["Other_Sales"]


In [8]:
colors = ["black", "blue", "red", "yellow", "pink", "orange"]

available_indicators = df['Genre'].unique()
available_indicators = [str(x) for x in available_indicators]
genre = [x for x in available_indicators if x != 'nan']
available_indicators2 = df['Platform'].unique()
available_indicators2 = [str(x) for x in available_indicators2]
Platform = [x for x in available_indicators2 if x != 'nan']
################## get most 10 famous publishiers ##############
pub=[]
pub=df['Publisher'].value_counts()
x=pub[1:11]

## Data Preparation

In [9]:
# Define items in genres drop down list
Genre = [x for x in df2["Genre"].unique()]
Genre.append("All Platforms")
# Define items in regions drop down list
regions=["Worldwide","Europe","Japan","North Africa"]
# Get Top Publisher with biggest number of games in 2016
data = df2[df2["Year"]==2016].groupby(["Publisher"]).agg(number_of_games=("Name","count")).reset_index()
top_pub = data[data["number_of_games"] == data["number_of_games"].max()]["Publisher"].iloc[0]
# Get Top Publisher with largest sales in 2016
data = df2.groupby("Year").agg({"total_sales":"max"})
top_pub =df2[(df2["Year"]==2016) &(df2["total_sales"]==data.loc[2016]['total_sales'])]["Publisher"].iloc[0]
# Get The video game with top sales in 2016
data = df2.groupby("Year").agg({"total_sales":"max"})
top_game =df2[(df2["Year"]==2016) &(df2["total_sales"]==data.loc[2016]['total_sales'])]["Name"].iloc[0]
#################################### Main Layout #########################################

## Styles

In [10]:
#################################################################################
################################## Cards of BANs ##################################
first_card = dbc.Card(
    [
        dbc.CardHeader(html.H4(id="title1"),style={"background-color":"mediumvioletred"}),
        dbc.CardBody(
        [
            html.P(
               id="card1",style={"font-size": "30px"}
            ),
       
        ]    
    )]
 )

second_card =  dbc.Card(
    [
        dbc.CardHeader(html.H4(id="title2"),style={"background-color":"gold"}),
        dbc.CardBody(
        [
            html.P(
              id="card2",style={"font-size": "30px"}
            ),
        ]
            
    )]
 )

third_card =   dbc.Card(
    [
        dbc.CardHeader(html.H4(id="title3"),style={"background-color":"blue"}),
        dbc.CardBody(
        [
            html.P(
              id="card3",style={"font-size": "25px"}
            ),
        ]
    )]
 )

another_card = dbc.Card(
 
    dbc.CardBody(
        [
            html.H5("Select a Region", className="card-title"),
       
                 dcc.Dropdown(id='my-dpdn1', multi=False, value='Worldwide',
                             options=[{'label':x, 'value':x}
                                     for x in (regions)],
                            className = 'dcc_compon text-center m-4 mb-4',style={"color":"black"}),
          
           
          
        ]
    )
)
#################################################################################
bgcolor = "#f3f3f1"
template = {"layout": {"paper_bgcolor": bgcolor, "plot_bgcolor": bgcolor}}



## Dash App

In [11]:
## Create an app Instance

app = JupyterDash(external_stylesheets=[dbc.themes.DARKLY])
#MORPH #SOLAR #QUARTZ #MINTY
#app= dash.Dash(external_stylesheets =['https://codepen.io/chriddyp/pen/bWLwgP.css'])

########################################################################


app.layout =dbc.Container([
         
        dbc.Row([
            html.Br(),
            html.Br(),
             dbc.Col(html.Img(src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRYJAlu65XYqpkpmKpFJTOWW0xQpBfTnc1qiA&usqp=CAU" , width=200, height=90)),
             dbc.Col(html.H1("Video Games Analytics",
                            className='font-weight-bold  m-4 mb-4',style={"background_color":"white"}),
                    width=9), 
             html.Br(),
             html.Br(),
             dbc.Col(first_card, width=4),
             dbc.Col(second_card, width=4),
             dbc.Col(third_card, width=4)
            ]
        ),
        html.Br(),
        

####################################### first 2 graphs ##########################################
dbc.Card(
     dbc.CardBody(
        dbc.Row([

            dbc.Col([
                   
            dbc.CardHeader(html.H5("Total Sales over years"),style={"background-color":"Blues"}),

                     html.Br(),
                    
                     dcc.Graph(id='fig11',config = {'displayModeBar': 'hover'},style= {
                                "border-radius": "5px",
                                "background-color": "#f3f3f1",
                                "margin": "1%",
                                "padding": "1%",
                                "position": "relative",
                                "box-shadow": "1px 1px 1px slategrey"
                            },className = 'dcc_compon text-center m-8 mb-8')
                
                    ],width=8),
             dbc.Col([
                 html.Br(),
                 another_card
              ],width=4,style={'border':'1px solid black', 'padding':'20px'})
          
                

              ]),
)),
#################################### years slider ######################################### 
   
    dbc.Row ([
       
          html.P('Years', className ='text-muted text-primary text-info'),
                dcc.Slider(id = 'select_year',
                           included = False,
                           updatemode = 'drag',
                           tooltip = {'always_visible': True},
                           min = 1980,
                           max = 2016,
                           value = 2016,
                           marks = {str(yr): str(yr) for yr in range(1980, 2021 ,2)},
                           className = 'dcc_compon form-range'),  


        ],),
     html.Br(),
#################################this row is for multible out put#########################################################      
 dbc.Row([

          dbc.Col([
                    dbc.CardHeader(html.H5("Total Sales by genres"),style={"background-color":"Blues"}),


                      dcc.Graph(id='fig12',config = {'displayModeBar': 'hover'}, style = {
                                "border-radius": "5px",
                                "background-color": "#f3f3f1",
                                "margin": "1%",
                                "padding": "1%",
                                "position": "relative",
                                "box-shadow": "1px 1px 1px slategrey"
                            })
                    ],width=6 ),

             ###################Sales by sub categories #######################
          
             
       dbc.Col([
            dbc.CardHeader(html.H5(id="fig2_title"),style={"background-color":"Blues"}),

              dcc.Graph(id='fig42',config = {'displayModeBar': 'hover'}, style = {
                            "border-radius": "5px",
                            "background-color": "#f3f3f1",
                            "margin": "1%",
                            "padding": "1%",
                            "position": "relative",
                            "box-shadow": "1px 1px 1px slategrey"
                        })
       ],width=6),
          
 ]),
    html.Br(),
    dbc.Row([
        dbc.Col(width=3),
        dbc.Col([

            dbc.CardHeader(html.H5(id="fig3_title"),style={"background-color":"Blues"}),

            dcc.Graph(id='fig21',config = {'displayModeBar': 'hover'}, style = {
                        "border-radius": "5px",
                        "background-color": "#f3f3f1",
                        "margin": "1%",
                        "padding": "1%",
                        "position": "center",
                        "box-shadow": "1px 1px 1px slategrey"
                    })
    ],width=6),
        dbc.Col(width=3),
             ]),





########################Basmaa row ########################################################
         dbc.Row([
             
          
             
              ########## donat chart ##################
      dbc.Col([
                  ###
                    html.Div([
                    html.Br(),
                    dbc.CardHeader(html.H5("Worldwide Number of Publishers over year"),style={"background-color":"Blues"}),    
                    dcc.Graph(id='g1'),
                        ],),
                   ]),
                   
         dbc.Col([
                  ###
                    html.Div([
                    html.Br(),
                    dbc.CardHeader(html.H5("Worldwide number of genres over year"),style={"background-color":"Blues"}),
                    dcc.Graph(id='g2'),
                        ],),
                   ]),
       
    ] ,

            ),

###########################################################################################
     dbc.Row([
          
              dbc.Col([
                  # Create bubble chart (Compare sales and publisher)
                    html.Div([
                    html.Br(),
                    html.P("Global Sales:"),
                    dcc.RangeSlider(
                            id='range-slider',
                            min=0, max=1787, step=0.5,
                            marks={
                                    0: '0',
                                    179: '179',
                                    358: '358',
                                    537: '537',
                                    715: '715',
                                    894: '894',
                                    1072: '1072',
                                    1250: '1250',
                                    1430: '1430',
                                    1608: '1608',
                                    1787: '1787'
                                },
                            value=[537, 1430]
                        ),

                    dcc.Graph(id='bubble_2',
                              config={'displayModeBar': 'hover'},),
                    
                        ],),
                   ]),



    ] ,

            ),


],fluid=False)






## Callbacks

In [12]:
####### This callback to update fig 1  #####################################################
@app.callback(Output('fig11', 'figure'),
             Input('my-dpdn1', 'value'))

def update_fig1(region):
    data = df2.copy()
    y="Global_Sales"
    if region =="Europe":
        y="EU_Sales"
    elif region =="North Africa":
        y="NA_Sales"
    elif region =="Japan":
        y="JP_Sales"
    sales_by_year = data.groupby("Year").agg({y:"sum"}).reset_index()
    sales_year = px.line(sales_by_year,x="Year", y=y,markers=True, labels={
                     y: "Total Sales in Millions $"})
    sales_year.update_layout( 
            template=template,
            barmode= "overlay",
            selectionrevision= True,
            height = 300,
            margin= {"l": 10, "r": 10, "t": 30, "b": 10},
            xaxis= {"automargin": True},
            yaxis= {
               
                "automargin": True,
             
            },
            font=dict(
            family="Courier New, monospace",
            size=12,
             ),
            selectdirection ="h",
            hovermode= "closest",
            dragmode= "select"
     
    )
    return sales_year

#### THis callback to update the BANS and fig 2 #################################
@app.callback(Output('fig12', 'figure'),
              Output('card1', 'children'),
              Output('title1', 'children'),
              Output('title2', 'children'),
              Output('title3', 'children'),
              Output('card2', 'children'),
               Output('card3', 'children'), 

             Input('select_year', 'value'),
              
            Input('my-dpdn1', 'value'))
def update_fig2(year,region):
    data = df2.copy()
    data = data[data["Year"]==year ]
    y="Global_Sales"
    if region =="Europe":
        y="EU_Sales"
    elif region =="North Africa":
        y="NA_Sales"
    elif region =="Japan":
        y="JP_Sales"
    sales_genres =data.groupby(["Genre"]).agg(genre_sales=(y,"sum")).reset_index()

    pop_fig = px.bar(sales_genres,x="Genre", y="genre_sales",custom_data=['Genre'],labels={
                     "genre_sales": "Total Sales in Millions $"})

  
    pop_fig.update_layout( 
            template=template,
            barmode= "overlay",
            selectionrevision= True,
            height = 400,

            margin= {"l": 10, "r": 10, "t": 30, "b": 10},
            xaxis= {"automargin": True},
            yaxis= {
               
                "automargin": True,
             
            },
            selectdirection ="h",
            hovermode= "closest",
            dragmode= "select"
     
    )
    grouped_data = data.groupby(["Genre"]) 
    total_sales = str(data[data["Year"]==year][y].sum().round(2))+" Million"
    df1 = data[data["Year"]==year].groupby(["Publisher"]).agg(sales_of_games=(y,"sum")).reset_index()
    top_pub = df1[df1["sales_of_games"] == df1["sales_of_games"].max()]["Publisher"].iloc[0]
    df3 = data[data["Year"]==year].groupby(["Name"]).agg(sales_of_games=(y,"sum")).reset_index()
    top_game = df3[df3["sales_of_games"] == df3["sales_of_games"].max()]["Name"].iloc[0]
    return pop_fig,total_sales,f"Total Sales in {year}",f"Top Publisher in {year}",f"Top Game in{year}",top_pub,top_game
 
    

@app.callback(
    Output('fig21', 'figure'),
    Input('fig12', 'clickData'),
    Input('select_year', 'value'),
    Input('my-dpdn1', 'value'))

def update_games_per_genre(clickData,year,region):
    click_genre = 'All Genres'
    
    data = df2.copy()
    data = data[data["Year"]==year ]
    y="Global_Sales"
    if region =="Europe":
        y="EU_Sales"
    elif region =="North Africa":
        y="NA_Sales"
    elif region =="Japan":
        y="JP_Sales"
    df1 = data.groupby(["Name"]).agg(sales_of_games=(y,"sum")).reset_index().sort_values("sales_of_games",ascending=False)
    top_games = df1.head(10)
    
    # Extract the major category clicked on for usage
    if clickData:
        
        click_genre = clickData['points'][0]['x']
        
        df3 = data[data["Genre"]==click_genre].groupby(["Name"]).agg(sales_of_games=(y,"sum")).reset_index().sort_values("sales_of_games",ascending=False)
        top_games = df3.head(10)
        
        
    myfig = px.pie(top_games, names='Name', values='sales_of_games',labels=list(top_games['Name']),hole=0.1)
    myfig.update_layout( template=template,showlegend=False,
            selectionrevision= True,
            margin= {"l": 10, "r": 10, "t": 30, "b": 10},
            height = 400,
            selectdirection ="h",
            hovermode= "closest",
            dragmode= "select"
     )
    myfig.update_traces(textposition='inside', textinfo='percent+label')
    return myfig  
 

In [13]:
# Create bar chart (number of games per publisher per year)
@app.callback(Output('bubble_2', 'figure'),
              [Input("range-slider", "value")])
def update_graph(slider_range):
    low, high = slider_range
    bubble_chart = df.groupby(['Publisher'])[['Global_Sales']].sum().reset_index()
    mask = (bubble_chart['Global_Sales'] > low) & (bubble_chart['Global_Sales'] < high)
    fig = px.scatter(
        bubble_chart[mask], x="Publisher", y="Global_Sales", 
        color="Publisher", size='Global_Sales', 
        #hover_data=['NA_Sales']
    )
                           
    return fig



In [14]:
# genre per number of games per year
@app.callback(
    Output(component_id='g2', component_property='figure'),
    Input(component_id='select_year', component_property='value')
)


def update_di(slidervalue):
    filtered_df=df2[df2.Year==slidervalue]
    game_genre =  filtered_df.groupby('Genre')['Name'].count().reset_index().sort_values("Name",ascending=False)
    game_genre = game_genre.reset_index()
    game_genre.drop("index",axis = 1,inplace=True)

    fig2 = go.Figure([go.Pie(labels=game_genre['Genre'], values=game_genre['Name'],hole=0.2)])  
    fig2.update_traces(hoverinfo='label+percent+value', textinfo='percent', textfont_size=15)
    #fig2.update_layout(title="Genre per Number of games Published",title_x=0.1)
    return fig2

In [15]:
@app.callback(
    Output(component_id='g1', component_property='figure'),
    Input(component_id='select_year', component_property='value')
)


def update_div(slidervalue):
    filtered_df=df2[df2.Year==slidervalue]
    publisher =  filtered_df.groupby('Publisher')['Name'].count().reset_index()
    publisher = publisher.sort_values('Name',ascending=False).reset_index()
    
    fig1 = go.Figure(go.Bar(x=publisher['Publisher'],y=publisher['Name'][:10],marker={'color': publisher['Name'][:10],'colorscale': 'viridis'}))
    fig1.update_layout(xaxis_title="Publisher Name",yaxis_title="number of games")
    
   
    return fig1


#### THis callback to update bar chart #################################

@app.callback(Output( 'fig42','figure'),
              Output( "fig2_title",'children'),
              Output( "fig3_title",'children'),

              [Input('select_year', 'value')],
              [Input('my-dpdn1', 'value')],
              [Input('fig12', 'clickData')])

   
def capture_hover_data(select_year,drop,clickData):
    data = df2.copy()
    data = data[data["Year"]==select_year ]
    if drop =="Europe":
        y="EU_Sales"
        Genre="Europe"
    elif drop =="North Africa":
        y="NA_Sales"
        Genre="North Africa"
    elif drop =="Japan":
        y="JP_Sales"
        Genre="Japan"
    elif drop =="Worldwide":
        y="Global_Sales"
        Genre="All"
    temp_pub =data.groupby(['Publisher']).agg(publisher_sales=(y,"sum")).reset_index().sort_values("publisher_sales",ascending=False)
    top_p=temp_pub.head(10)
    
    if clickData:
        
        genre_name = clickData['points'][0]['customdata']
        Genre = genre_name[0]
        s_temp =  data[(data['Genre'] == genre_name[0])].groupby(["Publisher"]).agg(publisher_sales=(y,"sum")).reset_index().sort_values("publisher_sales",ascending=False)
        top_p = s_temp.head(10)
        
    pub_fig = px.bar(top_p,y="Publisher", x="publisher_sales",orientation='h',labels={"publisher_sales":"Sales in Million $"})
    pub_fig.update_layout( 
            template=template,
            barmode= "overlay",
            selectionrevision= True,
            title=f"Sales by Publisher for {Genre} games",
            height = 400,
        
            margin= {"l": 10, "r": 10, "t": 30, "b": 10},
            xaxis= {"automargin": True},
            yaxis= {
               
                "automargin": True,
             
            },
            selectdirection ="h",
            hovermode= "closest",
            dragmode= "select"   )
        
    return pub_fig,f"Total sales by publisher for {Genre} games",f"Top games for {Genre} genre"
    
   

    






   ## Run

In [16]:
app.run_server(port=8054)

Dash app running on http://127.0.0.1:8054/
