# Global Video Game Sales Dashboard


## Profile and QA Data

In [1]:
# Importing Libraries

import numpy as np 
import pandas as pd 
import plotly.express as px

#from jupyter_dash import JupyterDash
from dash import Dash, dcc, html, dash_table,dash
import dash_bootstrap_components as dbc
from dash.dependencies import Output, Input
from dash.exceptions import PreventUpdate
from dash_bootstrap_templates import load_figure_template

In [2]:
# Load the vgchartz-2024 CSV, parsing both date columns while reading

path = '../data/Video+Game+Sales/vgchartz-2024.csv'

game_sales = (
    pd.read_csv(path, parse_dates=['release_date', 'last_update'])
    .iloc[:, 1:]  # discard the first CSV column; everything from 'title' onward is kept
)

game_sales.head()

Unnamed: 0,title,console,genre,publisher,developer,critic_score,total_sales,na_sales,jp_sales,pal_sales,other_sales,release_date,last_update
0,Grand Theft Auto V,PS3,Action,Rockstar Games,Rockstar North,9.4,20.32,6.37,0.99,9.85,3.12,2013-09-17,NaT
1,Grand Theft Auto V,PS4,Action,Rockstar Games,Rockstar North,9.7,19.39,6.06,0.6,9.71,3.02,2014-11-18,2018-01-03
2,Grand Theft Auto: Vice City,PS2,Action,Rockstar Games,Rockstar North,9.6,16.15,8.41,0.47,5.49,1.78,2002-10-28,NaT
3,Grand Theft Auto V,X360,Action,Rockstar Games,Rockstar North,,15.86,9.06,0.06,5.33,1.42,2013-09-17,NaT
4,Call of Duty: Black Ops 3,PS4,Shooter,Activision,Treyarch,8.1,15.09,6.18,0.41,6.05,2.44,2015-11-06,2018-01-14


In [3]:
# Checking data types, non-null counts and memory usage
# (memory_usage='deep' asks pandas to measure the actual contents of object columns)

game_sales.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64016 entries, 0 to 64015
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   title         64016 non-null  object        
 1   console       64016 non-null  object        
 2   genre         64016 non-null  object        
 3   publisher     64016 non-null  object        
 4   developer     63999 non-null  object        
 5   critic_score  6678 non-null   float64       
 6   total_sales   18922 non-null  float64       
 7   na_sales      12637 non-null  float64       
 8   jp_sales      6726 non-null   float64       
 9   pal_sales     12824 non-null  float64       
 10  other_sales   15128 non-null  float64       
 11  release_date  56965 non-null  datetime64[ns]
 12  last_update   17879 non-null  datetime64[ns]
dtypes: datetime64[ns](2), float64(6), object(5)
memory usage: 22.3 MB


In [4]:
# Capitalise the five categorical column names
# (the same capitalised names are offered as category options in the dashboard)

game_sales = game_sales.rename(
    columns = {
        name : name.capitalize()
        for name in ('title', 'console', 'genre', 'publisher', 'developer')
    }
)

game_sales.head()

Unnamed: 0,Title,Console,Genre,Publisher,Developer,critic_score,total_sales,na_sales,jp_sales,pal_sales,other_sales,release_date,last_update
0,Grand Theft Auto V,PS3,Action,Rockstar Games,Rockstar North,9.4,20.32,6.37,0.99,9.85,3.12,2013-09-17,NaT
1,Grand Theft Auto V,PS4,Action,Rockstar Games,Rockstar North,9.7,19.39,6.06,0.6,9.71,3.02,2014-11-18,2018-01-03
2,Grand Theft Auto: Vice City,PS2,Action,Rockstar Games,Rockstar North,9.6,16.15,8.41,0.47,5.49,1.78,2002-10-28,NaT
3,Grand Theft Auto V,X360,Action,Rockstar Games,Rockstar North,,15.86,9.06,0.06,5.33,1.42,2013-09-17,NaT
4,Call of Duty: Black Ops 3,PS4,Shooter,Activision,Treyarch,8.1,15.09,6.18,0.41,6.05,2.44,2015-11-06,2018-01-14


In [5]:
# Derive release_year from release_date
# (rows without a release date get NaN, which is why the column is float)

game_sales = game_sales.assign(
    release_year = lambda frame: frame['release_date'].dt.year
)

game_sales.head()

Unnamed: 0,Title,Console,Genre,Publisher,Developer,critic_score,total_sales,na_sales,jp_sales,pal_sales,other_sales,release_date,last_update,release_year
0,Grand Theft Auto V,PS3,Action,Rockstar Games,Rockstar North,9.4,20.32,6.37,0.99,9.85,3.12,2013-09-17,NaT,2013.0
1,Grand Theft Auto V,PS4,Action,Rockstar Games,Rockstar North,9.7,19.39,6.06,0.6,9.71,3.02,2014-11-18,2018-01-03,2014.0
2,Grand Theft Auto: Vice City,PS2,Action,Rockstar Games,Rockstar North,9.6,16.15,8.41,0.47,5.49,1.78,2002-10-28,NaT,2002.0
3,Grand Theft Auto V,X360,Action,Rockstar Games,Rockstar North,,15.86,9.06,0.06,5.33,1.42,2013-09-17,NaT,2013.0
4,Call of Duty: Black Ops 3,PS4,Shooter,Activision,Treyarch,8.1,15.09,6.18,0.41,6.05,2.44,2015-11-06,2018-01-14,2015.0


In [6]:
# Checking summary statistics for the numeric and datetime columns

game_sales.describe()

Unnamed: 0,critic_score,total_sales,na_sales,jp_sales,pal_sales,other_sales,release_date,last_update,release_year
count,6678.0,18922.0,12637.0,6726.0,12824.0,15128.0,56965,17879,56965.0
mean,7.22044,0.349113,0.26474,0.102281,0.149472,0.043041,2006-11-14 06:33:03.491617792,2020-01-11 00:45:49.683986944,2006.359572
min,1.0,0.0,0.0,0.0,0.0,0.0,1971-12-03 00:00:00,2017-11-28 00:00:00,1971.0
25%,6.4,0.03,0.05,0.02,0.01,0.0,2001-03-28 00:00:00,2018-08-08 00:00:00,2001.0
50%,7.5,0.12,0.12,0.04,0.04,0.01,2008-09-16 00:00:00,2019-04-21 00:00:00,2008.0
75%,8.3,0.34,0.28,0.12,0.14,0.03,2012-12-27 00:00:00,2021-03-30 00:00:00,2012.0
max,10.0,20.32,9.76,2.13,9.85,3.12,2024-12-31 00:00:00,2024-01-28 00:00:00,2024.0
std,1.457066,0.807462,0.494787,0.168811,0.392653,0.126643,,,8.617813


## Preparing Data for Visualization

In [7]:
# Total global sales per release year (one row per year, years kept as a regular column)

annual_sales = (
    game_sales
    .groupby('release_year', as_index=False)['total_sales']
    .sum()
)

annual_sales.head()

Unnamed: 0,release_year,total_sales
0,1971.0,0.0
1,1973.0,0.0
2,1975.0,0.0
3,1977.0,2.5
4,1978.0,2.36


In [8]:
# Plotting the yearly sales trend
# A title and readable axis labels are set so the figure stands on its own when skimmed.

px.line(
    annual_sales,
    x = 'release_year',
    y = 'total_sales',
    title = 'Total Video Game Sales Over Time',
    labels = {'release_year' : 'Release Year', 'total_sales' : 'Total Sales'}
)

In [9]:
# Top 10 titles by total sales, summed over every row sharing the same Title

top_10_titles = (
    game_sales
    .groupby('Title')[['total_sales']]
    .sum()
    .sort_values(by='total_sales', ascending=False)
    .head(10)
    .reset_index()
)

top_10_titles.head()

Unnamed: 0,Title,total_sales
0,Grand Theft Auto V,64.29
1,Call of Duty: Black Ops,30.99
2,Call of Duty: Modern Warfare 3,30.71
3,Call of Duty: Black Ops II,29.59
4,Call of Duty: Ghosts,28.8


In [10]:
# Plotting the Top 10 Titles as horizontal bars
# A title and readable axis labels are set so the figure stands on its own when skimmed.

px.bar(
    top_10_titles[::-1],  # reversed so the best-selling title is drawn at the top
    x = 'total_sales',
    y = 'Title',
    title = 'Top 10 Titles by Total Sales',
    labels = {'total_sales' : 'Total Sales'}
)

## Building Dashboard



In [11]:
import plotly.io as pio
# Make every figure created from here on use Plotly's dark template by default
pio.templates.default = "plotly_dark"

# Extra stylesheet from the dash-bootstrap-templates project; the 'dbc' className
# given to the controls in the layout presumably hooks into it (confirm against its docs)
dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"

# Dash application styled with the SLATE bootstrap theme plus the stylesheet above
app = Dash(__name__, external_stylesheets=[dbc.themes.SLATE, dbc_css])

# Page layout: a heading, then one row holding two columns.
#   left  - sales metric dropdown + line chart of that metric per release year
#   right - category radio buttons + bar chart of the top sellers in that category
# The H4 headings and both graphs are empty here; the callback fills them in.
app.layout = dbc.Container([
    # Title of Dashboard------
    dbc.Row(
        html.H1('Global Video Game Sales Dashboard', style={'textAlign' : 'center'})
    ),
    html.Hr(),

    dbc.Row(children=[

        # For Sales Over Year Chart
        dbc.Col(
            children = [
                # For Sales Dropdown
                dbc.Row(
                    dcc.Dropdown(
                            id = 'sales_dropdown',
                            options = [
                                {'label' : 'Total Sales', 'value' : 'total_sales'},
                                {'label' : 'Japan Sales', 'value' : 'jp_sales'},
                                {'label' : 'North America Sales', 'value' : 'na_sales'},
                                {'label' : 'Europe and Africa Sales', 'value' : 'pal_sales'},
                                {'label' : 'Rest of the World Sales', 'value' : 'other_sales'}
                            ],
                            value = 'total_sales',
                            # A metric must always be selected: without this the user can
                            # clear the dropdown and the callback receives None.
                            clearable = False,
                            className = 'dbc')
                ),
                html.Br(),

                # Sales Line Chart
                html.H4(id = 'sales_title', style={'textAlign' : 'center'}),
                dbc.Row(dcc.Graph(id = 'sales_chart'))
            ]
        ),

        # Top Seller by category
        dbc.Col(
            children = [
                # For Category selector (radio buttons)
                dbc.Row(
                    dcc.RadioItems(
                        id = 'category_dropdown',
                        options = ['Title', 'Genre', 'Publisher', 'Developer', 'Console'],
                        value = 'Genre',
                        className = 'dbc',
                        inline = True,
                        # Style keys are camelCased, consistent with 'textAlign' used above
                        inputStyle={"marginRight": "8px", "marginLeft": "10px"}
                    )
                ),
                html.Br(),

                # For Category Bar Chart
                html.H4(id = 'category_title', style={'textAlign' : 'center'}),
                dbc.Row(dcc.Graph(id = 'category_chart'),style={
                                "flex": "1",
                                "padding": "10px"}
                )
            ]
        )
    ])
])

@app.callback(
    Output('sales_title', 'children'),
    Output('sales_chart', 'figure'),
    Output('category_title', 'children'),
    Output('category_chart', 'figure'),
    Input('sales_dropdown', 'value'),
    Input('category_dropdown', 'value')
)
def output_generator(sales, category):
    """Rebuild both charts and their headings whenever a control changes.

    Parameters
    ----------
    sales : str
        Name of the sales column to aggregate (the value of 'sales_dropdown').
    category : str
        Name of the column to rank by (the value of 'category_dropdown').

    Returns
    -------
    tuple
        (sales heading, line figure, category heading, bar figure), in the
        same order as the Outputs declared on the decorator.

    Raises
    ------
    PreventUpdate
        When either control has no value (for example after the dropdown was
        cleared). The page then keeps its current charts instead of the
        aggregation failing on a missing column name.
    """
    # Nothing selected -> nothing to aggregate; leave the current output as is.
    if not sales or not category:
        raise PreventUpdate

    # Sales Data: the chosen metric summed per release year
    sales_df = game_sales.groupby('release_year', as_index=False).agg({sales : 'sum'})

    line = px.line(
        sales_df,
        x = 'release_year',
        y = sales
    )

    # For Interactive Title: any metric not listed falls back to the
    # "Rest of the World" wording, as before.
    sales_titles = {
        'total_sales' : 'Total Video Game Sales Over Time',
        'jp_sales' : 'Video Game Sales in Japan Over Time',
        'na_sales' : 'Video Game Sales in North America Over Time',
        'pal_sales' : 'Video Game Sales in Europe and Africa Over Time',
    }
    sales_title = sales_titles.get(sales, 'Video Game Sales in Rest of the World Over Time')

    # Category Data: the ten largest groups of the chosen category by the chosen metric
    category_df = game_sales.groupby(category, as_index=False).agg({sales : 'sum'}).sort_values(
        sales, ascending = False).iloc[:10]

    bar = px.bar(
        category_df,
        x = category,
        y = sales
    )

    category_title = f'Top 10 {category}'

    return sales_title, line, category_title, bar

# Start the Dash server on port 8050 when this is run as the main module.
# jupyter_mode='tab' is handed to Dash; per the Dash docs it opens the app in a
# separate browser tab rather than inline in the notebook.
if __name__ == '__main__':
    app.run(jupyter_mode='tab', port= 8050)

Dash app running on http://127.0.0.1:8050/


<IPython.core.display.Javascript object>