In [60]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from typing import List, Union
import datetime
import os 

In [61]:
# Shared Plotly layout settings, passed to `update_layout` when building
# figures: unified hover along x, no grid lines, white axis and legend text,
# a semi-transparent legend box, and tight margins.
main_config = dict(
    hovermode="x unified",
    xaxis=dict(showgrid=False, color="white"),
    yaxis=dict(showgrid=False, color="white"),
    legend=dict(
        yanchor="top",
        y=0.9,
        xanchor="left",
        x=0.1,
        title=dict(text=None),
        font=dict(color="white"),
        bgcolor="rgba(0,0,0,0.5)",
    ),
    margin=dict(l=0, r=0, t=10, b=0),
)

In [62]:
# Load the video-game sales dataset from the current working directory.
# NOTE: despite its name, FILE_FOLDER holds the absolute path of the CSV file
# itself; the name is kept so any other cell that refers to it keeps working.
FILE_FOLDER = os.path.abspath('vgsales.csv')
# The file is comma-delimited, so ',' must not also be declared as the decimal
# mark: doing so keeps pandas from recognising '.'-decimal values (e.g. 40.24)
# as numbers and leaves the sales columns as strings.
df = pd.read_csv(FILE_FOLDER, sep=",")
# Discard rows containing any missing value, then the 'Rank' column
# (chained instead of mutating the frame in place).
df = df.dropna().drop(columns='Rank')


In [63]:
# Parse 'Year' with the four-digit '%Y' format and keep only the year number.
df = df.assign(Year=pd.to_datetime(df['Year'], format='%Y').dt.year)
# Order rows chronologically, then keep only those dated before 2017.
df = df.sort_values('Year').loc[lambda frame: frame['Year'] < 2017]
# Rows whose year is exactly 2000.
x = df.loc[df['Year'] == 2000]


In [64]:
# Cast every regional and global sales column to float in a single pass.
df = df.astype({
    sales_column: 'float'
    for sales_column in ('Other_Sales', 'Global_Sales', 'JP_Sales', 'EU_Sales', 'NA_Sales')
})

In [None]:
# The sales table reordered from highest to lowest global sales.
df_global = df.sort_values('Global_Sales', ascending=False)



In [None]:
def df_factory(min, max, column: Union[str, List[str]], selected_column: Union[str, List[str]] = 'Global_Sales'):
    """Aggregate sales from ``df_global`` over an inclusive range of years.

    Args:
        min: First year to include. A falsy value (e.g. ``None`` or ``0``)
            skips filtering and aggregation altogether.
        max: Last year to include; only consulted when ``min`` is truthy.
        column: Column name(s) to group by.
        selected_column: Column name(s) summed within each group and used
            as the sort key.

    Returns:
        ``df_global`` itself, unmodified, when ``min`` is falsy. Otherwise a
        new frame with one row per group holding the summed
        ``selected_column``, sorted in descending order of that sum.
    """
    # Guard clause: without a lower bound the full table is handed back as-is.
    if not min:
        return df_global

    years = df_global['Year']
    in_range = df_global[(years >= min) & (years <= max)]
    totals = in_range.groupby(column)[selected_column].sum().reset_index()
    return totals.sort_values(by=selected_column, ascending=False)

In [65]:
# Five publishers with the highest total global sales.
pub = (
    df.groupby('Publisher')['Global_Sales']
    .sum()
    .sort_values(ascending=False)
    .head(5)
    .reset_index()
)
pub

Unnamed: 0,Publisher,Global_Sales
0,Nintendo,1784.43
1,Electronic Arts,1093.39
2,Activision,721.41
3,Sony Computer Entertainment,607.28
4,Ubisoft,473.25


In [66]:
# Five platforms with the highest total global sales.
df_platform = (
    df.groupby('Platform')['Global_Sales']
    .sum()
    .sort_values(ascending=False)
    .head(5)
    .reset_index()
)
df_platform

Unnamed: 0,Platform,Global_Sales
0,PS2,1233.46
1,X360,969.6
2,PS3,949.35
3,Wii,909.81
4,DS,818.62


In [67]:
# Genre options: a leading 'Global' entry followed by each distinct genre.
x = ['Global', *df['Genre'].unique()]
x



['Global',
 'Shooter',
 'Misc',
 'Action',
 'Sports',
 'Fighting',
 'Puzzle',
 'Racing',
 'Platform',
 'Simulation',
 'Adventure',
 'Role-Playing',
 'Strategy']

In [68]:
# Total global sales per publisher.
top_publishers = df.groupby('Publisher')['Global_Sales'].sum()
# The ten largest totals, as a two-column frame with a fresh 0..9 index.
df_top_publishers = (
    top_publishers
    .sort_values(ascending=False)
    .iloc[:10]
    .reset_index()
)
df_top_publishers


Unnamed: 0,Publisher,Global_Sales
0,Nintendo,1784.43
1,Electronic Arts,1093.39
2,Activision,721.41
3,Sony Computer Entertainment,607.28
4,Ubisoft,473.25
5,Take-Two Interactive,399.3
6,THQ,340.44
7,Konami Digital Entertainment,278.56
8,Sega,270.66
9,Namco Bandai Games,253.65


In [69]:
# Total global sales per game title, summed over every row sharing that name.
df_top_games = df.groupby('Name')['Global_Sales'].sum()
# Display the ten best-selling titles; `df_top_games` itself stays unsorted.
df_top_games.sort_values(ascending=False).iloc[:10].reset_index()

Unnamed: 0,Name,Global_Sales
0,Wii Sports,82.74
1,Grand Theft Auto V,55.92
2,Super Mario Bros.,45.31
3,Tetris,35.84
4,Mario Kart Wii,35.82
5,Wii Sports Resort,33.0
6,Pokemon Red/Pokemon Blue,31.37
7,Call of Duty: Modern Warfare 3,30.83
8,New Super Mario Bros.,30.01
9,Call of Duty: Black Ops II,29.72


In [73]:
# Ten platforms with the highest total global sales.
df_top_plat = (
    df.groupby('Platform')[['Global_Sales']]
    .sum()
    .reset_index()
    .sort_values(by='Global_Sales', ascending=False)
    .head(10)
)

# One label per bar, combining the platform name with its sales total.
text_top_plat = [
    f'{platform} - R${total} milhões'
    for platform, total in zip(df_top_plat['Platform'], df_top_plat['Global_Sales'])
]

# Horizontal bar chart: totals along x, platform names along y.
fig_top_plat = px.bar(
    x=df_top_plat['Global_Sales'],
    y=df_top_plat['Platform'],
    orientation='h',
    text=text_top_plat,
)
# Apply the shared layout, with hover switched off for this chart.
fig_top_plat.update_layout(main_config, hovermode=False)

fig_top_plat


ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [None]:
# Global sales summed per (year, title) pair, most recent years first.
# This rebinds `df_top_games` to a DataFrame.
df_top_games = (
    df.groupby(['Year', 'Name'])[['Global_Sales']]
    .sum()
    .reset_index()
    .sort_values(by='Year', ascending=False)
)
# Un-aggregated rows for a single title, taken from the base table.
vendas = df.loc[df['Name'] == 'Super Mario Bros.']
vendas

Unnamed: 0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
1,Super Mario Bros.,NES,1985,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
200,Super Mario Bros.,GB,1999,Platform,Nintendo,3.4,1.3,0.15,0.22,5.07
