In [1]:
import pandas as pd

In [31]:
# Directory that holds the raw input files. Keeping it in a single constant
# means the notebook can be pointed at another location by editing one line.
DATA_DIR = "C:/data/Video Game Sales"

# Sales dataset and its accompanying data dictionary (field descriptions).
df_vgchartz = pd.read_csv(f"{DATA_DIR}/vgchartz-2024.csv")
df_vg_data_dictionary = pd.read_csv(f"{DATA_DIR}/vg_data_dictionary.csv")

In [32]:
# Display the data dictionary (field name and description of each column).
df_vg_data_dictionary

Unnamed: 0,Field,Description
0,img,URL slug for the box art at vgchartz.com
1,title,Game title
2,console,Console the game was released for
3,genre,Genre of the game
4,publisher,Publisher of the game
5,developer,Developer of the game
6,critic_score,Metacritic score (out of 10)
7,total_sales,Global sales of copies in millions
8,na_sales,North American sales of copies in millions
9,jp_sales,Japanese sales of copies in millions


### Seleção de colunas
##### Seleção das colunas relevantes para a análise, mantendo apenas os campos considerados essenciais: `title`, `console`, `genre`, `publisher`, `developer` e `release_date`.

In [33]:
# Keep only the columns used in the analysis. The explicit .copy() makes the
# result an independent frame, so the column assignments performed in later
# cells do not write to a slice of the original data (which would raise
# pandas' SettingWithCopyWarning).
df_vgchartz = df_vgchartz[
    ["title", "console", "genre", "publisher", "developer", "release_date"]
].copy()
df_vgchartz.head()

Unnamed: 0,title,console,genre,publisher,developer,release_date
0,Grand Theft Auto V,PS3,Action,Rockstar Games,Rockstar North,2013-09-17
1,Grand Theft Auto V,PS4,Action,Rockstar Games,Rockstar North,2014-11-18
2,Grand Theft Auto: Vice City,PS2,Action,Rockstar Games,Rockstar North,2002-10-28
3,Grand Theft Auto V,X360,Action,Rockstar Games,Rockstar North,2013-09-17
4,Call of Duty: Black Ops 3,PS4,Shooter,Activision,Treyarch,2015-11-06


##### Conversão da release_date para datetime

In [34]:
# Parse release_date as datetime. errors="coerce" turns values that cannot be
# parsed into NaT (Not a Time) instead of raising an exception.
df_vgchartz["release_date"] = pd.to_datetime(
    arg=df_vgchartz["release_date"],
    errors="coerce",
)

##### Criação da coluna Ano

In [35]:
# Extract the release year into a new 'ano' column. The nullable 'Int64'
# dtype keeps the years as integers while allowing missing values.
df_vgchartz["ano"] = (
    df_vgchartz["release_date"]
    .dt.year
    .astype('Int64')
)
df_vgchartz.head()

Unnamed: 0,title,console,genre,publisher,developer,release_date,ano
0,Grand Theft Auto V,PS3,Action,Rockstar Games,Rockstar North,2013-09-17,2013
1,Grand Theft Auto V,PS4,Action,Rockstar Games,Rockstar North,2014-11-18,2014
2,Grand Theft Auto: Vice City,PS2,Action,Rockstar Games,Rockstar North,2002-10-28,2002
3,Grand Theft Auto V,X360,Action,Rockstar Games,Rockstar North,2013-09-17,2013
4,Call of Duty: Black Ops 3,PS4,Shooter,Activision,Treyarch,2015-11-06,2015


##### Seleção dos consoles mais relevantes para a análise e dicionário de mapeamento por fabricante de console

In [91]:
# Allow-list of console codes considered representative of the games market.
# Rows whose console is not listed here are dropped by the filtering step.
# Each code appears exactly once.
consoles_validos = [
    'PS', 'PS2', 'PS3', 'PS4', 'PS5', 'PSP', 'PSV',
    'XB', 'X360', 'XOne', 'XS', 'XBL', 'Wii', 'WiiU', 'NS', 'DS', '3DS',
    'GBA', 'GB', 'NES', 'SNES', 'N64', 'GC', 'DSiW', 'VB', 'VC', 'GBC',
    'GEN', 'SAT', 'DC', 'NG', 'SCD', 'GG', 'MS', 'Linux', 'PC',
    'And', 'iOS', '3DO',
]


# Mapping from console code to console manufacturer. Codes that have no entry
# here (e.g. 'PC', 'Linux', 'And', 'iOS', '3DO') are labelled 'Outros' when
# the mapping is applied to the data.
fabricante_consoles = {
    # Sony
    'PS': 'Sony', 'PS2': 'Sony', 'PS3': 'Sony',
    'PS4': 'Sony', 'PS5': 'Sony', 'PSP': 'Sony', 'PSV': 'Sony',

    # Microsoft
    'XB': 'Microsoft', 'X360': 'Microsoft', 'XOne': 'Microsoft',
    'XS': 'Microsoft', 'XBL': 'Microsoft',

    # Nintendo
    'Wii': 'Nintendo', 'WiiU': 'Nintendo', 'NS': 'Nintendo',
    'DS': 'Nintendo', '3DS': 'Nintendo', 'GBA': 'Nintendo',
    'GB': 'Nintendo', 'NES': 'Nintendo', 'SNES': 'Nintendo',
    'N64': 'Nintendo', 'GC': 'Nintendo', 'DSiW': 'Nintendo',
    'VB': 'Nintendo', 'VC': 'Nintendo', 'GBC': 'Nintendo',

    # Sega (SCD = Sega CD and GG = Game Gear are Sega hardware)
    'GEN': 'Sega', 'SAT': 'Sega', 'DC': 'Sega', 'MS': 'Sega',
    'SCD': 'Sega', 'GG': 'Sega',

    # SNK (NG = Neo Geo)
    'NG': 'SNK',
}

In [89]:
# Keep only the rows whose console is in the allow-list; .copy() makes the
# filtered frame independent of df_vgchartz so new columns can be added to it.
df_vgchartz_filtrado = df_vgchartz.loc[
    df_vgchartz['console'].isin(consoles_validos)
].copy()

In [90]:
# Preview the first rows of the console-filtered frame.
df_vgchartz_filtrado.head()

Unnamed: 0,title,console,genre,publisher,developer,release_date,ano
0,Grand Theft Auto V,PS3,Action,Rockstar Games,Rockstar North,2013-09-17,2013
1,Grand Theft Auto V,PS4,Action,Rockstar Games,Rockstar North,2014-11-18,2014
2,Grand Theft Auto: Vice City,PS2,Action,Rockstar Games,Rockstar North,2002-10-28,2002
3,Grand Theft Auto V,X360,Action,Rockstar Games,Rockstar North,2013-09-17,2013
4,Call of Duty: Black Ops 3,PS4,Shooter,Activision,Treyarch,2015-11-06,2015


In [92]:
# Add a 'fabricante' column that groups each console under its manufacturer
# (Sony, Microsoft, Nintendo, Sega or SNK). Consoles with no entry in
# fabricante_consoles are labelled 'Outros'. This standardisation makes
# per-manufacturer analysis easier.
df_vgchartz_filtrado['fabricante'] = (
    df_vgchartz_filtrado['console']
    .map(fabricante_consoles)
    .fillna('Outros')
)

In [98]:
# Preview the first rows to check the new 'fabricante' column.
df_vgchartz_filtrado.head()

Unnamed: 0,title,console,genre,publisher,developer,release_date,ano,fabricante
0,Grand Theft Auto V,PS3,Action,Rockstar Games,Rockstar North,2013-09-17,2013,Sony
1,Grand Theft Auto V,PS4,Action,Rockstar Games,Rockstar North,2014-11-18,2014,Sony
2,Grand Theft Auto: Vice City,PS2,Action,Rockstar Games,Rockstar North,2002-10-28,2002,Sony
3,Grand Theft Auto V,X360,Action,Rockstar Games,Rockstar North,2013-09-17,2013,Microsoft
4,Call of Duty: Black Ops 3,PS4,Shooter,Activision,Treyarch,2015-11-06,2015,Sony


In [94]:
# Count the missing values in each column.
df_vgchartz_filtrado.isnull().sum()

title              0
console            0
genre              0
publisher          0
developer         17
release_date    6648
ano             6648
fabricante         0
dtype: int64

In [96]:
# Fill the missing entries of the developer column (17 rows according to the
# null count) with the placeholder text "Não informado".
df_vgchartz_filtrado["developer"] = (
    df_vgchartz_filtrado["developer"].fillna(value="Não informado")
)

In [100]:
# Drop every row that has a missing value in release_date or ano, and keep
# the result as an independent copy.
df_vgchartz_filtrado = (
    df_vgchartz_filtrado
    .dropna(subset=['release_date', 'ano'])
    .copy()
)

In [101]:
# Re-count the missing values per column to confirm none remain.
df_vgchartz_filtrado.isnull().sum()

title           0
console         0
genre           0
publisher       0
developer       0
release_date    0
ano             0
fabricante      0
dtype: int64

In [102]:
# Preview the first rows of the frame after the null handling.
df_vgchartz_filtrado.head()

Unnamed: 0,title,console,genre,publisher,developer,release_date,ano,fabricante
0,Grand Theft Auto V,PS3,Action,Rockstar Games,Rockstar North,2013-09-17,2013,Sony
1,Grand Theft Auto V,PS4,Action,Rockstar Games,Rockstar North,2014-11-18,2014,Sony
2,Grand Theft Auto: Vice City,PS2,Action,Rockstar Games,Rockstar North,2002-10-28,2002,Sony
3,Grand Theft Auto V,X360,Action,Rockstar Games,Rockstar North,2013-09-17,2013,Microsoft
4,Call of Duty: Black Ops 3,PS4,Shooter,Activision,Treyarch,2015-11-06,2015,Sony


In [105]:
# Rename the columns to their Portuguese labels. The renamed frame is
# reassigned instead of using inplace=True, which keeps the step chainable
# and follows the recommended pandas style.
df_vgchartz_filtrado = df_vgchartz_filtrado.rename(columns={
    'title': 'Titulo',
    'console': 'Console',
    'genre': 'Genero',
    'publisher': 'Publicadora',
    'developer': 'Desenvolvedora',
    'release_date': 'Data_Lancamento',
    'ano': 'Ano',
    'fabricante': 'Fabricante'
})

In [109]:
# Preview the first rows with the renamed column headers.
df_vgchartz_filtrado.head()

Unnamed: 0,Titulo,Console,Genero,Publicadora,Desenvolvedora,Data_Lancamento,Ano,Fabricante
0,Grand Theft Auto V,PS3,Action,Rockstar Games,Rockstar North,2013-09-17,2013,Sony
1,Grand Theft Auto V,PS4,Action,Rockstar Games,Rockstar North,2014-11-18,2014,Sony
2,Grand Theft Auto: Vice City,PS2,Action,Rockstar Games,Rockstar North,2002-10-28,2002,Sony
3,Grand Theft Auto V,X360,Action,Rockstar Games,Rockstar North,2013-09-17,2013,Microsoft
4,Call of Duty: Black Ops 3,PS4,Shooter,Activision,Treyarch,2015-11-06,2015,Sony


##### Criação de tabela calendário

In [111]:
# Build a calendar table with one row per day between the earliest and the
# latest release date present in the data.
data_min = df_vgchartz_filtrado['Data_Lancamento'].min()
data_max = df_vgchartz_filtrado['Data_Lancamento'].max()

df_tabela_calendario = pd.DataFrame({
    'date': pd.date_range(start=data_min, end=data_max, freq='D')
})

# Derived date attributes. They are added to df_tabela_calendario itself (the
# frame created above and later exported); the original code assigned them to
# an undefined name, `tabela_calendario`, which fails on a fresh kernel.
df_tabela_calendario['ano'] = df_tabela_calendario['date'].dt.year
df_tabela_calendario['mes'] = df_tabela_calendario['date'].dt.month
df_tabela_calendario['nome_mes'] = df_tabela_calendario['date'].dt.month_name()
# Quarter label as text, e.g. "2013Q3".
df_tabela_calendario['trimestre'] = df_tabela_calendario['date'].dt.to_period('Q').astype(str)
df_tabela_calendario['dia_da_semana'] = df_tabela_calendario['date'].dt.day_name()
# weekday is 0 for Monday ... 6 for Sunday, so >= 5 means Saturday or Sunday.
df_tabela_calendario['fim_de_semana'] = df_tabela_calendario['date'].dt.weekday >= 5
# Year-month label as text, e.g. "2013-09".
df_tabela_calendario['ano_mes'] = df_tabela_calendario['date'].dt.to_period('M').astype(str)

In [107]:
# Export the cleaned games table and the calendar table as UTF-8 CSV files,
# without writing the DataFrame index.
df_vgchartz_filtrado.to_csv(
    "df_vgchartz_filtrado.csv",
    encoding="utf-8",
    index=False,
)
df_tabela_calendario.to_csv(
    "tabela_calendario.csv",
    encoding="utf-8",
    index=False,
)