In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ast
from datetime import datetime
import json

from pathlib import Path
# Relative directory the input CSV is read from (see the load-data cell).
dataPath = Path("../data")
# Relative directory for images; NOTE(review): not referenced in the cells visible here.
imagePath = Path("image")

# Data processing for the Sankey diagram

## load data 

In [27]:
# Read the cleaned games dataset from the data directory into a DataFrame.
# NOTE(review): the frame displayed further down has an "Unnamed: 0" column, which
# suggests the CSV was written together with its index -- confirm whether
# index_col=0 would be more appropriate here.
games_clean = pd.read_csv(dataPath / "games_data_clean.csv")

## convert types

In [28]:
# Work on a copy so the conversions below only ever touch this frame.
games_clean = games_clean.copy()

# These columns hold the string representation of a Python list; parse each
# string back into a real list. Cells that are not strings are passed through
# unchanged, so re-running this cell after the conversion does not raise.
for column in ['Genres', 'Platforms', 'Developers']:
    games_clean[column] = games_clean[column].apply(
        lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell
    )

# Convert the count columns to integers. Plain column assignment is used rather
# than `.loc[:, column] = ...`: on pandas >= 2.0 the `.loc` form writes into the
# existing column in place and may keep the old dtype, turning the cast into a no-op.
for column in ['Plays', 'Playing', 'Backlogs', 'Wishlist', 'Reviews']:
    games_clean[column] = games_clean[column].astype(int)

# Convert the rating to float (a missing rating stays NaN).
games_clean['Rating'] = games_clean['Rating'].astype(float)

# Convert the release date string into a date object
def convert_to_date(date_str):
    """Parse a ``YYYY-MM-DD`` release date into a ``datetime.date``.

    Parameters
    ----------
    date_str : str, datetime.date or None
        A release date formatted as ``%Y-%m-%d``, the placeholder ``'TBD'``,
        a missing value (``None`` / ``NaN``), or an already converted date.

    Returns
    -------
    datetime.date or None
        The parsed date; ``None`` for ``'TBD'`` and for missing values.
        A value that is already a date is returned unchanged.

    Raises
    ------
    ValueError
        If ``date_str`` is a string other than ``'TBD'`` that does not match
        the ``%Y-%m-%d`` format.
    """
    from datetime import date

    # Missing cells show up as None or NaN; NaN is the only value that is
    # not equal to itself, so this check needs no extra dependency.
    if date_str is None or date_str != date_str:
        return None
    if date_str == 'TBD':
        return None  # Release date not announced yet
    # Already converted (e.g. the cell was run twice): nothing left to parse.
    if isinstance(date_str, date):
        return date_str
    return datetime.strptime(date_str, '%Y-%m-%d').date()
    
# Replace every Release_Date entry with the value convert_to_date returns for it.
games_clean.loc[:, 'Release_Date'] = (
    games_clean['Release_Date'].apply(convert_to_date)
)

## group by years

In [29]:
# Most recent release year kept in the per-year data.
MAX_RELEASE_YEAR = 2023

# Build a new frame with a Year column extracted from Release_Date;
# games_clean itself is left unmodified.
games_by_year = games_clean.assign(
    Year=pd.to_datetime(games_clean['Release_Date']).dt.year
)

# Keep only games released up to MAX_RELEASE_YEAR. Rows without a release date
# have a NaN year, fail the comparison, and are therefore dropped as well.
games_by_year = games_by_year[games_by_year['Year'] <= MAX_RELEASE_YEAR].copy()

# A single missing date makes the extracted year a float column; cast back to int
# (safe now that NaN rows are gone) so group keys read 2022 rather than 2022.0.
games_by_year['Year'] = games_by_year['Year'].astype(int)

games_by_year



Unnamed: 0,Title,Release_Date,Developers,Summary,Platforms,Genres,Rating,Plays,Playing,Backlogs,Wishlist,Reviews,Year
0,Elden Ring,2022-02-25,"[FromSoftware, Bandai Namco Entertainment]","Elden Ring is a fantasy, action and open world...","[Windows PC, PlayStation 4, Xbox One, PlayStat...","[Adventure, RPG]",4.5,21000,4100,5600,5500,3000,2022
1,The Legend of Zelda: Breath of the Wild,2017-03-03,"[Nintendo, Nintendo EPD Production Group No. 3]",The Legend of Zelda: Breath of the Wild is the...,"[Wii U, Nintendo Switch]","[Adventure, Puzzle]",4.4,35000,3100,5600,3000,3000,2017
2,Hades,2018-12-07,[Supergiant Games],A rogue-lite hack and slash dungeon crawler in...,"[Windows PC, Mac, PlayStation 4, Xbox One, Pla...","[Adventure, Brawler, Indie, RPG]",4.3,25000,3500,7300,4000,2100,2018
3,Hollow Knight,2017-02-24,[Team Cherry],A 2D metroidvania with an emphasis on close co...,"[Windows PC, Mac, Linux, Nintendo Switch]","[Adventure, Indie, Platform]",4.4,25000,2700,9600,2600,2100,2017
4,Undertale,2015-09-15,"[tobyfox, 8-4]","A small child falls into the Underground, wher...","[Windows PC, Mac, Linux, PlayStation 4, Xbox O...","[Adventure, Indie, RPG, Turn Based Strategy]",4.2,32000,728,5700,2100,2500,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...
27998,FIFA Manager 14,2013-10-24,"[Bright Future, Electronic Arts]",FIFA Manager 14: Legacy Edition is a re-releas...,[Windows PC],[Sport],2.5,6,0,4,0,0,2013
27999,Japanese Rail Sim 3D Monorail Trip to Okinawa,2016-08-25,[Sonic Powered],"This game is set on the Yui Rail, a railway lo...",[Nintendo 3DS],[Simulator],,3,0,3,1,0,2016
28000,Just Snowboarding,2017-01-25,[Rrndrln Games],Just Snowboarding is a highly authentic snowbo...,[iOS],"[Simulator, Sport]",,0,0,0,0,0,2017
28001,Dragon Spirits,2023-04-25,"[FHNBHJ, indienova]",While constantly working on a video game witho...,[Windows PC],[RPG],,2,0,1,5,0,2023


## create structured data

In [30]:
# Maps each release year to a list of
# {'Genre', 'Platform', 'Developer', 'value'} records.
structured_data = {}

# One group of games per release year
grouped = games_by_year.groupby('Year')

for year, year_games in grouped:
    # Expand the three list columns one after the other so each row holds a
    # single genre, platform and developer.
    # NOTE(review): chaining the explodes yields every genre x platform x developer
    # combination of a game -- confirm this cross-product is what the Sankey needs.
    exploded = year_games[['Genres', 'Platforms', 'Developers']]
    for list_column in ('Genres', 'Platforms', 'Developers'):
        exploded = exploded.explode(list_column)

    # Number of rows per (genre, platform, developer) combination
    combination_counts = exploded.value_counts().to_dict()

    # One record per combination, stored under the year
    structured_data[year] = [
        {'Genre': genre, 'Platform': platform, 'Developer': developer, 'value': count}
        for (genre, platform, developer), count in combination_counts.items()
    ]


## create JSON

In [31]:
# Serialize the per-year records to JSON text, indented by 4 spaces.
# Serialization happens before the file is opened, so a failure here leaves
# no partially written file behind.
json_data = json.dumps(structured_data, indent=4)

# Write the JSON text to a file in the current working directory.
with open('games_by_year_by_all.json', 'w') as json_file:
    json_file.write(json_data)