# Game Analyzer

## Import Modules

In [60]:
import pandas as pd
import numpy as np
from pandas import DataFrame
import re
import os

## Initializing DataFrame

In [61]:
# Load the raw export, turn the two placeholder strings into missing values,
# and keep only rows that are complete in every column.
# NOTE(review): '[]' presumably marks an empty list field and 'releases on TBD'
# an unreleased game -- confirm against data/raw_games.csv.
df = (
    pd.read_csv('data/raw_games.csv')
    .replace(['[]', 'releases on TBD'], np.nan)
    .dropna()
)

## Generate Test DataFrames

### Main DataFrame

In [62]:
def _parse_genres(raw):
    """Turn a stringified list such as "['Adventure', 'RPG']" into 'Adventure-RPG'.

    Only single-quoted entries made of word characters, '&' and spaces are
    picked up; any other entry is not matched by the pattern and is left out.
    """
    return '-'.join(quoted[1:-1] for quoted in re.findall(r"'[\w& ]*'", raw))


def _parse_plays(raw):
    """Convert a play count such as '3.9K', '17k' or '812' to an integer.

    A trailing 'K'/'k' means thousands. The product is rounded rather than
    truncated: binary floating point can land just below the intended value
    (float('32.3') * 1000 == 32299.999999999996), and int() would then drop
    a whole play from the count.
    """
    if raw[-1] in ('K', 'k'):
        return round(float(raw[:-1]) * 1000)
    return int(raw)


# NOTE: this cell rewrites 'Genres' and 'Plays' in place, so it must be run
# exactly once after the load cell. A second run would find no quoted entries
# in the already-joined genre strings and blank the column.
df['Genres'] = df['Genres'].apply(_parse_genres)
df['Plays'] = df['Plays'].apply(_parse_plays)

# Drop games whose title contains a comma.
# NOTE(review): presumably so consumers can split the CSV on ',' -- confirm.
df = df.loc[~df['Title'].str.contains(',')]
df.to_csv('data/games.csv', index=False)

### TestReadFromCSV

In [63]:
# Reference output for TestReadFromCSV: the cleaned games with exact duplicate
# rows removed. Every later cell works on this de-duplicated frame.
os.makedirs('solution', exist_ok=True)
df = df.drop_duplicates(keep='first')
df.to_csv(path_or_buf='solution/testReadFromCSV.csv', index=False)

### TestSortByRating

In [64]:
# Reference output for TestSortByRating: every game, highest rating first.
# NOTE(review): the default sort algorithm is not stable, so the relative order
# of games with equal ratings is unspecified -- confirm the consuming test
# tolerates that.
games_by_rating = df.sort_values('Rating', ascending=False)
games_by_rating.to_csv('solution/testSortByRating.csv', index=False)

### TestTopMostPlayedGames

In [65]:
# Reference output for TestTopMostPlayedGames: games with at least 13000 plays,
# most played first.
most_played = df.loc[df['Plays'] >= 13000].sort_values('Plays', ascending=False)
most_played.to_csv('solution/testTopMostPlayedGames.csv', index=False)

### TestCountReleasedGames

In [66]:
# Expected values for TestCountReleasedGames: how many games have a release
# date whose last '-'-separated part starts with the given two-digit year.
for label, year_suffix in (('2023', '23'), ('2020', '20')):
    released_that_year = df['Release Date'].str.contains(r'\d*-\w*-' + year_suffix)
    print(label + ':', len(df[released_that_year]))

2023: 22
2020: 51


### TestInterestGenreInYear

In [79]:
def _plays_for_genre_in_year(games, year_suffix, genre):
    """Total 'Plays' of games released in a two-digit year that list `genre`.

    games       -- frame with 'Release Date', 'Genres' and 'Plays' columns
    year_suffix -- two-digit year as text, e.g. '21'
    genre       -- substring looked up in the joined 'Genres' string

    The 'Plays' column is selected before summing. Summing the whole frame
    would also reduce every text column, concatenating all of its strings,
    only for that work to be thrown away.
    """
    released = games['Release Date'].str.contains(r'\d*-\w*-' + year_suffix)
    in_genre = games['Genres'].str.contains(genre)
    return games.loc[released & in_genre, 'Plays'].sum()


# Expected values for TestInterestGenreInYear.
print('2021:', _plays_for_genre_in_year(df, '21', 'Shooter'))
print('2016:', _plays_for_genre_in_year(df, '16', 'Simulator'))

2021: 42990
2016: 62124
2010: 167745


### TestInterestGenreChangeWithinYears

In [89]:
def _two_digit_year(release_date):
    """Return the third '-'-separated part of a release date as an int."""
    return int(release_date.split('-')[2])


def _genre_plays_by_year(games, genre, first_year, end_year):
    """Sum 'Plays' per release year for `genre`, first_year <= year < end_year.

    `games` must already hold integer years in 'Release Date'. The result has
    the shape {'Plays': {year: total_plays, ...}}.
    """
    year = games['Release Date']
    wanted = (year >= first_year) & (year < end_year) & games['Genres'].str.contains(genre)
    return games[wanted].groupby('Release Date').aggregate({'Plays': 'sum'}).to_dict()


# Work on a copy so the original date strings in `df` stay untouched.
date_edited = df.copy()
date_edited['Release Date'] = date_edited['Release Date'].apply(_two_digit_year)

print('2010 to 2019 Adventure:', _genre_plays_by_year(date_edited, 'Adventure', 10, 20))

# Emit the Shooter totals for years 10..22 as ready-to-paste `expected.put` lines.
shooter_by_year = _genre_plays_by_year(date_edited, 'Shooter', 10, 23)
for year, plays in shooter_by_year['Plays'].items():
    print(f'expected.put({year}, {plays});')

2010 to 2019 Adventure: {'Plays': {10: 167745, 11: 236000, 12: 153292, 13: 284560, 14: 146924, 15: 212025, 16: 209248, 17: 266619, 18: 248054, 19: 264180}}
expected.put(10, 103587);
expected.put(11, 81500);
expected.put(12, 94700);
expected.put(13, 125700);
expected.put(14, 85589);
expected.put(15, 71300);
expected.put(16, 115620);
expected.put(17, 116700);
expected.put(18, 41961);
expected.put(19, 75300);
expected.put(20, 67600);
expected.put(21, 42990);
expected.put(22, 21542);
