In [1]:
import pandas as pd
import json

PLAYTIME_THRESHOLD = 10

def convert_game(game: object) -> pd.Series:
    game = pd.json_normalize(game)
    game["playtime_forever"] = game["playtime_forever"].apply(lambda p: p / 60)
    game = game.where(lambda p: p["playtime_forever"] > PLAYTIME_THRESHOLD).dropna()
    return game

games = pd.read_json("games.json")
games["games"] = games["games"].apply(convert_game)
games["games"][0]

Unnamed: 0,appid,playtime_forever
3,218620.0,10.8
10,730.0,688.8
13,470220.0,16.4
15,360430.0,33.183333
16,289070.0,28.483333
20,252950.0,2871.683333
22,578080.0,199.066667
25,695290.0,136.433333
41,1089350.0,64.366667


In [2]:
def convert_game_info(gi):
    gi["appid"] = gi["game_info"]["steam_appid"]
    gi["name"] = gi["game_info"]["name"]
    if gi["game_info"]["genres"] is None:
        gi["game_info"]["genres"] = []
    gi["genres"] = list(map(lambda g: g["description"], gi["game_info"]["genres"]))
    if len(gi["reviews"]) > 0:
        gi["reviews"] = round(float(gi["reviews"][0]["weighted_vote_score"]), ndigits=2)
    else:
        return None
    return gi

game_info = pd.read_json("game_info.json")
game_info = game_info.apply(convert_game_info, axis=1)
game_info

Unnamed: 0,game_info,reviews,review_summary,appid,name,genres
0,"{'name': 'Warframe', 'steam_appid': 230410, 'g...",0.52,"{'review_score': 8.0, 'total_positive': 1009, ...",230410.0,Warframe,"[Action, Free to Play]"
1,"{'name': 'Rust', 'steam_appid': 252490, 'genre...",0.91,"{'review_score': 8.0, 'total_positive': 169918...",252490.0,Rust,"[Action, Adventure, Indie, Massively Multiplay..."
2,"{'name': 'Grand Theft Auto: San Andreas', 'ste...",0.52,"{'review_score': 7.0, 'total_positive': 39, 't...",12250.0,Grand Theft Auto: San Andreas,[]
3,"{'name': 'Grand Theft Auto: San Andreas', 'ste...",0.77,"{'review_score': 8.0, 'total_positive': 12549,...",12120.0,Grand Theft Auto: San Andreas,[Action]
4,"{'name': 'Game Dev Tycoon', 'steam_appid': 239...",0.70,"{'review_score': 8.0, 'total_positive': 16966,...",239820.0,Game Dev Tycoon,"[Casual, Indie, Simulation, Strategy]"
...,...,...,...,...,...,...
15821,"{'name': 'The Wind Road 紫塞秋风', 'steam_appid': ...",0.55,"{'review_score': 8.0, 'total_positive': 52, 't...",1264670.0,The Wind Road 紫塞秋风,"[Action, Adventure, Indie, RPG]"
15822,{'name': 'Stifled - Echolocation Horror Myster...,0.59,"{'review_score': 6.0, 'total_positive': 14, 't...",514830.0,Stifled - Echolocation Horror Mystery,"[Action, Adventure, Indie, Simulation]"
15823,"{'name': 'Shoot 1UP', 'steam_appid': 373610, '...",0.62,"{'review_score': 7.0, 'total_positive': 41, 't...",373610.0,Shoot 1UP,"[Action, Indie]"
15824,{'name': 'State of War : Warmonger / 蓝色警戒 (Cla...,0.54,"{'review_score': 8.0, 'total_positive': 51, 't...",748040.0,State of War : Warmonger / 蓝色警戒 (Classic 2000),"[Action, Strategy]"


In [3]:
def convert_to_percentage(game_time):
    total_playtime = sum(game_time)
    playtime_percentages = []
    for playtime in game_time:
        playtime_to_percent = (playtime / total_playtime) * 100
        playtime_percentages.append(playtime_to_percent)
    return playtime_percentages;

for i in range(len(games["games"])):
    games["games"][i]["playtime_forever"] = convert_to_percentage(games["games"][i]["playtime_forever"])

In [79]:
all_genres = set()
for app_genre_list in game_info['genres']:
    if app_genre_list is not None:
        for genre in app_genre_list:
            all_genres.add(genre)
print(unique_genres)

{'Free to Play', 'Sports', 'Accounting', 'Game Development', 'Strategy', 'Racing', 'Gore', 'Short', 'Violent', 'Web Publishing', 'Nudity', 'Design & Illustration', 'Simulation', 'Early Access', 'Movie', 'Documentary', 'Episodic', 'Photo Editing', 'Adventure', 'Tutorial', 'Education', 'Action', 'Massively Multiplayer', 'RPG', 'Animation & Modeling', 'Sexual Content', 'Utilities', 'Video Production', 'Casual', 'Indie', 'Audio Production', 'Software Training'}


In [40]:
game_info.head()

Unnamed: 0,game_info,reviews,review_summary,appid,name,genres
0,"{'name': 'Warframe', 'steam_appid': 230410, 'g...",0.52,"{'review_score': 8.0, 'total_positive': 1009, ...",230410.0,Warframe,"[Action, Free to Play]"
1,"{'name': 'Rust', 'steam_appid': 252490, 'genre...",0.91,"{'review_score': 8.0, 'total_positive': 169918...",252490.0,Rust,"[Action, Adventure, Indie, Massively Multiplay..."
2,"{'name': 'Grand Theft Auto: San Andreas', 'ste...",0.52,"{'review_score': 7.0, 'total_positive': 39, 't...",12250.0,Grand Theft Auto: San Andreas,[]
3,"{'name': 'Grand Theft Auto: San Andreas', 'ste...",0.77,"{'review_score': 8.0, 'total_positive': 12549,...",12120.0,Grand Theft Auto: San Andreas,[Action]
4,"{'name': 'Game Dev Tycoon', 'steam_appid': 239...",0.7,"{'review_score': 8.0, 'total_positive': 16966,...",239820.0,Game Dev Tycoon,"[Casual, Indie, Simulation, Strategy]"


In [55]:
print(game_info["appid"][1], game_info["name"][1], game_info["genres"][1])

252490.0 Rust ['Action', 'Adventure', 'Indie', 'Massively Multiplayer', 'RPG']


In [64]:
# Given the appid of a game, we will get its associated genre
def genre_from_appid(appid):
    return game_info["genres"][(int(game_info[game_info["appid"]==appid].index.values))]
# print(genre_from_appid(252490.0))


['Action', 'Adventure', 'Indie', 'Massively Multiplayer', 'RPG']


In [128]:
print(games["games"][0]["playtime_forever"], games["games"][0]["appid"])

3      0.266718
10    17.010698
13     0.405017
15     0.819500
16     0.703428
20    70.919478
22     4.916177
25     3.369376
41     1.589608
Name: playtime_forever, dtype: float64 3      218620.0
10        730.0
13     470220.0
15     360430.0
16     289070.0
20     252950.0
22     578080.0
25     695290.0
41    1089350.0
Name: appid, dtype: float64


In [132]:
# want to create a dictionary then have it be a column in games df
genre_dict = {}
for genre in list(all_genres):
    genre_dict[genre] = 0

player_list = []
for player in games["player"]:
    player_list.append(genre_dict)

# print(player_list)
games["genre_mapping"] = player_list
print(games["genre_mapping"][0])


{'Free to Play': 0, 'Sports': 0, 'Accounting': 0, 'Game Development': 0, 'Strategy': 0, 'Racing': 0, 'Gore': 0, 'Short': 0, 'Violent': 0, 'Web Publishing': 0, 'Nudity': 0, 'Design & Illustration': 0, 'Simulation': 0, 'Early Access': 0, 'Movie': 0, 'Documentary': 0, 'Episodic': 0, 'Photo Editing': 0, 'Adventure': 0, 'Tutorial': 0, 'Education': 0, 'Action': 0, 'Massively Multiplayer': 0, 'RPG': 0, 'Animation & Modeling': 0, 'Sexual Content': 0, 'Utilities': 0, 'Video Production': 0, 'Casual': 0, 'Indie': 0, 'Audio Production': 0, 'Software Training': 0}


In [162]:
# return game_info["genres"][(int(game_info[game_info["appid"]==appid].index.values))]
def getplaytime(appid, player):
    player_rec = games["games"][player]
    print((player_rec["appid"]==appid).index.values)
    # print(player_rec[(player_rec["appid"]==appid).index.values])
getplaytime(730.0, 0)
# print(games["games"][0])

[ 3 10 13 15 16 20 22 25 41]


In [137]:
# print(games["games"][0])

for players_game in games["games"]:
    # print(players_game)
    for i, appid in enumerate(players_game["appid"]):
        # print(appid, genre_from_appid(appid))
        for genre in genre_from_appid(appid):
            games["genre_mapping"][i][genre] += games["games"][i]["playtime_forever"]
            print(games["genre_mapping"][0])
            # get players playtime forever for that appid
        break
    break
    

KeyError: 218620