In [1]:
import pandas as pd

# Load the Metacritic dump, discard the 'Unnamed: 0' column (presumably a row
# index that was written out with the CSV), and replace every missing value
# with an empty string so later cells never see NaN.
games = (
    pd.read_csv('../datasets/metacritic_games.csv')
    .drop(columns=['Unnamed: 0'])
    .fillna('')
)

# Preview the first rows of the cleaned frame.
games.head()

Unnamed: 0,Title,platform,release_date,summary,meta_score,user_review,Wikipedia,Genre,Modes,MetacriticReviews
0,Pokemon Mystery Dungeon: Explorers of Darkness,DS,2008-04-20T00:00:00Z,"In this pair of action-packed adventures, play...",59,8.5,Pokémon Mystery Dungeon: Explorers of Time and...,Roguelike,['Single player'],"[('GameSpy', ""If you've wanted to play Rogue-l..."
1,Z.H.P. Unlosing Ranger vs Darkdeath Evilman,PSP,2010-10-25T00:00:00Z,"Known as ZettaiHero Keikakuin Japan, Z.H.P. is...",81,9.7,Z.H.P. Unlosing Ranger VS Darkdeath Evilman is...,Tactical role-playing game,['Single-player'],"[('GameSpy', ""If you've wanted to play Rogue-l..."
2,Elemental Gearbolt,PlayStation,1998-06-30T00:00:00Z,In a fantastic world divided by class and race...,76,tbd,"Elemental Gearbolt, full title Genseikyokō Sei...",Light gun shooter,"['Single-player', 'multiplayer']","[('Game Informer', ""Elemental Gearbolt is just..."
3,King Arthur,Xbox,2004-11-16T00:00:00Z,Live the true story behind the epic legend of ...,61,tbd,King Arthur is an action-adventure game based ...,Action-adventure,"['Single-player', 'multiplayer']","[('TeamXbox', 'Taking things into consideratio..."
4,King Arthur,GameCube,2004-11-18T00:00:00Z,Live the true story behind the epic legend of ...,60,tbd,King Arthur is an action-adventure game based ...,Action-adventure,"['Single-player', 'multiplayer']","[('Computer Games Magazine', 'In the end, it d..."


In [2]:
def get_user_review(user_review):
    """Normalise a user-review value for indexing.

    Some entries carry the placeholder "tbd" instead of a score. That value is
    not compatible with Solr's FloatPointField type, so it is stored as "NaN"
    instead. Every other value is passed through untouched.

    Args:
        user_review: The raw user-review value taken from the dataset.

    Returns:
        "NaN" when the input equals "tbd", otherwise the input itself.
    """
    return "NaN" if user_review == "tbd" else user_review

def get_review_specific_json(game, platform):
    """Build the list of critic reviews for one game on one platform.

    The first row of the module-level ``games`` frame whose ``Title`` and
    ``platform`` match is used. Its ``MetacriticReviews`` cell holds the text
    of a Python list of ``(reviewer, review)`` tuples, which is parsed here.

    Args:
        game: Title of the game to look up.
        platform: Platform of the release to look up.

    Returns:
        A list of ``{"reviewer": ..., "review": ...}`` dicts, one per parsed
        tuple, in their original order. An empty list is returned when the
        cell is blank (missing values are replaced with '' when the frame is
        loaded).

    Raises:
        IndexError: If no row matches the given title and platform.
        ValueError, SyntaxError: If the cell is not a valid Python literal.
    """
    # Imported locally so this definition works without touching other cells.
    from ast import literal_eval

    matching_rows = games[(games.Title == game) & (games.platform == platform)]
    raw_reviews = matching_rows.iloc[0].MetacriticReviews

    # A blank cell means there are no reviews; parsing '' would raise.
    if isinstance(raw_reviews, str) and not raw_reviews.strip():
        return []

    # SECURITY: this text comes straight from a CSV file. The previous
    # implementation passed it to eval(), which would execute any expression
    # stored in the file. literal_eval only accepts Python literals (lists,
    # tuples, strings, ...) and rejects everything else.
    parsed_reviews = literal_eval(raw_reviews)

    return [
        {
            "reviewer": review[0],
            "review": review[1]
        }
        for review in parsed_reviews
    ]

def get_platform_specific_json(game):
    """Build the per-platform entries for one game title.

    One entry is produced for every row of the module-level ``games`` frame
    whose ``Title`` equals ``game``, in row order. Each entry is filled from
    the *first* row matching that row's platform, so if a title has several
    rows for the same platform the first one is emitted once per occurrence.

    Args:
        game: Title of the game to look up.

    Returns:
        A list of dicts with the keys "platform", "release_date",
        "meta_score" (converted with ``str``), "user_review" (passed through
        ``get_user_review``) and "reviews" (from
        ``get_review_specific_json``). Empty when no row has this title.
    """
    # Filter by title once; the per-platform lookups below only scan this
    # subset instead of re-masking the whole frame for every field.
    title_rows = games[games.Title == game]

    entries = []
    for platform in title_rows["platform"]:
        # First row for this title/platform pair, fetched a single time.
        release = title_rows[title_rows.platform == platform].iloc[0]
        entries.append({
            "platform": release["platform"],
            "release_date": release["release_date"],
            "meta_score": str(release["meta_score"]),
            "user_review": get_user_review(release["user_review"]),
            "reviews": get_review_specific_json(game, platform)
        })
    return entries

def get_game_json(game):
    """Build the full document for one game title.

    The title-level fields are read from the first row of the module-level
    ``games`` frame whose ``Title`` equals ``game``; the per-platform details
    come from ``get_platform_specific_json``.

    Args:
        game: Title of the game to export.

    Returns:
        A dict with the keys "title", "summary", "wikipedia", "genre",
        "playing_modes" and "platforms". "playing_modes" is the raw ``Modes``
        cell, unchanged.

    Raises:
        IndexError: If no row has the given title.
    """
    # Look the title up once instead of re-filtering the frame per field.
    first_row = games[games.Title == game].iloc[0]

    return {
        "title": game,
        "summary": first_row["summary"],
        "wikipedia": first_row["Wikipedia"],
        "genre": first_row["Genre"],
        "playing_modes": first_row["Modes"],
        "platforms": get_platform_specific_json(game)
    }

In [3]:
from json import dumps

# Build one document per distinct title, in order of first appearance.
json = [get_game_json(title) for title in games['Title'].unique()]

# Serialise the whole collection with 4-space indentation and write it out.
json = dumps(json, indent=4)
with open("../search-engine/games_collection.json", "w") as outfile:
    outfile.write(json)