In [185]:
import pandas as pd
import json
import ast
import pprint
pp = pprint.PrettyPrinter(indent=2)

In [186]:
import os
# Every input CSV lives directly under root_dir (one level above this notebook).
root_dir = '..'
games_fn, details_fn, reviews_fn = (
    os.path.join(root_dir, csv_name)
    for csv_name in (
        '2019-05-02.csv',           # ranked list of games
        'games_detailed_info.csv',  # per-game details
        'bgg-13m-reviews.csv',      # user reviews
    )
)
# Image shown when a game has no thumbnail of its own.
placeholder_img_url = 'https://via.placeholder.com/150x150?text=No+Image'

In [187]:
def print_nan_stat(df):
    """Print the number of missing values for each column of ``df`` that has any.

    Args:
        df: pandas DataFrame to inspect.

    Returns:
        None. One line per affected column is written to stdout in the form
        ``<column>: <count> NaN values``; columns without missing values are
        skipped, so a fully populated frame prints nothing.
    """
    # A single isna().sum() yields one scalar count per column. Reading the
    # counts as scalars avoids calling int() on a one-element Series (deprecated
    # in recent pandas releases) and also copes with duplicated column labels.
    for column, num_na in df.isna().sum().items():
        num_na = int(num_na)
        if num_na > 0:
            print('{}: {} NaN values'.format(column, num_na))

In [188]:
# Load the ranked game list and discard the columns that are not used here.
# Note: the frame is deliberately NOT re-indexed with set_index('ID') -- don't do this.
games = pd.read_csv(games_fn).drop(columns=['Bayes average', 'URL'])
# handle NaN values that don't affect the ML model:
# a missing thumbnail is simply replaced by the placeholder image
games['Thumbnail'] = games['Thumbnail'].fillna(placeholder_img_url)

# GET /boardgame/
#  display:  Rank, Thumbnail, Name, Year
#  optional: (Average, Users rated)
#  argument for other URLs: ID
#  (Rank can be inferred from the Average, so you could argue it's useless)
#  (list already sorted by Rank, ascending)

# [TEST ONLY]
# example iteration for generating / paginating the webpage
top_games = games.head(3)
for i, row in top_games.iterrows():
    summary = "{}. {} ({}) -- {}".format(row['Rank'], row['Name'], row['Year'], i)
    print(summary)
# Look the game up by its 'ID' column value (games.loc[3439] would need set_index('ID')).
game_3439 = games.loc[games['ID'] == 3439].iloc[0]
print("\r\n{}".format(game_3439.tolist()))  # image URL originally NaN

1. Gloomhaven (2017) -- 0
2. Pandemic Legacy: Season 1 (2015) -- 1
3. Through the Ages: A New Story of Civilization (2015) -- 2

[3439, 'HeroClix', 2002, 1922, 6.5, 2261, 'https://via.placeholder.com/150x150?text=No+Image']


In [189]:
# Sanity check: list the columns of `games` that contain NaN values.
# Nothing is listed after the header line when no column has any.
print("NaN stats below:") # [TEST ONLY]
print_nan_stat(games)

NaN stats below:


In [190]:
def get_json_entries(df, start_pos=None, num_rows=None, to_json=True, keyval_list=None):
    """Return a slice of ``df`` as a list of row dicts (for pagination).

    Args:
        df: pandas DataFrame to read from.
        start_pos: 0-based position of the first row to return; ``None`` means 0.
        num_rows: maximum number of rows to return; ``None`` means every row
            from ``start_pos`` to the end. The slice is clipped to the frame.
        to_json: when true, return a JSON string; otherwise return the list
            of dicts itself.
        keyval_list: column names whose string values hold a Python list
            literal (e.g. ``"['a', 'b']"``) and should be parsed into real
            lists. ``None`` or an empty list parses nothing.

    Returns:
        A list with one dict per selected row (column name -> value), or that
        list serialised with ``json.dumps`` when ``to_json`` is true. Missing
        values (NaN) are returned as ``None`` / JSON ``null``.

    Raises:
        KeyError: if a name in ``keyval_list`` is not a column of ``df``.
        ValueError, SyntaxError: if a string in one of the ``keyval_list``
            columns is not a valid Python literal.
    """
    if start_pos is None:
        start_pos = 0
    total_rows = len(df.index)
    end_pos = total_rows if num_rows is None else min(start_pos + num_rows, total_rows)
    selected = df.iloc[start_pos:end_pos]
    # Convert every NaN to None so the client receives plain Python / JSON
    # nulls. Casting to object first lets None be stored in numeric columns;
    # this replaces the former pd.np.nan lookup (pd.np no longer exists in
    # pandas 2.x).
    selected = selected.astype(object).where(selected.notna(), None)
    row_entries = selected.to_dict(orient='records')
    for entry in row_entries:
        for key in keyval_list or ():
            value = entry[key]
            # Only strings need parsing: None stays None, and a value that is
            # already a list is left untouched instead of crashing literal_eval.
            if isinstance(value, str):
                entry[key] = ast.literal_eval(value)
    return json.dumps(row_entries) if to_json else row_entries

# [TEST ONLY]
# Rank counts from 1 but start_pos counts from 0, so start_pos=1 begins at the game ranked 2.
get_json_entries(games, start_pos=1, num_rows=2, to_json=False)

[{'ID': 161936,
  'Name': 'Pandemic Legacy: Season 1',
  'Year': 2015,
  'Rank': 2,
  'Average': 8.64,
  'Users rated': 30921,
  'Thumbnail': 'https://cf.geekdo-images.com/micro/img/KbAKyhbG4qab4r-A_pBjUGvgal0=/fit-in/64x64/pic2452831.png'},
 {'ID': 182028,
  'Name': 'Through the Ages: A New Story of Civilization',
  'Year': 2015,
  'Rank': 3,
  'Average': 8.53,
  'Users rated': 15551,
  'Thumbnail': 'https://cf.geekdo-images.com/micro/img/APvZ_BYOt4ElpIXVl7i6wUp2BvM=/fit-in/64x64/pic2663291.jpg'}]

In [200]:
# GET /board_game/id
#
# Columns kept from the detailed-info CSV, mapped to the names used from here on.
detail_columns = {
    'id': 'ID',
    'primary': 'Name',
    'boardgamepublisher': 'Publisher',
    'boardgamecategory': 'Category',
    'minplayers': 'Min players',
    'maxplayers': 'Max players',
    'minage': 'Min age',
    'minplaytime': 'Min playtime',
    'description': 'Description',
    'boardgameexpansion': 'Expansion',
    'boardgamemechanic': 'Mechanic',
    'image': 'Thumbnail',
}
details = pd.read_csv(details_fn)
details = details[list(detail_columns)].rename(columns=detail_columns)
# handle NaN values that don't affect the ML model
details['Thumbnail'] = details['Thumbnail'].fillna(placeholder_img_url)
# Columns passed to get_json_entries so their string values are parsed into Python lists
details_listcat = ['Publisher', 'Category', 'Expansion', 'Mechanic']

# [TEST ONLY]
print_nan_stat(details)
print("")

# extract a single row from 'details' as JSON
# (function has been designed to allow for selection of multiple rows)
# Only rows with at least one NaN are considered, so the NaN -> None conversion is exercised.
rows_with_nan = details[details.isnull().any(axis=1)]
ex = get_json_entries(rows_with_nan, 0, 1, False, details_listcat)[0]
json.dumps(ex)

#len(get_json_entries(details, None, None, False, details_listcat)) # check for crashes

Category: 184 NaN values
Description: 1 NaN values
Expansion: 13001 NaN values
Mechanic: 1564 NaN values



'{"ID": 161936, "Name": "Pandemic Legacy: Season 1", "Publisher": ["Z-Man Games, Inc.", "Asterion Press", "Devir", "Filosofia \\u00c9ditions", "G\\u00e9m Klub Kft.", "Hobby Japan", "Jolly Thinkers", "Korea Boardgames co., Ltd.", "Lacerta", "Lifestyle Boardgames Ltd", "MINDOK"], "Category": ["Environmental", "Medical"], "Min players": 2, "Max players": 4, "Min age": 13, "Min playtime": 60, "Description": "Pandemic Legacy is a co-operative campaign game, with an overarching story-arc played through 12-24 sessions, depending on how well your group does at the game. At the beginning, the game starts very similar to basic Pandemic, in which your team of disease-fighting specialists races against the clock to travel around the world, treating disease hotspots while researching cures for each of four plagues before they get out of hand.&#10;&#10;During a player\'s turn, they have four actions available, with which they may travel around in the world in various ways (sometimes needing to disca

In [None]:
# GET /usage