# Notebook for testing data collection with the BGG (BoardGameGeek) API

## Setup

In [1]:
import requests
import pandas as pd
import xmltodict
import json


## Import data

In [2]:
# Endpoint used for every "thing" request; the query parameters are supplied per call.
url_things = 'https://boardgamegeek.com/xmlapi2/thing?'

# Load the ranks file and keep only its `id` column (a Series named `id`).
game_ids = pd.read_csv('data/boardgames_ranks.csv')['id']
game_ids

0         224517
1         161936
2         342942
3         174430
4         233078
           ...  
162393    438989
162394    438991
162395    438992
162396    438993
162397    438995
Name: id, Length: 162398, dtype: int64

In [7]:
# Turn the id Series into a plain list of strings; the fetch loop uses it as
# its queue of ids that still have to be requested.
game_ids_unupdated = list(map(str, game_ids.to_list()))

# Empty frame that only fixes the column layout for the collected details.
# Commented-out names are fields that were considered but left out.
game_details = pd.DataFrame(
    columns=[
        'id',
        #'alt_name', # maybe include, so the searching goes easier
        'description',
        'yearpublished',
        'minplayers',
        'maxplayers',
        'community_best_with',
        'community_recommended_with',
        'playingtime',
        'minplaytime',
        'maxplaytime',
        'minage',
        'community_minage',
        #'language_dependency', # superfluous? mostly focus on english games
        'boardgamecategories',
        'boardgamemechanics',
        'boardgamefamilys',
        #'boardgameaccessories', # superfluous? probably has high correlation with the family and mechanics
        #'boardgameimplementations', # superfluous?
        'boardgamedesigners',
        'boardgameartists',
        #'boardgamepublishers', # superfluous?
        #'usersrated', # already in the dataset
        #'average', # already in the dataset
        #'bayesaverage', # already in the dataset
        #'ranks', # already in the dataset
        'stddev',
        'median',
        'owned',
        'trading',
        'wanting',
        'wishing',
        'numcomments',
        'numweights',
        'averageweight',
    ]
)

In [4]:
def _extract_details_from_link_element(link_element, detail_type):
    pass # these infos are all in the same element and can occur multiple times. The different detail_types are: '

In [8]:
def _extract_info_into_dataframe(df, item_dict):
    # save the info into a dictionary
    info = {
        'id': item_dict['@id'],
        #'name': item_dict[item], # included in dataset
        #'alt_name', # maybe include, so the searching goes easier
        'description': item_dict['description'],
        'yearpublished': item_dict['yearpublished']['@value'],
        'minplayers': item_dict['minplayers']['@value'],
        'maxplayers': item_dict['maxplayers']['@value'],
        'community_best_with': item_dict['poll-summary']['result'][0]['@value'],
        'community_recommended_with': item_dict['poll-summary']['result'][1]['@value'],
        'playingtime': item_dict['playingtime']['@value'],
        'minplaytime': item_dict['minplaytime']['@value'],
        'maxplaytime': item_dict['maxplaytime']['@value'],
        'minage': item_dict['minage']['@value'],
        #'community_minage': item_dict[item], # hassle to extract
        #'language_dependency': item_dict[item], # superfluous? mostly focus on english games
        'boardgamecategories': '', 
        'boardgamemechanics': '',
        'boardgamefamilys': '',
        #'boardgameaccessories': item_dict[item], # superfluous? probably has high correlation with the family and mechanics
        #'boardgameimplementations': item_dict[item], # superfluous?
        'boardgamedesigners': '',
        'boardgameartists': '',
        #'boardgamepublishers': item_dict[item], # superfluous?
        #'usersrated': item_dict[item], # already in the dataset
        #'average': item_dict[item], # already in the dataset
        #'bayesaverage': item_dict[item], # already in the dataset
        #'ranks': item_dict[item], # already in the dataset
        'stddev': item_dict['statistics']['ratings']['stddev']['@value'],
        'median': item_dict['statistics']['ratings']['median']['@value'],
        'owned': item_dict['statistics']['ratings']['owned']['@value'],
        'trading': item_dict['statistics']['ratings']['trading']['@value'],
        'wanting': item_dict['statistics']['ratings']['wanting']['@value'],
        'wishing': item_dict['statistics']['ratings']['wishing']['@value'],
        'numcomments': item_dict['statistics']['ratings']['numcomments']['@value'],
        'numweights': item_dict['statistics']['ratings']['numweights']['@value'],
        'averageweight': item_dict['statistics']['ratings']['averageweight']['@value']
    }
    return info
    

In [None]:
# prepare and execute the api call
form_values = {
    'id':'', # Specifies the id of the thing(s) to retrieve. To request multiple things with a single query, NNN can specify a comma-delimited list of ids. Maximum 20.
    #'type':'', # Specifies that, regardless of the type of thing asked for by id, the results are filtered by the THINGTYPE(s) specified. Multiple THINGTYPEs can be specified in a comma-delimited list.
    #'versions':'1', # Returns version info for the item.
    #'videos':'1', # Returns videos for the item.
    'stats':'1', # Returns ranking and rating stats for the item.
    #'historical':'1', # Not currently supported. Returns historical data over time. See page parameter.
    #'marketplace':'1', # Returns marketplace data.
    #'comments':'1', # Returns all comments about the item. Also includes ratings when commented. See page parameter.
    #'ratingcomments':'1', # Returns all ratings for the item. Also includes comments when rated. See page parameter. The ratingcomments and comments parameters cannot be used together, as the output always appears in the <comments> node of the XML; comments parameter takes precedence if both are specified. Ratings are sorted in descending rating value, based on the highest rating they have assigned to that item (each item in the collection can have a different rating).
    #'page':'1', # Defaults to 1, controls the page of data to see for historical info, comments, and ratings data.
    #'pagesize':'10', # Set the number of records to return in paging. Minimum is 10, maximum is 100.
    #'from':'', # Not currently supported.
    #'to':'' # Not currently supported.
}
max_ids_per_call = 20
# Consecutive failed calls tolerated before giving up, so a persistent outage
# cannot turn the loop into an endless stream of requests.
max_failed_calls = 5
failed_calls = 0

# Runs while ids are queued. The `break` at the bottom deliberately stops this
# test run after the first successfully processed batch.
while game_ids_unupdated:
    # take the next batch off the front of the queue (same size for slice and delete)
    batch_ids = game_ids_unupdated[:max_ids_per_call]
    del game_ids_unupdated[:max_ids_per_call]
    ids_to_update = ",".join(batch_ids)
    # update the API parameter to get the current id's
    form_values.update({'id': ids_to_update})
    # get the info from BGG
    response = requests.get(url_things, form_values)

    if response.status_code != 200:
        # Re-queue the ids one by one (extend, not append): appending the list
        # itself would put a list inside the queue and break the next join.
        game_ids_unupdated.extend(batch_ids)
        failed_calls += 1
        if failed_calls >= max_failed_calls:
            raise RuntimeError(
                f"BGG API call failed {failed_calls} times in a row "
                f"(last status code: {response.status_code})"
            )
        continue
    failed_calls = 0

    info = xmltodict.parse(response.text)
    # 'item' is a list for several items, a single dict for one item, and
    # absent when nothing was returned - normalise all three to a list.
    items = (info.get('items') or {}).get('item') or []
    if isinstance(items, dict):
        items = [items]

    # save the info into the dataframe: collect the rows first and concatenate
    # once per batch instead of once per game
    batch_rows = [_extract_info_into_dataframe(game_details, item) for item in items]
    if batch_rows:
        df_batch = pd.DataFrame(batch_rows, index=[int(row['id']) for row in batch_rows])
        game_details = pd.concat([game_details, df_batch])
    break


{'id': '180263', 'description': "It's the early 20th century. You have decided to sail back to the newly discovered seventh continent to attempt to lift the terrible curse that has struck you since your return from the previous expedition.&#10;&#10;In The 7th Continent, a solo or cooperative &quot;choose-your-own-adventure&quot; exploration board game, you choose a character and begin your adventure on your own or with a team of other explorers. Inspired by the Fighting Fantasy book series, you will discover the extent of this wild new land through a variety of terrain and event cards. In a land fraught with danger and wonders, you have to use every ounce of wit and cunning to survive, crafting tools, weapons, and shelter to ensure your survival.&#10;&#10;Unlike most board games, it will take you many, MANY hours of exploring and searching the seventh continent until you eventually discover how to remove the curse(s)...or die trying.&#10;&#10;The 7th Continent features an easy saving s

In [19]:
# Preview the first five collected rows.
game_details.head(n=5)

Unnamed: 0,id,description,yearpublished,minplayers,maxplayers,community_best_with,community_recommended_with,playingtime,minplaytime,maxplaytime,...,boardgameartists,stddev,median,owned,trading,wanting,wishing,numcomments,numweights,averageweight
271055,271055,Dwellings of Eldervale is an epic worker place...,2020,1,5,Best with 3 players,Recommended with 1–5 players,150,60,150,...,,1.48478,0,10980,119,811,6951,1604,343,3.242
217372,217372,"In The Quest for El Dorado, players take the r...",2017,2,4,"Best with 2, 4 players",Recommended with 2–4 players,60,30,60,...,,1.09493,0,45937,415,1091,8415,4343,717,1.9428
180263,180263,It's the early 20th century. You have decided ...,2017,1,4,Best with 1–2 players,Recommended with 1–3 players,1000,5,1000,...,,1.70137,0,38229,1084,980,11079,4752,632,2.8987
218417,218417,Gravehold remains the last bastion of The Worl...,2017,1,4,Best with 2 players,Recommended with 1–4 players,60,60,60,...,,1.28198,0,13639,211,209,1397,1039,156,2.9231
205896,205896,The great and forgotten Kami have returned fro...,2018,3,5,Best with 4–5 players,Recommended with 3–5+ players,120,90,120,...,,1.6094,0,28123,348,719,7567,3625,685,3.2964


In [39]:
# Request a single game (id 224517) with the rating statistics switched on.
response = requests.get(url_things, params={'id': '224517', 'stats': '1'})
info = xmltodict.parse(response.text)
# Only one id was requested, so 'item' is indexed directly as a mapping here.
item_dict = info['items']['item']
# Show the rating standard deviation as delivered (a string).
item_dict['statistics']['ratings']['stddev']['@value']

'1.4173'

In [96]:
# Display the raw text body of `response` to inspect the XML structure.
response.text

'<?xml version="1.0" encoding="utf-8"?><items termsofuse="https://boardgamegeek.com/xmlapi/termsofuse"><item type="boardgame" id="224517">\n         <thumbnail>https://cf.geekdo-images.com/x3zxjr-Vw5iU4yDPg70Jgw__thumb/img/o18rjEemoWaVru9Y2TyPwuIaRfE=/fit-in/200x150/filters:strip_icc()/pic3490053.jpg</thumbnail>\n      <image>https://cf.geekdo-images.com/x3zxjr-Vw5iU4yDPg70Jgw__original/img/FpyxH41Y6_ROoePAilPNEhXnzO8=/0x0/filters:format(jpeg)/pic3490053.jpg</image>\n                                     \t\t\t\t\n\t\t\t\t<name type="primary" sortindex="1" value="Brass: Birmingham" />\n\t\t\t\n\t\t\t\t\t\t                               \t\t\t\t\n\t\t\t\t<name type="alternate" sortindex="1" value="Brass. Бирмингем" />\n\t\t\t    \t\t\t\t\n\t\t\t\t<name type="alternate" sortindex="1" value="Brass. Бірмінгем" />\n\t\t\t    \t\t\t\t\n\t\t\t\t<name type="alternate" sortindex="1" value="ブラス：バーミンガム" />\n\t\t\t    \t\t\t\t\n\t\t\t\t<name type="alternate" sortindex="1" value="工业革命：伯明翰(Chinese ed

In [5]:

# Parse whatever `response` currently holds into nested dicts/lists.
o = xmltodict.parse(response.text)
# o['items']['item'][0]['minplayers']['@value']
# Display everything under items -> item.
# NOTE(review): the recorded output is a list, while the single-id cell indexes
# 'item' directly as a dict - the shape appears to depend on how many items the
# response contains; confirm which `response` this cell was run against.
o['items']['item']

[{'@type': 'boardgame',
  '@id': '224517',
  'thumbnail': 'https://cf.geekdo-images.com/x3zxjr-Vw5iU4yDPg70Jgw__thumb/img/o18rjEemoWaVru9Y2TyPwuIaRfE=/fit-in/200x150/filters:strip_icc()/pic3490053.jpg',
  'image': 'https://cf.geekdo-images.com/x3zxjr-Vw5iU4yDPg70Jgw__original/img/FpyxH41Y6_ROoePAilPNEhXnzO8=/0x0/filters:format(jpeg)/pic3490053.jpg',
  'name': [{'@type': 'primary',
    '@sortindex': '1',
    '@value': 'Brass: Birmingham'},
   {'@type': 'alternate', '@sortindex': '1', '@value': 'Brass. Бирмингем'},
   {'@type': 'alternate', '@sortindex': '1', '@value': 'Brass. Бірмінгем'},
   {'@type': 'alternate', '@sortindex': '1', '@value': 'ブラス：バーミンガム'},
   {'@type': 'alternate',
    '@sortindex': '1',
    '@value': '工业革命：伯明翰(Chinese edition) (2018)'},
   {'@type': 'alternate', '@sortindex': '1', '@value': '工業革命：伯明翰'},
   {'@type': 'alternate', '@sortindex': '1', '@value': '브라스: 버밍엄'}],
  'description': "Brass: Birmingham is an economic strategy game sequel to Martin Wallace's 2007 m