In [2]:
import os
import requests as re
from bs4 import BeautifulSoup
import pandas as pd
import json
from datetime import datetime
import time
from tqdm.notebook import tqdm_notebook

In [114]:
def get_api_bgg_game_data(unique_ids, dir_path='./data/scraped/', timeout=30):
    '''
    Retrieve aggregated game information via the boardgamegeek.com XML API
    and save every returned game to its own XML file.

    All ids are sent in one comma-separated request. Each <boardgame>
    element of the response is written to <dir_path>/<objectid>.xml.

    ----------

    unique_ids - list of unique boardgame ids (it is best to feed a list of no more than 50 ids)
    dir_path - directory that receives the XML files; created when missing
    timeout - seconds to wait for the HTTP response before requests gives up

    Returns the list of <boardgame> elements found in the response
    (an empty list when unique_ids is empty; no request is made then).

    Raises requests.HTTPError on a 4xx/5xx response, so that an error page is
    not silently parsed as "no games".

    ----------
    Using API BoardGameGeek:
    https://api.geekdo.com/xmlapi/boardgame/37111?stats=1&pricehistory=1&marketplace=1&comments=1

    base - https://api.geekdo.com/xmlapi/boardgame
    game - /37111 - gameid
    params - ?stats=1&pricehistory=1&marketplace=1&comments=1
    comments: Show brief user comments on games (set it to 1, absent by default)
    stats: Include game statistics (set it to 1, absent by default)
    historical: Include historical game statistics (set it to 1, absent by default) - Use from/to parameters to set starting and ending dates. Returns all data starting from 2006-03-18.
    from: Set the start date to include historical data (format: YYYY-MM-DD, absent by default)
    to: Set the end date to include historical data (format: YYYY-MM-DD, absent by default)
    pricehistory: retrieve the marketplace history for this item (set it to 1, absent by default)
    marketplace: retrieve the current marketplace listings (set it to 1, absent by default)
    '''
    # Nothing to ask for: return before building a URL that has no id in it.
    if len(unique_ids) == 0:
        return []

    url_id = ','.join(str(game_id) for game_id in unique_ids)

    api_boardgame = 'https://api.geekdo.com/xmlapi/boardgame/'
    api_params = '?stats=1&pricehistory=1&marketplace=1&comments=1'
    # `re` is the requests module (see the import cell), not the regex module.
    r = re.get(api_boardgame + url_id + api_params, timeout=timeout)
    r.raise_for_status()

    soup = BeautifulSoup(r.text, 'xml')
    list_bg = soup.find_all('boardgame')

    os.makedirs(dir_path, exist_ok=True)
    for bg in list_bg:
        boardgame_id = bg.get('objectid')
        # Without an objectid there is no usable file name; skip the element
        # instead of writing a stray 'None.xml'.
        if boardgame_id is None:
            continue
        file_path = os.path.join(dir_path, str(boardgame_id) + '.xml')
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(str(bg))
    return list_bg


In [74]:
# Ids present in bgg_gamelist but absent from list_bg. The set difference
# leaves them in arbitrary order.
new_bgg_boardgames = list(set(bgg_gamelist) - set(list_bg))

# Split the pending ids into batches of at most BATCH_SIZE. Stepping the start
# index by the batch size gives a shorter final batch when needed and never an
# empty one, so no batch is produced that would trigger a request without ids.
BATCH_SIZE = 50
list_of_lists_of_ids = [
    new_bgg_boardgames[start:start + BATCH_SIZE]
    for start in range(0, len(new_bgg_boardgames), BATCH_SIZE)
]

In [77]:
# Download the batches one after another, sleeping 0.33 s after each call.
for id_batch in tqdm_notebook(list_of_lists_of_ids):
    get_api_bgg_game_data(id_batch)
    time.sleep(0.33)

  0%|          | 0/2698 [00:00<?, ?it/s]

In [3]:
# Collect the ids of the games already saved on disk. The id is the part of
# each file name before the first dot.
# NOTE(review): get_api_bgg_game_data writes under './data/scraped/', which is
# not the directory listed here - confirm which location is intended.
content = os.listdir('../Board-Game-Data/scraped/bgg_boardgame/')
list_bg = []
for file_name in content:
    try:
        list_bg.append(int(file_name.split('.')[0]))
    except ValueError:
        # The name does not start with an integer id, so it is not a game file.
        continue

In [124]:
# Save the ids held in list_bg as a single-column CSV without the index.
loaded_ids_csv = '../Board-Game-Data/list_of_loaded_bgg_boardgames.csv'
loaded_ids_frame = pd.DataFrame(list_bg, columns=['bgg_id'])
loaded_ids_frame.to_csv(loaded_ids_csv, index=False)