# Board games scraping

|Type|Description| Lite | Full |
|-----|-----|-----|-----|
|Boardgames|count of boardgames in bases| 94 053 (632,9Mb) | 106239 (687,6Mb) 141377 |
|Users|count of users in bases| 943 327 (72,2Mb) | 1 665 204 (118,1Mb) |
|Ratings|count of ratings in bases| 12 778 448 (1,86Gb) | 20 186 648 (2,39Gb) |

In [1]:
import requests as re
from bs4 import BeautifulSoup
import pandas as pd
import json
from datetime import datetime
import time
from tqdm.notebook import tqdm_notebook

In [2]:
def bgg_top(n_pages=20, start_page=1):
    '''
    Scrape the ranked list of board games from the boardgamegeek.com browse pages.
    -----
    Parameters
    -----
    n_pages: how many pages to load, default - 20, because without signing in
        on boardgamegeek.com only the first 20 pages can be loaded
    start_page: page number the scraping starts from, default - 1.
    -----
    Returns
    -----
    pandas DataFrame with one row per game and the columns rank, title, game_id,
    description, year, link, geek_rating, avg_rating, num_votes. All values are the
    scraped strings. The index is a fresh 0..n-1 range.
    '''
    url_main = 'https://boardgamegeek.com'
    url_search_boardgames = url_main + '/browse/boardgame'
    url_page = '/page/'
    id_prefix = '/boardgame/'

    # Rows are collected in a plain list and turned into a DataFrame once at the end;
    # concatenating a one-row DataFrame per game copies the whole table every time.
    games = []
    for page in range(start_page, start_page + n_pages):
        r = re.get(url_search_boardgames + url_page + str(page))
        # 'lxml' is the parser BeautifulSoup prefers when none is given; naming it
        # avoids the "no parser specified" warning and keeps results reproducible.
        # lxml is already needed by the 'xml' parsing used elsewhere in this file.
        soup = BeautifulSoup(r.text, 'lxml')
        rows = soup.find_all('tr', id='row_')
        for j, row in enumerate(rows):
            # Rows without a rank anchor get an empty rank instead of crashing.
            rank_cell = row.find('td', class_='collection_rank')
            rank_anchor = rank_cell.find('a') if rank_cell is not None else None
            rank = rank_anchor.get('name') if rank_anchor is not None else ''

            # The title block of the j-th row on a page has the id results_objectname<j+1>.
            gameinfo = row.find('div', id='results_objectname' + str(j + 1))
            title_anchor = gameinfo.find('a')
            title = title_anchor.text
            href = title_anchor.get('href')

            year_tag = gameinfo.find('span')
            year = year_tag.text if year_tag is not None else ''

            description_tag = row.find('p', class_='smallefont dull')
            description = description_tag.text.strip() if description_tag is not None else ''

            link = url_main + href
            # href looks like '/boardgame/<id>/<slug>': drop the prefix and keep
            # everything up to the next slash (or the whole rest if there is none).
            game_id = href[len(id_prefix):].partition('/')[0]

            # The three rating cells are, in order: geek rating, average rating, votes.
            rat_list = [cell.text.strip()
                        for cell in row.find_all('td', class_='collection_bggrating')]
            geek_rating = rat_list[0]
            avg_rating = rat_list[1]
            num_votes = rat_list[2]

            games.append({'rank': rank, 'title': title, 'game_id': game_id,
                          'description': description, 'year': year, 'link': link,
                          'geek_rating': geek_rating, 'avg_rating': avg_rating,
                          'num_votes': num_votes})

    return pd.DataFrame(games)

In [3]:
def get_bgg_categories():
    '''
    Scrape the list of board game categories from boardgamegeek.com.

    Returns
    ----------
    pandas DataFrame with one row per category and the columns
    category (link text) and link (absolute url of the category page).

    Raises
    ----------
    ValueError if the category table is not found in the downloaded page.
    '''
    url_main = 'https://boardgamegeek.com'
    url_cat = 'https://boardgamegeek.com/browse/boardgamecategory'
    r = re.get(url_cat)
    # 'lxml' is the parser BeautifulSoup prefers when none is given; naming it
    # avoids the "no parser specified" warning. lxml is already needed by the
    # 'xml' parsing used elsewhere in this file.
    soup = BeautifulSoup(r.text, 'lxml')
    table = soup.find('table', class_='forum_table')
    if table is None:
        raise ValueError('category table (table.forum_table) not found at ' + url_cat)

    # Only anchors that carry an href can be turned into a category link.
    categories = [{'category': anchor.text, 'link': url_main + anchor.get('href')}
                  for anchor in table.find_all('a', href=True)]
    # Build the DataFrame once instead of concatenating one row at a time.
    return pd.DataFrame(categories)

In [105]:
# Date layout used by the listdate / saledate tags of the marketplace data.
_BGG_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S +0000'


def _bgg_text(parent, tag_name, **attrs):
    '''Return the text of the first matching tag under parent, or '' if parent or tag is missing.'''
    if parent is None:
        return ''
    tag = parent.find(tag_name, **attrs)
    return tag.text if tag is not None else ''


def _bgg_texts(parent, tag_name):
    '''Return the texts of all tags called tag_name under parent, in document order.'''
    return [tag.text for tag in parent.find_all(tag_name)]


def _bgg_marketplace(container, sold):
    '''
    Convert the <listing> tags under container into a dict {position: listing info}.

    sold=True  -> keys unix_saledate, saletime (seconds between listing and sale),
                  price, currency, condition
    sold=False -> keys unix_listdate, price, currency, condition

    As in the original implementation the result is all-or-nothing: if the container
    is missing or any listing cannot be parsed, an empty dict is returned.
    '''
    if container is None:
        return {}
    parsed = {}
    try:
        for idx, listing in enumerate(container.find_all('listing')):
            listdate = datetime.strptime(listing.find('listdate').text, _BGG_DATE_FORMAT)
            entry = {}
            # NOTE(review): the parsed datetimes are naive, so datetime.timestamp()
            # interprets them in the local timezone although the format says +0000.
            # Kept unchanged so new rows stay comparable with already scraped data;
            # confirm whether UTC timestamps were intended.
            if sold:
                saledate = datetime.strptime(listing.find('saledate').text, _BGG_DATE_FORMAT)
                delta = saledate - listdate
                entry['unix_saledate'] = int(datetime.timestamp(saledate))
                entry['saletime'] = delta.days * 24 * 60 * 60 + delta.seconds
            else:
                entry['unix_listdate'] = int(datetime.timestamp(listdate))
            price_tag = listing.find('price')
            entry['price'] = price_tag.text
            entry['currency'] = price_tag.get('currency')
            entry['condition'] = listing.find('condition').text
            parsed[idx] = entry
    except (AttributeError, ValueError, OverflowError, OSError):
        # Missing child tag, unexpected date layout or a date outside the
        # platform's timestamp range.
        return {}
    return parsed


def get_api_bgg_game_data(unique_ids):
    '''
    This function retrieves aggregated game information via the boardgamegeek.com API

    ----------

    unique_ids - list of unique boardgames ids (it is best to feed a list of no more than 50 id)

    ----------
    Returns a pandas DataFrame with one row per <boardgame> element of the response
    (empty DataFrame for an empty id list). Scalar fields are kept as the strings found
    in the XML ('' when absent), multi-valued fields are lists of strings, polls, ranks,
    comments and marketplace data are dicts.

    ----------
    Using API BoardGameGeek:
    https://api.geekdo.com/xmlapi/boardgame/37111?stats=1&pricehistory=1&marketplace=1&comments=1

    base - https://api.geekdo.com/xmlapi/boardgame
    game - /37111 - gameid
    params - ?stats=1&pricehistory=1&marketplace=1&comments=1
    comments: Show brief user comments on games (set it to 1, absent by default)
    stats: Include game statistics (set it to 1, absent by default)
    historical: Include historical game statistics (set it to 1, absent by default) - Use from/end parameters to set starting and ending dates. Returns all data starting from 2006-03-18.
    from: Set the start date to include historical data (format: YYYY-MM-DD, absent by default )
    to: Set the end date to include historical data (format: YYYY-MM-DD, absent by default )
    pricehistory: retrieve the marketplace history for this item (set it to 1, absent by default)
    marketplace: retrieve the current marketplace listings (set it to 1, absent by default)
    '''
    # Nothing to ask for: skip the request (the batching code can produce an empty batch).
    if len(unique_ids) == 0:
        return pd.DataFrame()

    url_id = ','.join(str(game_id) for game_id in unique_ids)

    api_boardgame = 'https://api.geekdo.com/xmlapi/boardgame/'
    api_params = '?stats=1&pricehistory=1&marketplace=1&comments=1'
    r = re.get(api_boardgame + url_id + api_params)
    soup = BeautifulSoup(r.text, 'xml')

    # One dict per game; the DataFrame is built once at the end instead of being
    # re-concatenated (and fully copied) for every game.
    games = []
    for bg in soup.find_all('boardgame'):
        # basic info
        publishers = _bgg_texts(bg, 'boardgamepublisher')
        # The first listed publisher is treated as the main one.
        main_publisher = publishers[0] if publishers else ""

        # voting: only the first <poll> of the game is read
        suggested_numplayers = {}
        poll = bg.find('poll')
        if poll is not None:
            for results in poll.find_all('results'):
                suggested_numplayers[results.get('numplayers')] = {
                    result.get('value'): result.get('numvotes')
                    for result in results.find_all('result')
                }

        # short comments, keyed by username (a later comment of the same user wins)
        comments = {comment.get('username'): comment.text
                    for comment in bg.find_all('comment')}

        # statistics; every field falls back to '' when the tag is missing
        stats = bg.find('statistics')
        ranks = {}
        if stats is not None:
            for rank in stats.find_all('rank'):
                ranks[rank.get('friendlyname')] = rank.get('value')

        # Sales history and current listings are parsed independently of each other.
        # Previously the date format was only defined while parsing the history, so a
        # game without history silently lost its current listings as well.
        marketplace_history = _bgg_marketplace(bg.find('marketplacehistory'), sold=True)
        marketplace_listings = _bgg_marketplace(bg.find('marketplacelistings'), sold=False)

        games.append({
            'boardgame_id': bg.get('objectid'),
            'title': _bgg_text(bg, 'name', primary='true'),
            'year_published': _bgg_text(bg, 'yearpublished'),
            'minplayers': _bgg_text(bg, 'minplayers'),
            'maxplayers': _bgg_text(bg, 'maxplayers'),
            'minplaytime': _bgg_text(bg, 'minplaytime'),
            'maxplaytime': _bgg_text(bg, 'maxplaytime'),
            'age': _bgg_text(bg, 'age'),
            'users_rated': _bgg_text(stats, 'usersrated'),
            'average_rating': _bgg_text(stats, 'average'),
            'bayes_average_rating': _bgg_text(stats, 'bayesaverage'),
            'median': _bgg_text(stats, 'median'),
            'stddev': _bgg_text(stats, 'stddev'),
            'owned': _bgg_text(stats, 'owned'),
            'trading': _bgg_text(stats, 'trading'),
            'wishing': _bgg_text(stats, 'wishing'),
            'num_of_comments': _bgg_text(stats, 'numcomments'),
            'num_of_weights': _bgg_text(stats, 'numweights'),
            'average_weight': _bgg_text(stats, 'averageweight'),
            'ranks': ranks,
            'main_publisher': main_publisher,
            'description': _bgg_text(bg, 'description'),
            'publishers': publishers,
            'honors': _bgg_texts(bg, 'boardgamehonor'),
            'expansions': _bgg_texts(bg, 'boardgameexpansion'),
            'accessories': _bgg_texts(bg, 'boardgameaccessory'),
            'artists': _bgg_texts(bg, 'boardgameartist'),
            'mechanics': _bgg_texts(bg, 'boardgamemechanic'),
            'category': _bgg_texts(bg, 'boardgamecategory'),
            'designers': _bgg_texts(bg, 'boardgamedesigner'),
            'graphic_designers': _bgg_texts(bg, 'boardgamegraphicdesigner'),
            'subdomains': _bgg_texts(bg, 'boardgamesubdomain'),
            'implementations': _bgg_texts(bg, 'boardgameimplementation'),
            'suggested_numplayers': suggested_numplayers,
            'podcast_episodes': _bgg_texts(bg, 'boardgamepodcastepisode'),
            'comments': comments,
            'marketplace_history': marketplace_history,
            'marketplace_listings': marketplace_listings,
            'thumbnail_link': _bgg_text(bg, 'thumbnail'),
            'image_link': _bgg_text(bg, 'image'),
        })
    return pd.DataFrame(games)

In [5]:
def get_user_ratings(nickname, sleep=True):
    '''
    This function takes the boardgamegeek.com username as input
    and returns a pandas DataFrame with all of the user's scores

    Parameters
    ----------
    nickname: str, boardgamegeek.com username
    sleep: bool, when True the collection is requested once, the function waits
        14.66 seconds and then requests it again; only the second response is parsed.
        When False a single request is made after a 0.33 second pause.
        NOTE(review): the extra request presumably gives the API time to prepare
        the collection before it is read - confirm against the API documentation.

    Returns
    ----------
    pandas DataFrame with one row per <item> of the response and the columns nickname,
    title, boardgame_id, rating, num_of_plays, comment, own, prevowned, fortrade, want,
    wanttoplay, wanttobuy, wishlist, preordered, last_modified.
    '''
    url_coll_main = 'https://api.geekdo.com/xmlapi/collection/'
    params = '?rated=1'
    url = url_coll_main + nickname + params

    if sleep:
        # The response of this first request is intentionally not used.
        re.get(url)
        time.sleep(14.66)
    time.sleep(0.33)
    r = re.get(url)
    soup = BeautifulSoup(r.text, features="xml")

    def text_or_empty(tag):
        # Missing tags are reported as '' instead of raising.
        return tag.text if tag is not None else ''

    # Collect plain dicts and build the DataFrame once; concatenating a one-row
    # DataFrame per item copies the whole table on every iteration.
    votes = []
    for item in soup.find_all('item'):
        stats = item.find('stats')
        rating_tag = stats.find('rating') if stats is not None else None
        rating = rating_tag.get('value') if rating_tag is not None else ''

        # An item without a <status> tag yields None for all status fields
        # instead of aborting the whole user.
        status = item.find('status')
        status_attr = status.get if status is not None else (lambda key: None)

        votes.append({'nickname': nickname,
                      'title': text_or_empty(item.find('name')),
                      'boardgame_id': item.get('objectid'),
                      'rating': rating,
                      'num_of_plays': text_or_empty(item.find('numplays')),
                      'comment': text_or_empty(item.find('comment')),
                      'own': status_attr('own'),
                      'prevowned': status_attr('prevowned'),
                      'fortrade': status_attr('fortrade'),
                      'want': status_attr('want'),
                      'wanttoplay': status_attr('wanttoplay'),
                      'wanttobuy': status_attr('wanttobuy'),
                      'wishlist': status_attr('wishlist'),
                      'preordered': status_attr('preordered'),
                      'last_modified': status_attr('lastmodified')})
    return pd.DataFrame(votes)

In [6]:
def get_users_for_country(country):
    '''
    This function allows you to retrieve public information about BoardGameGeek.com users,
    such as nicknames, links to their profiles and the countries, states and cities listed on their profiles.

    Parameter
    ----------
    country: str, function will search users from this country

    Returns
    ----------
    pandas DataFrame with the columns link, nickname, country, state_cuty
    (state_cuty is the raw location markup starting at the country name).
    If a page without a user table or without users is met, scraping stops
    and the users collected so far are returned.
    '''
    url_main = 'https://boardgamegeek.com/users/page/'
    url_params = '?country='

    r = re.get(url_main + '1' + url_params + country)
    # 'lxml' is the parser BeautifulSoup prefers when none is given; naming it avoids
    # the "no parser specified" warning. lxml is already needed by the 'xml' parsing
    # used elsewhere in this file.
    soup = BeautifulSoup(r.text, 'lxml')

    # NOTE(review): a result without a 'last page' link is treated as a single page
    # (previously this raised AttributeError) - confirm against the site's pagination.
    last_link = soup.find('a', title='last page')
    last_page = int(last_link.text.strip('[]')) if last_link is not None else 1

    # Rows are gathered in a list and converted once; concatenating a one-row
    # DataFrame per user copies the whole table on every iteration.
    collected = []
    for page in tqdm_notebook(range(last_page)):
        if page > 0:
            # The first page is already loaded; pause briefly between further requests.
            time.sleep(0.33)
            r = re.get(url_main + str(page+1) + url_params + country)
            soup = BeautifulSoup(r.text, 'lxml')

        table = soup.find('table', class_='forum_table')
        users = table.find_all('div', class_='avatarblock') if table is not None else []
        if not users:
            # Keep what has been scraped so far instead of losing it to an exception.
            print(f'No users found on page {page+1} for {country}, stopping')
            break
        for user in users:
            link = user.find('div', class_='username').find('a').get('href')
            location = str(user.find('div', class_='location'))
            # Keeps the markup from the country name onwards (closing tag included).
            # NOTE(review): if the country name is absent, find() returns -1 and only
            # the last character is kept - left unchanged to match existing data.
            state_cuty = location[location.find(country):]
            nickname = user.get('data-username')
            collected.append({'link': link, 'nickname': nickname,
                              'country': country, 'state_cuty': state_cuty})
    return pd.DataFrame(collected)

In [7]:
def get_users_for_country_step_by_step(country, start_p=1, n_pages=5000):
    '''
    Modification of the function 'get_users_for_country', which helps to get info from countries
    with lots of users, like the USA, by scraping a limited range of pages per call.

    This function allows you to retrieve public information about BoardGameGeek.com users,
    such as nicknames, links to their profiles and the countries, states and cities listed on their profiles.

    Parameter
    ----------
    country: str, function will search users from this country
    start_p: int, number of the first page to scrape, default - 1
    n_pages: int, maximum number of pages to scrape starting at start_p, default - 5000

    Returns
    ----------
    pandas DataFrame with the columns link, nickname, country, state_cuty
    (state_cuty is the raw location markup starting at the country name).
    If a page without a user table or without users is met, scraping stops
    and the users collected so far are returned.
    '''
    url_main = 'https://boardgamegeek.com/users/page/'
    url_params = '?country='

    r = re.get(url_main + str(start_p) + url_params + country)
    # 'lxml' is the parser BeautifulSoup prefers when none is given; naming it avoids
    # the "no parser specified" warning. lxml is already needed by the 'xml' parsing
    # used elsewhere in this file.
    soup = BeautifulSoup(r.text, 'lxml')

    # Never go past the last page. Pages start_p..last_page inclusive are
    # last_page - start_p + 1 pages; the previous limit of last_page - start_p
    # skipped the final page.
    last_link = soup.find('a', title='last page')
    if last_link is not None:
        try:
            last_page = int(last_link.text.strip('[]'))
        except ValueError:
            # Unreadable page count: keep the requested n_pages, as before.
            pass
        else:
            n_pages = min(n_pages, last_page - start_p + 1)

    # Rows are gathered in a list and converted once; concatenating a one-row
    # DataFrame per user copies the whole table on every iteration.
    collected = []
    for page in tqdm_notebook(range(n_pages)):
        if page > 0:
            # The first page is already loaded; pause briefly between further requests.
            time.sleep(0.5)
            r = re.get(url_main + str(page+start_p) + url_params + country)
            soup = BeautifulSoup(r.text, 'lxml')

        table = soup.find('table', class_='forum_table')
        users = table.find_all('div', class_='avatarblock') if table is not None else []
        if not users:
            # Keep what has been scraped so far instead of losing it to an exception
            # (also ends the run when the page count could not be determined).
            print(f'No users found on page {page+start_p} for {country}, stopping')
            break
        for user in users:
            link = user.find('div', class_='username').find('a').get('href')
            location = str(user.find('div', class_='location'))
            # Keeps the markup from the country name onwards (closing tag included).
            # NOTE(review): if the country name is absent, find() returns -1 and only
            # the last character is kept - left unchanged to match existing data.
            state_cuty = location[location.find(country):]
            nickname = user.get('data-username')
            collected.append({'link': link, 'nickname': nickname,
                              'country': country, 'state_cuty': state_cuty})
    return pd.DataFrame(collected)

# Get top 2000 board games 

In [None]:
# Scrape the first 20 browse pages, starting at page 1.
top_games = bgg_top(n_pages=20, start_page=1)
# Uncomment to save the result:
# top_games.to_csv('bgg_top_2000_games.csv', index=False)
# Display the scraped table as the cell output.
top_games

# Get main list of board games' categories

In [None]:
# Scrape the category names and their links.
categories = get_bgg_categories()
# Uncomment to save the result:
# categories.to_csv('bgg_categories.csv', index=False)
# Display the scraped table as the cell output.
categories

# Scrape users' collection data

In [61]:
%%time
checked = pd.read_csv('checked_nicknames.csv')
nicknames = pd.read_csv('bgg_users_full.csv')
nicknames = list(set(nicknames.nickname) - set(checked.nicknames))
rates = pd.DataFrame()
test_nn = nicknames[:5164]
print(f'Checked {len(checked)}, in progress {len(test_nn)}, left {len(nicknames)-len(test_nn)} nicknames')
for nn in tqdm_notebook(test_nn):
    get_user_ratings(nn, sleep=False)
for i, nn in tqdm_notebook(enumerate(test_nn)):
    rates = pd.concat([rates,get_user_ratings(nn, sleep=False)])
#     if (i+1)%100==0: 
#         rates.to_csv('ratings_tmp.csv', index=False)
        
print(f'{len(rates)} rows added')
checked = pd.concat([checked,pd.DataFrame(test_nn, columns=['nicknames'])])
dt = datetime.now()
rates.to_csv(f'bgg_ratings_{dt.year}_{dt.month}_{dt.day}_{dt.hour}_{dt.minute}_{dt.second}_{len(test_nn)}.csv', index=False)
checked.to_csv(f'checked_nicknames.csv', index=False)

Checked 1664526, in progress 5164, left 0 nicknames


  0%|          | 0/5164 [00:00<?, ?it/s]

0it [00:00, ?it/s]

49827 rows added
CPU times: user 4min 31s, sys: 18.2 s, total: 4min 49s
Wall time: 1h 49min 17s


# Scrape board game data

In [243]:
# All scraped ratings; their boardgame_id column lists every game that was rated.
rats = pd.read_feather('./data/full/bgg_ratings_full.feather')
unique_ids = list(rats.boardgame_id.unique())
# Games whose details have already been downloaded.
boardgame_info = pd.read_csv('./data/full/bgg_boardgames_full.csv')
# Ids that appear in the ratings but have no details yet.
new_bgg_boardgames = list(set(unique_ids) - set(boardgame_info.boardgame_id))

In [None]:
# Split the new ids into batches of at most 50 ids (one API request per batch).
# Stepping through the list by batch size yields no empty batch, neither for an
# empty list nor when the number of ids is an exact multiple of the batch size.
batch_size = 50
list_of_lists_of_ids = [new_bgg_boardgames[start:start + batch_size]
                        for start in range(0, len(new_bgg_boardgames), batch_size)]

In [None]:
# Uncomment to start from an empty table instead of extending the loaded one:
# boardgame_info = pd.DataFrame()
# Download the details batch by batch and append them to the table,
# pausing briefly after each request.
for id_batch in tqdm_notebook(list_of_lists_of_ids):
    batch_info = get_api_bgg_game_data(id_batch)
    boardgame_info = pd.concat([boardgame_info, batch_info])
    time.sleep(0.33)

In [244]:
# Display the table of board game details as the cell output.
boardgame_info

Unnamed: 0,boardgame_id,title,year_published,minplayers,maxplayers,minplaytime,maxplaytime,age,users_rated,average_rating,...,graphic_designers,subdomains,implementations,suggested_numplayers,podcast_episodes,comments,marketplace_history,marketplace_listings,thumbnail_link,image_link
0,262144,マギアルマ (Magiarma),2017.0,2.0,5.0,10.0,20.0,8.0,4.0,7.62500,...,[],[],[],"{'1': {'Best': '0', 'Recommended': '0', 'Not R...",[],{'Impr3ssion': 'Extremely fun. The weapons are...,"{0: {'unix_saledate': 1551489033, 'saletime': ...",{},https://cf.geekdo-images.com/0esKPM6NhMoWmz-Wh...,https://cf.geekdo-images.com/0esKPM6NhMoWmz-Wh...
1,1,Die Macher,1986.0,3.0,5.0,240.0,240.0,14.0,5595.0,7.60630,...,[],['Strategy Games'],[],"{'1': {'Best': '0', 'Recommended': '1', 'Not R...",['BGA Episode 100 - Top 100 Games of All Time'...,"{'.JcK.': ""Die Macher is one of the very few l...","{0: {'unix_saledate': 1039114253, 'saletime': ...","{0: {'unix_listdate': 1426671957, 'price': '75...",https://cf.geekdo-images.com/rpwCZAjYLD940NWwP...,https://cf.geekdo-images.com/rpwCZAjYLD940NWwP...
2,2,Dragonmaster,1981.0,3.0,4.0,30.0,30.0,12.0,578.0,6.65468,...,[],['Strategy Games'],"['Indulgence', ""Coup d'État""]","{'1': {'Best': '0', 'Recommended': '0', 'Not R...",[],{'1979 AMC Treo 755p': 'Decent shape. Box has...,"{0: {'unix_saledate': 1041035044, 'saletime': ...","{0: {'unix_listdate': 1675114089, 'price': '45...",https://cf.geekdo-images.com/oQYhaJx5Lg3KcGis2...,https://cf.geekdo-images.com/oQYhaJx5Lg3KcGis2...
3,3,Samurai,1998.0,2.0,4.0,30.0,60.0,10.0,15764.0,7.45949,...,['Monica Helland'],['Strategy Games'],['Samurai: The Card Game'],"{'1': {'Best': '0', 'Recommended': '0', 'Not R...","['BGA Ep 90 - Essen Acquisition Disorder', 'Cu...","{'-xXx-': ""What an elegant game. \n\nLight rul...","{0: {'unix_saledate': 1045155733, 'saletime': ...","{0: {'unix_listdate': 1418239543, 'price': '75...",https://cf.geekdo-images.com/o9-sNXmFS_TLAb7Zl...,https://cf.geekdo-images.com/o9-sNXmFS_TLAb7Zl...
4,4,Tal der Könige,1992.0,2.0,4.0,60.0,60.0,12.0,351.0,6.62296,...,[],['Strategy Games'],[],"{'1': {'Best': '0', 'Recommended': '0', 'Not R...",['Ep. 15: ConQuest Avalon 2016 Convention Cove...,{'Andy Parsons': 'Blind bidding and some bluff...,"{0: {'unix_saledate': 1056550841, 'saletime': ...","{0: {'unix_listdate': 1340678336, 'price': '35...",https://cf.geekdo-images.com/nYiYhUlatT2DpyXaJ...,https://cf.geekdo-images.com/nYiYhUlatT2DpyXaJ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106234,348155,Lost Mummy,2017.0,2.0,99.0,30.0,60.0,10.0,4.0,9.25000,...,[],[],[],"{'1': {'Best': '0', 'Recommended': '0', 'Not R...",[],"{'amandalock': 'Kids loved it!', 'ElliottBaile...",{},{},https://cf.geekdo-images.com/0iUiI93SxHqTJWW5q...,https://cf.geekdo-images.com/0iUiI93SxHqTJWW5q...
106235,86012,Ladybohn: Manche mögen's heiss! – Mutterböhnchen,2010.0,2.0,6.0,45.0,45.0,12.0,19.0,7.20000,...,[],[],[],"{'1': {'Best': '0', 'Recommended': '0', 'Not R...",[],"{'bayspiel': '18.02.01.09', 'byunjaebang': 'Bo...","{0: {'unix_saledate': 1371448870, 'saletime': ...","{0: {'unix_listdate': 1671506918, 'price': '5....",https://cf.geekdo-images.com/1q34HBBcqBYkFCJ_-...,https://cf.geekdo-images.com/1q34HBBcqBYkFCJ_-...
106236,86013,Minotaurus,1993.0,3.0,5.0,45.0,45.0,8.0,12.0,5.20833,...,[],[],[],"{'1': {'Best': '0', 'Recommended': '0', 'Not R...",[],"{'bayspiel': '11.02.01.03', 'gwen': 'Memory wi...","{0: {'unix_saledate': 1363951459, 'saletime': ...","{0: {'unix_listdate': 1416406861, 'price': '18...",https://cf.geekdo-images.com/gvcWfmA2Qj_IIkQ3g...,https://cf.geekdo-images.com/gvcWfmA2Qj_IIkQ3g...
106237,348158,LadyUp,2022.0,2.0,5.0,15.0,20.0,6.0,4.0,7.00000,...,[],[],[],"{'1': {'Best': '0', 'Recommended': '0', 'Not R...",[],{'jdhorux': 'Great childrens Game. Fast and f...,{},{},https://cf.geekdo-images.com/oMMqVHiH-462YFqaH...,https://cf.geekdo-images.com/oMMqVHiH-462YFqaH...


In [233]:
# boardgame_info.to_csv('bgg_boardgames_tmp.csv', index=False)

# Scrape nicknames

In [None]:
# Country names that can be passed as the `country` argument of the
# get_users_for_country* functions.
# NOTE(review): presumably copied from the country filter of the
# boardgamegeek.com user search - confirm the source before extending it.
list_of_all_countries = ['Abkhazia', 'Adjara', 'Afghanistan', 'Akrotiri and Dhekelia', 'Åland', 'Albania', 
                         'Algeria', 'American Samoa', 'Andorra', 'Angola', 'Anguilla', 'Antarctica', 
                         'Antigua and Barbuda', 'APO/FPO', 'Argentina', 'Armenia', 'Aruba', 'Ascension Island', 
                         'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 
                         'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia', 'Bonaire', 
                         'Bosnia and Herzegovina', 'Botswana', 'Bouvet Island', 'Brazil', 
                         'British Indian Ocean Territory', 'British Virgin Islands', 'Brunei', 'Bulgaria', 
                         'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde', 
                         'Cayman Islands', 'Central African Republic', 'Chad', 'Chile', 'China', 'Christmas Island', 
                         'Cocos [Keeling] Islands', 'Colombia', 'Comoros', 'Cook Islands', 'Costa Rica', 'Croatia', 
                         'Cuba', 'Curacao', 'Cyprus', 'Czech Republic', 'Democratic Republic of the Congo', 
                         'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'East Timor', 'Ecuador', 'Egypt', 
                         'El Salvador', 'England', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Ethiopia', 
                         'Falkland Islands', 'Faroe Islands', 'Fiji', 'Finland', 'France', 'French Guiana', 
                         'French Polynesia', 'French Southern Territories', 'Gabon', 'Gambia', 'Georgia', 'Germany', 
                         'Ghana', 'Gibraltar', 'Greece', 'Greenland', 'Grenada', 'Guadeloupe', 'Guam', 'Guatemala', 
                         'Guernsey', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 
                         'Heard Island and McDonald Islands', 'Honduras', 'Hong Kong', 'Hungary', 'Iceland', 'India', 
                         'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Isle of Man', 'Israel', 'Italy', 'Ivory Coast', 
                         'Jamaica', 'Japan', 'Jersey', 'Jordan', 'Kazakhstan', 'Kenya', 'Kiribati', 'Kosovo', 
                         'Kuwait', 'Kyrgyzstan', 'Laos', 'Latvia', 'Lebanon', 'Lesotho', 'Liberia', 'Libya', 
                         'Liechtenstein', 'Lithuania', 'Luxembourg', 'Macao', 'Macedonia', 'Madagascar', 'Malawi', 
                         'Malaysia', 'Maldives', 'Mali', 'Malta', 'Marshall Islands', 'Martinique', 'Mauritania', 
                         'Mauritius', 'Mayotte', 'Mexico', 'Micronesia', 'Moldova', 'Monaco', 'Mongolia', 
                         'Montenegro', 'Montserrat', 'Morocco', 'Mozambique', 'Myanmar [Burma]', 'Nagorno-Karabakh', 
                         'Namibia', 'Nauru', 'Nepal', 'Netherlands', 'Netherlands Antilles', 'New Caledonia', 
                         'New Zealand', 'Nicaragua', 'Niger', 'Nigeria', 'Niue', 'Norfolk Island', 'North Korea', 
                         'Northern Mariana Islands', 'Norway', 'Oman', 'Other-Africa', 'Other-Asia', 
                         'Other-Eastern Europe', 'Other-Middle East', 'Other-South Pacific', 'Pakistan', 'Palau', 
                         'Palestine', 'Panama', 'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 
                         'Pitcairn Islands', 'Poland', 'Portugal', 'Puerto Rico', 'Qatar', 'Republic of the Congo', 
                         'Réunion', 'Romania', 'Russia', 'Rwanda', 'Saint Barthélemy', 'Saint Helena', 
                         'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Martin', 'Saint Pierre and Miquelon', 
                         'Saint Vincent and the Grenadines', 'Samoa', 'San Marino', 'São Tomé and Príncipe', 
                         'Saudi Arabia', 'Scotland', 'Senegal', 'Serbia', 'Seychelles', 'Sierra Leone', 'Singapore', 
                         'Sint Maarten', 'Slovakia', 'Slovenia', 'Solomon Islands', 'Somalia', 'South Africa', 
                         'South Georgia and the South Sandwich Islands', 'South Korea', 'South Sudan', 'Spain', 
                         'Sri Lanka', 'Sudan', 'Suriname', 'Svalbard and Jan Mayen', 'Swaziland', 'Sweden', 
                         'Switzerland', 'Syria', 'Taiwan', 'Tajikistan', 'Tanzania', 'Thailand', 'Togo', 'Tokelau', 
                         'Tonga', 'Trinidad and Tobago', 'Tristan da Cunha', 'Tunisia', 'Turkey', 'Turkmenistan', 
                         'Turks and Caicos Islands', 'Tuvalu', 'Uganda', 'Ukraine', 'United Arab Emirates', 
                         'United Kingdom', 'United States', 'Uruguay', 'U.S. Minor Outlying Islands', 
                         'U.S. Virgin Islands', 'Uzbekistan', 'Vanuatu', 'Vatican City', 'Venezuela', 'Vietnam', 
                         'Wales', 'Wallis and Futuna', 'Western Sahara', 'Yemen', 'Zambia', 'Zimbabwe']

In [None]:
%%time
boardgame_users = get_users_for_country_step_by_step('Bonaire', start_p=1, n_pages=1)
# boardgame_users = get_users_for_country('Spain')

# bgg_users = pd.DataFrame()
bgg_users = pd.read_csv('bgg_users.csv')
bgg_users = pd.concat([bgg_users, boardgame_users])
bgg_users.drop_duplicates()
bgg_users.to_csv('bgg_users.csv', index=False)
bgg_users