Further information I hope to obtain:
* Game descriptions
* Player ratings, alongside other games that those players rated

# Setup

In [1]:
import requests
from lxml import etree
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import time
import sqlite3

In [2]:
# Open the SQLite database file 'bgg_full.db' (sqlite3 creates it if missing)
# and get a cursor for running SQL statements.
# NOTE(review): a later cell reconnects to 'bgg.db' and rebinds con/cur —
# confirm which database file the rest of the notebook is meant to use.
con = sqlite3.connect('bgg_full.db')
cur = con.cursor()

In [None]:
# Text of a "request accepted, try again later" reply, bound to two names at once.
# NOTE(review): neither name is read by any other cell visible in this notebook
# (get_user_ratings defines its own local copy) — confirm whether it is still needed.
ERROR_MESSAGE = error_message = ['\n\tYour request for this collection has been accepted and will be processed.  Please try again later for access.\n']

In [None]:
def get_player_count_ratings(game_id):
    """Fetch the per-player-count recommendation votes for one game.

    Calls the BGG XML API2 ``thing`` endpoint and reads every ``results``
    element that carries a ``numplayers`` attribute; ``results`` elements
    without that attribute are ignored.

    Parameters
    ----------
    game_id
        Value sent as the ``id`` query parameter.

    Returns
    -------
    list of tuple
        ``(game_id, numplayers, recommendation, numvotes)`` rows. The list is
        empty when the reply contains no matching elements, which callers use
        as the signal to wait and retry.
    """
    params = {'id' : game_id}
    response = requests.get('https://www.boardgamegeek.com/xmlapi2/thing?', params)
    # Name the parser explicitly (the same one the other cells use) so the
    # result does not depend on whichever parser bs4 happens to pick.
    s = BeautifulSoup(response.text, "xml")

    # Positional fallback labels, used only if a result has no `value` attribute
    rec_li = ['Best', 'Recommended', 'Not Recommended']

    # output used to make SQL table
    full_rating_li = []

    for val in s.find_all('results'):
        player_count = val.get('numplayers')
        if player_count is None:
            # Not a player-count poll block
            continue

        for idx, elem in enumerate(val.find_all('result')):
            votes = elem.get('numvotes')
            if votes is None:
                continue
            # Prefer the label the API attaches to the result over its position,
            # so a reordered or longer list cannot mislabel or crash.
            label = elem.get('value')
            if label is None:
                if idx >= len(rec_li):
                    continue
                label = rec_li[idx]
            full_rating_li.append((game_id, player_count, label, votes))

    return full_rating_li

In [None]:
def get_user_ratings(username):
    """Download every board game rating recorded by one BGG user.

    Queries the XML API2 ``collection`` endpoint (rated board games, brief
    output with stats) and keeps re-requesting, one second apart, for as long
    as the reply contains a ``message`` element.

    Parameters
    ----------
    username : str
        BGG username whose collection is requested.

    Returns
    -------
    pandas.DataFrame
        One row per rated item with columns ``id`` (the item's ``objectid``),
        ``rating`` (float) and ``username``.

    Raises
    ------
    ValueError
        If the reply contains an ``errors`` element, instead of polling forever.
    """
    params = {'username' : username, 'type': 'boardgame', 'rated' : '1', 'brief' : '1', 'stats' : '1'}

    # Make API call to get XML data; repeat while the reply is only a message
    while True:
        response = requests.get('https://www.boardgamegeek.com/xmlapi2/collection?', params)
        s = BeautifulSoup(response.text, "xml")
        time.sleep(1)  # pause after every request

        # NOTE(review): assumes failures such as an unknown username come back
        # wrapped in an <errors> element; that reply also contains <message>,
        # so without this check the loop would never end.
        if s.find('errors') is not None:
            raise ValueError('BGG returned an error for user %r: %s'
                             % (username, s.find('errors').get_text(strip=True)))
        if s.find('message') is None:
            break

    # Collect rows first and build the frame once: DataFrame.append was removed
    # in pandas 2.0 and growing a frame row by row is quadratic.
    records = []
    for item in s.find_all('item'):
        stats = item.find('stats')
        rating = stats.find('rating') if stats is not None else None
        if rating is None:
            continue
        try:
            value = float(rating['value'])
        except (KeyError, ValueError):
            # No numeric rating on this item
            continue
        records.append((item['objectid'], value))

    rating_df = pd.DataFrame(records, columns=['id', 'rating'])
    # The original added an 'index' column and then renamed row labels instead of
    # columns, so the column never became 'username'; name it directly.
    rating_df['username'] = username

    return rating_df

In [None]:
# Reconnect, this time to 'bgg.db', and replace the cursor.
# NOTE(review): the setup cell opened 'bgg_full.db'; from here on con/cur point
# at a different file — confirm this switch is intentional.
con = sqlite3.connect('bgg.db')
cur = con.cursor()

# Get Player Counts

In [None]:
# Collect the id of every row in TOP_GAMES_FULL into a plain list
results = cur.execute("select id FROM TOP_GAMES_FULL").fetchall()
game_li = [row[0] for row in results]

In [None]:
# For each game, get the min and max player count.
min_max_player_li = []

i = 0
while i < len(game_li):
    game_id = game_li[i]
    params = {'id' : game_id,'stats' :'1'}
    response = requests.get('https://www.boardgamegeek.com/xmlapi2/thing?', params)
    soup = BeautifulSoup(response.text, "xml")

    try:
        min_play = soup.find('minplayers')['value']
        max_play = soup.find('maxplayers')['value']
    except TypeError:
        # find() returned None, i.e. the reply had no player-count data.
        # Wait so we can use the API again, then retry the same game without
        # recording anything (previously a row with the previous game's values
        # was appended here, or a NameError was raised on the very first game).
        time.sleep(15)
        continue

    # Only reached after a successful lookup
    min_max_player_li.append([game_id, min_play, max_play])
    i += 1

In [None]:
# Create the table holding each game's minimum and maximum player count.
# IF NOT EXISTS keeps the cell re-runnable: a plain CREATE TABLE raises
# "table already exists" the second time it is executed.
cur.execute(''' CREATE TABLE IF NOT EXISTS BGG_PLAYER_COUNTS
               (id text, min_players text, max_players text)''')

In [None]:
# Remove duplicates from API.
# When a game id appears more than once, keep the LAST row: an earlier row for
# the same id may have been recorded during a failed attempt and then carries
# stale values, while the last one comes from the successful request.
# reset_index(drop=True) renumbers the rows without adding an 'index' column.
player_li_df = (
    pd.DataFrame(min_max_player_li, columns=['GameId', 'MinPlayers', 'MaxPlayers'])
    .drop_duplicates(subset='GameId', keep='last')
    .reset_index(drop=True)
)
player_li_df.shape

In [None]:
# Turn each dataframe row into the (id, min_players, max_players) tuple
# that the SQL insert expects; id and max_players are passed through str()
df = player_li_df
tuple_li = [
    (str(game_id), min_players, str(max_players))
    for game_id, min_players, max_players
    in zip(df['GameId'], df['MinPlayers'], df['MaxPlayers'])
]

In [None]:
# Bulk-insert the tuples into BGG_PLAYER_COUNTS; each tuple supplies the three
# values bound to the ? placeholders.
cur.executemany("insert into BGG_PLAYER_COUNTS values (?, ?, ?)", tuple_li)

In [None]:
# Commit the open transaction so the player-count rows are written to the database file.
con.commit()

# Obtain game categories and mechanics

In [None]:
# Collect (game_id, value) pairs for every category link and mechanic link of
# each game, retrying a game whenever the reply contains no <link> elements.
category_li = []
mechanic_li = []
loop_len = len(game_li)

i = 0
while i < loop_len:
    game_id = game_li[i]
    params = {'id' : game_id}
    response = requests.get('https://www.boardgamegeek.com/xmlapi2/thing?', params)
    soup = BeautifulSoup(response.text, 'xml')
    info_ = soup.find_all('link')

    if len(info_) == 0:
        # Nothing came back: wait ten seconds, then ask for the same game again
        time.sleep(10)
        continue

    for x in info_:
        link_type = x['type']
        if link_type == 'boardgamecategory':
            category_li.append((game_id, x['value']))
        elif link_type == 'boardgamemechanic':
            mechanic_li.append((game_id, x['value']))

    i += 1  # move on to the next game
    print(i)

In [None]:
# Game id of every mechanic row (ids repeat, one entry per row).
# NOTE(review): li__ is not read by any other cell visible in this notebook.
li__ = [game_id for game_id, _mechanic in mechanic_li]

In [None]:
# Create the category and mechanic tables (one row per game/value pair).
# IF NOT EXISTS keeps the cell re-runnable: a plain CREATE TABLE raises
# "table already exists" the second time it is executed.
cur.execute(''' CREATE TABLE IF NOT EXISTS BGG_CATEGORIES
               (id text, category text);
               ''')

cur.execute('''CREATE TABLE IF NOT EXISTS BGG_MECHANICS
               (id text, mechanic text);''')

In [None]:
# Bulk-insert the collected (game_id, value) pairs: categories into
# BGG_CATEGORIES and mechanics into BGG_MECHANICS.
cur.executemany("insert into BGG_CATEGORIES values (?, ?)", category_li)
cur.executemany("insert into BGG_MECHANICS values (?, ?)", mechanic_li)

In [None]:
# Commit the open transaction so the category and mechanic rows are saved.
con.commit()

# Key Game Stats

In [None]:
def get_game_stats(game):
    """Fetch the headline rating statistics for one game.

    Requests the game from the XML API2 ``thing`` endpoint with ``stats=1`` and
    reads the ``value`` attribute of five children of the ``ratings`` element.

    Returns
    -------
    tuple
        ``(game, average, bayesaverage, stddev, usersrated, averageweight)``,
        the five statistics being the raw attribute strings.

    Raises
    ------
    AttributeError
        When the reply has no ``ratings`` element; callers catch this to retry.
    """
    response = requests.get(
        'https://www.boardgamegeek.com/xmlapi2/thing?',
        {'id' : game,'stats' :'1'},
    )
    all_stats = BeautifulSoup(response.text, "xml").find('ratings')

    # Order matters: it is the column order of the returned tuple
    stat_tags = ('average', 'bayesaverage', 'stddev', 'usersrated', 'averageweight')
    stat_values = [all_stats.find(tag)['value'] for tag in stat_tags]

    return (game, *stat_values)

In [None]:
# Fetch the stats tuple of every game, waiting ten seconds and retrying
# whenever get_game_stats raises AttributeError.
game_rating_li = []
for game in game_li:
    while True:
        try:
            temp = get_game_stats(game)
        except AttributeError:
            time.sleep(10)
        else:
            break
    game_rating_li.append(temp)

In [None]:
# Create the table for the per-game rating statistics.
# IF NOT EXISTS keeps the cell re-runnable: a plain CREATE TABLE raises
# "table already exists" the second time it is executed.
cur.execute('''CREATE TABLE IF NOT EXISTS BGG_GAME_STATS
               (id text, avg_rating text, avg_bayes_rating text, std text, ratings text, weight text);''')

In [None]:
# Bulk-insert the six-value stats tuples gathered in game_rating_li.
cur.executemany("insert into BGG_GAME_STATS values (?, ?, ?, ?, ?, ?)", game_rating_li)

In [None]:
# Commit the open transaction so the game statistics are saved.
con.commit()

# Num of Players Voting

In [None]:
# Gather the player-count vote rows of every game. An empty result gets one
# retry after a ten-second pause; whatever the retry returns is kept.
full_player_rating_li = []
for g in game_li:
    current_li = get_player_count_ratings(g)
    if not current_li:
        time.sleep(10)
        current_li = get_player_count_ratings(g)
    full_player_rating_li += current_li

In [None]:
# Create the table for the player-count recommendation votes.
# IF NOT EXISTS keeps the cell re-runnable: a plain CREATE TABLE raises
# "table already exists" the second time it is executed.
cur.execute(''' CREATE TABLE IF NOT EXISTS BGG_PLAYER_RATINGS
               (id text, player_count text, player_recommendation text, votes text);
               ''')

In [None]:
# Bulk-insert the (id, player_count, recommendation, votes) tuples.
cur.executemany("insert into BGG_PLAYER_RATINGS values (?, ?, ?, ?)", full_player_rating_li)

In [None]:
# Commit the open transaction so the player-count votes are saved.
con.commit()

# Join Tables

In [None]:
# Build TOP_GAMES_FULL from a query: every row of TOP_BGG_GAMES, left-joined on id
# to BGG_PLAYER_COUNTS (min/max players) and BGG_GAME_STATS (rating statistics).
# NOTE(review): earlier cells already select from TOP_GAMES_FULL, so this cell
# must have been run before them — the notebook order does not reflect that.
cur.execute('''CREATE TABLE TOP_GAMES_FULL
           
           AS
           
        SELECT t1.id, title, year, t2.min_players, t2.max_players
           ,avg_rating, avg_bayes_rating, std, ratings, weight
        FROM TOP_BGG_GAMES t1
            LEFT JOIN BGG_PLAYER_COUNTS t2
                ON t1.id = t2.id
            LEFT JOIN  BGG_GAME_STATS t3
                ON t1.id = t3.id
           
           ''')

In [None]:
# Commit so the newly created TOP_GAMES_FULL table is saved.
con.commit()

In [None]:
# Select the five columns used in the exploration below; `results` is the
# cursor itself, so its rows can only be iterated once.
results = cur.execute('''
        SELECT t1.id, title, max_players
           ,avg_rating, std
        FROM TOP_GAMES_FULL t1
           
           ''')

In [None]:
# Materialise the query rows into a list
result_li = list(results)

In [None]:
# Load the rows into a DataFrame; avg_rating and std appear as score / std_score
df = pd.DataFrame(
    result_li,
    columns=['id', 'title', 'max_players', 'score', 'std_score'],
)

In [None]:
# The values come out of SQLite text columns; cast the numeric ones to float
cols = ['max_players', 'score', 'std_score']
df[cols] = df[cols].astype('float')
df.dtypes

In [None]:
# Solo games: at most one player
df.loc[df['max_players'] == 1]

In [None]:
# Duel games: at most two players
df.loc[df['max_players'] == 2]

In [None]:
# Large group games: more than five players
df.loc[df['max_players'] > 5]

In [None]:
# What's the largest?
largest_player_count = df.max_players.max()
print(largest_player_count)

# Which games? Filter on the computed maximum rather than a hard-coded 100,
# so the answer stays correct when the data changes.
df[df['max_players'] == largest_player_count]

In [None]:
# Most divisive game(s): the highest rating standard deviation
highest_std = df['std_score'].max()
df.loc[df['std_score'] == highest_std]

In [None]:
# Least divisive game(s): the lowest rating standard deviation
lowest_std = df['std_score'].min()
df.loc[df['std_score'] == lowest_std]

In [None]:
# Probe a single game (id 13) and display its <playingtime> element,
# to see what the playtime loop below has to read.
params = {'id' : 13,'stats' :'1'}
response = requests.get('https://www.boardgamegeek.com/xmlapi2/thing?', params)
s = BeautifulSoup(response.text, "xml")
s.find('playingtime')

# Coda: Adding Play time

Initially, I felt that the previous table was complete, but I later decided to add play time as well. I should have included it from the start, though adding it now gives me the chance to try out SQLite's `ALTER TABLE` statement.

In [None]:
# Collect (game_id, playing time) for every game; a reply without a
# <playingtime> element triggers a 20-second wait and a retry of the same game.
playtime_li = []
i = 0
while i < len(game_li):
    game_id = game_li[i]
    params = {'id' : game_id,'stats' :'1'}
    response = requests.get('https://www.boardgamegeek.com/xmlapi2/thing?', params)
    s = BeautifulSoup(response.text, "xml")
    try:
        play_time = s.find('playingtime')['value']
    except TypeError:
        # find() gave None: nothing usable in this reply
        time.sleep(20)
        continue
    playtime_li.append((game_id, play_time))
    i += 1

In [None]:
# Create the play-time table and, in the same cell, bulk-insert the
# (game_id, play_time) tuples collected above.
# NOTE(review): re-running this cell fails at CREATE TABLE once the table exists,
# which also stops the insert from running twice.
cur.execute(''' CREATE TABLE BGG_PLAY_TIME
               (id text, play_time text);
               ''')

cur.executemany("insert into BGG_PLAY_TIME values (?, ?)", playtime_li)

In [None]:
# Add a column to TOP_GAMES_FULL for the play time.
# NOTE(review): the statement says ADD COLUMNS (plural). A later cell renames a
# column called COLUMNS to play_time, which indicates this statement created a
# column literally named "COLUMNS" rather than "play_time". Left unchanged
# because that rename cell depends on it; fix both together if cleaning up.
cur.execute('''  ALTER TABLE TOP_GAMES_FULL
     ADD COLUMNS play_time text ''')

In [None]:
# Remove every row from TOP_GAMES_FULL (the WHERE clause is always true) so the
# next cell can repopulate it with the extra column filled in.
cur.execute(''' DELETE  FROM TOP_GAMES_FULL  WHERE TRUE''')

In [None]:
# Refill TOP_GAMES_FULL with the same joined query used to create it, plus
# play_time from BGG_PLAY_TIME as the last selected value. No column list is
# given, so the values are matched to the table's columns by position.
cur.execute('''INSERT INTO TOP_GAMES_FULL
           
        SELECT t1.id, title, year, t2.min_players, t2.max_players
           ,avg_rating, avg_bayes_rating, std, ratings, weight, t4.play_time
        FROM TOP_BGG_GAMES t1
            LEFT JOIN BGG_PLAYER_COUNTS t2
                ON t1.id = t2.id
            LEFT JOIN  BGG_GAME_STATS t3
                ON t1.id = t3.id
            LEFT JOIN BGG_PLAY_TIME t4
                ON t1.id = t4.id''')

In [None]:
# Show the column definitions TOP_GAMES_FULL currently has
results = cur.execute('PRAGMA table_info(TOP_GAMES_FULL);').fetchall()
result_li = list(results)
result_li

In [None]:
# The earlier ALTER TABLE cell left a column literally named COLUMNS; rename it
# to play_time. Checking table_info first (column name is field 1 of each row)
# makes the cell safe to re-run: once the column has been renamed, an unguarded
# RENAME COLUMN fails with "no such column".
column_names = [row[1] for row in cur.execute('PRAGMA table_info(TOP_GAMES_FULL);').fetchall()]
if any(name.upper() == 'COLUMNS' for name in column_names):
    cur.execute('ALTER TABLE TOP_GAMES_FULL RENAME COLUMN COLUMNS TO play_time;')

In [None]:
# Commit so the play-time changes to TOP_GAMES_FULL are saved.
con.commit()

Playtime added!

# Load Game Ratings

In [12]:
# Get the ids of the games in TOP_GAMES_FULL with id >= 102652, ascending.
# NOTE(review): the hard-coded 102652 is presumably the point at which an
# earlier download run stopped — confirm before reusing this cell.
results = cur.execute('''select id FROM TOP_GAMES_FULL
                    
                    WHERE 1=1
                        AND id >= 102652
                    
                    ORDER BY 1 ASC
                    ''').fetchall()

# Sort once after collecting; previously the whole list was re-sorted after
# every single append.
game_li = sorted(row[0] for row in results)

In [62]:
# cur.execute(''' CREATE TABLE BGG_GAME_RATINGS
#                (game_id text, username text, rating);
#                ''')

<sqlite3.Cursor at 0x12dddce2490>

In [63]:
# Commit any pending changes before the long-running download below starts.
con.commit()

In [None]:
# Download every rating comment of each game, 100 per page, and store the
# (game, username, rating) rows once a game is complete.
MAX_FAILED_REQUESTS = 5  # consecutive unsuccessful replies tolerated per page

for game in game_li:
    i = 1
    failed_requests = 0
    comment_li = []
    while True:
        params = {'id' : game, 'ratingcomments': '1', 'page' : i, 'pagesize':100}
        response = requests.get('https://www.boardgamegeek.com/xmlapi2/thing?', params)

        if response.status_code != 200:
            # An unsuccessful reply contains no <comment> tags, so without this
            # check it would be mistaken for "no more pages" and the game would
            # be committed with only part of its ratings. Wait and retry the
            # same page instead, giving up after repeated failures.
            failed_requests += 1
            if failed_requests > MAX_FAILED_REQUESTS:
                raise RuntimeError('giving up on game %s page %s: HTTP %s'
                                   % (game, i, response.status_code))
            time.sleep(30)
            continue
        failed_requests = 0

        soup = BeautifulSoup(response.text, "xml")
        comments = soup.find_all("comment")
        if len(comments) == 0:
            # A successful reply without comments: past the last page
            break
        if i % 10 == 0:
            # Progress report plus a pause every ten pages to go easy on the API
            print('game:', game, 'page:', i)
            time.sleep(30)
        comment_li.extend((game, com['username'], com['rating']) for com in comments)
        i += 1
    print('completed coments for game:', game)
    cur.executemany("insert into BGG_GAME_RATINGS values (?, ?, ?)", comment_li)
    con.commit()

game: 102652 page: 10
game: 102652 page: 20
game: 102652 page: 30
game: 102652 page: 40
game: 102652 page: 50
game: 102652 page: 60
game: 102652 page: 70
game: 102652 page: 80
game: 102652 page: 90
game: 102652 page: 100
game: 102652 page: 110
game: 102652 page: 120
game: 102652 page: 130
game: 102652 page: 140
game: 102652 page: 150
completed coments for game: 102652
game: 102680 page: 10
game: 102680 page: 20
game: 102680 page: 30
game: 102680 page: 40
game: 102680 page: 50
game: 102680 page: 60
game: 102680 page: 70
game: 102680 page: 80
game: 102680 page: 90
game: 102680 page: 100
game: 102680 page: 110
game: 102680 page: 120
game: 102680 page: 130
game: 102680 page: 140
game: 102680 page: 150
completed coments for game: 102680
game: 102794 page: 10
game: 102794 page: 20
game: 102794 page: 30
game: 102794 page: 40
game: 102794 page: 50
game: 102794 page: 60
game: 102794 page: 70
game: 102794 page: 80
game: 102794 page: 90
game: 102794 page: 100
game: 102794 page: 110
game: 102794 p

In [11]:
#cur.execute('''Delete from BGG_GAME_RATINGS WHERE game_id >= 102652''')

<sqlite3.Cursor at 0x1ed7f686490>

# Scratch Code

In [None]:
# Scratch: query the `user` endpoint for one username taken from the comments
# (with the hot and top flags set for the boardgame domain) and display the raw
# parsed reply.
params = {'name' : 'agentzen', 'hot' : '1', 'top' : '1', 'domain' : 'boardgame',}
response = requests.get('https://www.boardgamegeek.com/xmlapi2/user?', params)
s = BeautifulSoup(response.text, "xml")
s

In [None]:
# Scratch: request the same user's rated board games from the `collection`
# endpoint (the same parameters get_user_ratings sends) and display the raw
# parsed reply, to see what other games they rated.
params = {'username' : 'agentzen', 'type': 'boardgame', 'rated' : '1', 'brief' : '1', 'stats' : '1'}
response = requests.get('https://www.boardgamegeek.com/xmlapi2/collection?', params)
s = BeautifulSoup(response.text, "xml")
s

In [None]:
# Comments
comment_li = []

# Obtaining ten pages of user comments/ratings for game 13
for i in range(1,11):
    # The loop index is the page number. Previously it was sent as `stats`
    # while `page` was fixed to [3, 4], so the ten requests did not walk pages 1-10.
    params = {'id' : '13', 'ratingcomments' : '1', 'comments' : '1', 'pagesize' : '100', 'page' : str(i), 'stats' : '1'}
    response = requests.get('https://www.boardgamegeek.com/xmlapi2/thing?', params)
    # Parse THIS response; previously a `soup` left over from another cell was
    # searched, so the same stale comments were appended ten times.
    soup = BeautifulSoup(response.text, "xml")
    comments = soup.find_all("comment")
    comment_li.extend(comments)

In [None]:
# Try get_user_ratings on a single username and display the resulting frame
test_df = get_user_ratings('agent_t_bib')
test_df