In [2]:
import requests
import numpy as np
import pandas as pd
import time
import urllib.parse

In [3]:
## Seed user whose collection drives the recommendation, and the inclusive
## rating window a game (or a commenter's rating) must fall in to be considered.
username = "TragicBandit"
min_rating, max_rating = 7.5, 10

In [25]:
## Download the seed user's owned-games collection as raw XML text.
## - the username is percent-encoded so a name containing spaces or symbols still forms a valid URL
## - the timeout keeps the cell from hanging forever if the server never answers
base_collection = requests.get(
    'https://boardgamegeek.com/xmlapi/collection/' + urllib.parse.quote(username, safe = '') + '?own=1',
    timeout = 30,
).text

In [27]:
## get collection data on rated games from a user's collection

def getNextGameData(game_text):
    """Extract id, name and rating from one ``<item ...>...</item>`` collection entry.

    Parameters
    ----------
    game_text : str
        Raw XML text of a single collection item.

    Returns
    -------
    dict
        ``{'id': int, 'name': str, 'rating': float}``.  ``rating`` is ``np.nan``
        when the item has no numeric rating.  When the ``subtype`` attribute is
        not ``'boardgame'`` (this includes empty or truncated text) the
        placeholder ``{'id': -1, 'name': '', 'rating': np.nan}`` is returned.

    Raises
    ------
    ValueError
        If a boardgame item has no integer ``objectid`` attribute.
    """
    import html  # function-scope import: only needed to decode XML entities in names

    def _attribute(marker):
        # Text between `marker` (which ends with the opening quote) and the next quote;
        # '' when the marker or the closing quote is absent.
        start = game_text.find(marker)
        if start == -1:
            return ''
        start += len(marker)
        end = game_text.find('"', start)
        return game_text[start:end] if end != -1 else ''

    ## get subtype to see if boardgame
    if _attribute('subtype="') != 'boardgame':
        # NaN (rather than '') keeps numeric comparisons on the rating from raising
        # TypeError; every comparison with NaN is simply False.
        return {'id': -1, 'name': '', 'rating': np.nan}

    ## get game id
    game_id = int(_attribute('objectid="'))

    ## get game name: the text between the end of the <name ...> tag and </name>.
    ## Locating the closing '>' works for any sortindex width, not just one digit.
    name = ''
    tag_start = game_text.find('<name')
    if tag_start != -1:
        text_start = game_text.find('>', tag_start)
        text_end = game_text.find('</name>', tag_start)
        if text_start != -1 and text_end != -1 and text_start < text_end:
            # Decode entities such as &#039; and &amp; so names read naturally.
            name = html.unescape(game_text[text_start + 1:text_end])

    ## get game rating (non-numeric values such as "N/A" become NaN)
    try:
        rating = float(_attribute('<rating value="'))
    except ValueError:
        rating = np.nan

    return {'id': game_id, 'name': name, 'rating': rating}


def getNextGame(content_text):
    """Split off the first ``<item ...>...</item>`` element of a collection response.

    Parameters
    ----------
    content_text : str
        Collection XML text, or the remainder returned by a previous call.

    Returns
    -------
    tuple of (str, str)
        The item text (opening tag through ``</item>``) and everything after it.
        ``('', '')`` when no complete item is left.
    """
    opening = '<item '
    closing = '</item>'

    item_start = content_text.find(opening)
    if item_start == -1:
        return '', ''

    # Search for the closing tag from the item's own start so a stray closing
    # tag earlier in the text cannot be picked up.
    close_at = content_text.find(closing, item_start)
    if close_at == -1:
        return '', ''

    item_end = close_at + len(closing)
    # Keep everything after the closing tag: skipping an extra character would
    # eat the '<' of the next item when items are not separated by whitespace.
    return content_text[item_start:item_end], content_text[item_end:]

def createCollection(content_text, rated=True):
    """Parse a collection XML response into a DataFrame of board games.

    Parameters
    ----------
    content_text : str
        Raw XML text of a collection response.
    rated : bool, default True
        When true, keep only games whose rating lies within the module-level
        ``min_rating`` .. ``max_rating`` window (inclusive); unrated games are
        dropped.  When false, keep every board game.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``name``, ``rating``; one row per kept game.

    Raises
    ------
    ValueError
        If the response has no parsable ``totalitems`` attribute (for example
        when the server returned a message instead of a collection).
    """
    columns = ['id', 'name', 'rating']

    ## get counter for loop
    marker = 'totalitems="'
    count_start = content_text.find(marker)
    if count_start == -1:
        raise ValueError('collection response has no totalitems attribute')
    count_start += len(marker)
    count_end = content_text.find('"', count_start)
    item_count = int(content_text[count_start:count_end])

    # Collect plain dicts and build the frame once: DataFrame.append was removed
    # in pandas 2.0 and growing a frame row by row is quadratic.
    records = []
    for _ in range(item_count):
        next_game, content_text = getNextGame(content_text)
        game_data = getNextGameData(next_game)

        # id -1 is the placeholder for anything that is not a board game; check it
        # first so the placeholder's rating is never compared against numbers.
        if game_data['id'] == -1:
            continue
        # A NaN rating fails the chained comparison, so unrated games are skipped.
        if rated and not (min_rating <= game_data['rating'] <= max_rating):
            continue
        records.append(game_data)

    return pd.DataFrame(records, columns=columns)

In [31]:
## Two views of the seed collection: the rating-filtered one and the unfiltered one.
base_collection_df = createCollection(base_collection, rated = True)
full_collection_df = createCollection(base_collection, rated = False)
print(base_collection_df, full_collection_df, sep = '\n')

       id                           name  rating
0   68448                      7 Wonders    10.0
1   84876        The Castles of Burgundy     9.0
2   36218                       Dominion     8.0
3  271519                      Ecosystem     8.0
4  148228                       Splendor     8.0
5    9209                 Ticket to Ride     9.0
6  183394  Viticulture Essential Edition     8.0
7  266192                       Wingspan    10.0
        id                                        name  rating
0    68448                                   7 Wonders    10.0
1   358504  Betrayal at House on the Hill: 3rd Edition     6.0
2    84876                     The Castles of Burgundy     9.0
3       13                                       CATAN     7.0
4    36218                                    Dominion     8.0
5   271519                                   Ecosystem     8.0
6    65244                            Forbidden Island     6.0
7   291457                Gloomhaven: Jaws of the Lion 

In [7]:
## Try the parser on another user's collection, keeping only games inside the rating window.
## `rated` is passed explicitly (createCollection requires it) and the request is bounded by a timeout.
collection_text = requests.get('https://boardgamegeek.com/xmlapi/collection/1000rpm?own=1', timeout = 30).text

print(createCollection(collection_text, True))

         id                                      name  rating
0       432                                  6 nimmt!     8.0
1     68448                                 7 Wonders     8.0
2    204516         878 Vikings: Invasions of England     9.0
3         5                                   Acquire     8.0
4     22545  Age of Empires III: The Age of Discovery     8.0
..      ...                                       ...     ...
129  193105               Utopia Engine: Beast Hunter     8.0
130   15364                            Vegas Showdown     8.0
131   25821    The Werewolves of Miller&#039;s Hollow     8.0
132   33643                                Whoowasit?     8.0
133   27588                                Zooloretto     8.0

[134 rows x 3 columns]


In [8]:
## get next comment
def getNextComment(game_text):
    """Split off the first ``<comment ...>...</comment>`` element of a game response.

    Parameters
    ----------
    game_text : str
        Game XML text, or the remainder returned by a previous call.

    Returns
    -------
    tuple
        ``(comment, rest)`` where ``comment`` is the element text and ``rest`` is
        everything after it, or ``(0, 0)`` when no complete comment is left.
    """
    opening = '<comment '
    closing = '</comment>'

    comment_start = game_text.find(opening)
    if comment_start == -1:
        return 0, 0

    # Look for the closing tag after this comment's opening tag only.
    close_at = game_text.find(closing, comment_start)
    if close_at == -1:
        return 0, 0

    comment_end = close_at + len(closing)
    # Keep the whole remainder: skipping an extra character would eat the '<' of
    # the next comment when comments are not separated by whitespace.
    return game_text[comment_start:comment_end], game_text[comment_end:]


def getCommentInfo(comment_text):
    """Extract the commenter's username and rating from one ``<comment>`` element.

    Parameters
    ----------
    comment_text : str
        Raw XML text of a single comment element.

    Returns
    -------
    dict
        ``{'username': str, 'rating': float}``.  The username is percent-encoded
        (no characters treated as safe) so it can be placed directly in a URL
        path; it is ``''`` when the attribute is missing.  ``rating`` is
        ``np.nan`` when the attribute is missing or not numeric.
    """
    def _attribute(marker):
        # Text between `marker` (which ends with the opening quote) and the next quote;
        # '' when the marker or the closing quote is absent.
        start = comment_text.find(marker)
        if start == -1:
            return ''
        start += len(marker)
        end = comment_text.find('"', start)
        return comment_text[start:end] if end != -1 else ''

    ## get comment username
    username = urllib.parse.quote(_attribute('username="'), safe = '')

    ## get comment rating (only a failed float conversion is treated as "no rating")
    try:
        rating = float(_attribute('rating="'))
    except ValueError:
        rating = np.nan

    return {'username': username, 'rating': rating}

## get list of usernames
def getGameListUsernames(game_text):
    """List the commenters of a game whose rating lies in the configured window.

    Parameters
    ----------
    game_text : str
        Raw XML text of a game response that includes its comments.

    Returns
    -------
    pandas.DataFrame
        Columns ``username`` and ``rating``; one row per comment whose rating is
        within the module-level ``min_rating`` .. ``max_rating`` window
        (inclusive).  Comments without a numeric rating are skipped.
    """
    # Collect dicts and build the frame once: DataFrame.append was removed in
    # pandas 2.0 and growing a frame row by row is quadratic.
    records = []
    while True:
        comment, game_text = getNextComment(game_text)
        if comment == 0:  # sentinel from getNextComment: nothing left to read
            break

        comment_dict = getCommentInfo(comment)
        # A NaN rating fails the chained comparison, so unrated comments are dropped.
        if min_rating <= comment_dict['rating'] <= max_rating:
            records.append(comment_dict)

    return pd.DataFrame(records, columns=['username', 'rating'])


In [9]:
## Fetch the comments of a single game (id 13) and list the users whose rating is in the window.
## The timeout keeps the cell from hanging if the server never answers.
boardgame_comments = requests.get('https://boardgamegeek.com/xmlapi/boardgame/13?comments=1', timeout = 30).text

x = getGameListUsernames(boardgame_comments)

print(x)
    

              username  rating
0           00daniel00    8.00
1           100pcBlade    8.00
2                1024b    8.25
3              14cross    9.00
4            1amgreg77    8.70
5          1Aspielerin    8.00
6             1nf1n1ty    8.00
7               200404   10.00
8             2ndPlace    8.00
9               2ombie    8.00
10              31rhcp    9.00
11         360_Piranha    8.00
12        3kindsofsalt   10.00
13          3sapphires   10.00
14               47029    8.00
15             49xjohn    9.40
16               4ndrz    8.00
17           4ndySmith    8.00
18              5thian   10.00
19             80sGirl   10.00
20               8one6    8.00
21               8tlas    8.00
22            95after5   10.00
23  A%20Strange%20Aeon   10.00
24               A.Men    8.00
25              AADA7A    9.00
26           aadrian13    8.00
27             aandjso    8.00
28            aardball    8.80
29      Aardvark%20Sam    8.00
30            aaron444    8.00
31      

In [15]:
## get usernames for each boardgame in the rating-filtered seed collection

# Gather one Series per game and concatenate once at the end instead of
# re-concatenating the growing result on every iteration.
per_game_usernames = []

for index, value in base_collection_df['id'].items():

    time.sleep(3)  # pause between requests to the API
    boardgame_comments = requests.get(
        'https://boardgamegeek.com/xmlapi/boardgame/' + str(value) + '?comments=1',
        timeout = 30,  # do not let one stalled request hang the whole loop
    ).text

    comment_data = getGameListUsernames(boardgame_comments)
    print(comment_data['username'].shape)
    per_game_usernames.append(comment_data['username'])

# The leading empty Series keeps the result well-defined when no game was processed.
# drop_duplicates keeps the first occurrence, so surviving index labels have gaps.
username_df = pd.concat(
    [pd.Series(dtype = 'object'), *per_game_usernames],
    ignore_index = True,
).drop_duplicates()
print(username_df)

(51,)
(67,)
(54,)
(39,)
(44,)
(42,)
(58,)
(63,)
0           0lexandr
1            1000rpm
2         100pcBlade
3       144creations
4            14cross
           ...      
413      Addiction2k
414           adh105
415           Adio75
416        AdMan2015
417    AdmiralBanana
Length: 310, dtype: object


In [38]:
def scoreUserWeightValue(rated_game_pd):
    """Remove the seed collection's games from a user's games and weight the user.

    Parameters
    ----------
    rated_game_pd : pandas.DataFrame
        A user's games; must have an ``id`` column.

    Returns
    -------
    tuple of (pandas.DataFrame, float)
        A copy of ``rated_game_pd`` without the rows whose ``id`` appears in the
        module-level ``base_collection_df``, and the weight
        ``1 + 0.2 * (number of distinct seed games the user also has)``.
    """
    in_seed = rated_game_pd['id'].isin(base_collection_df['id'])

    # Each shared game counts once, however many rows carry its id.
    shared_games = rated_game_pd.loc[in_seed, 'id'].nunique()
    remaining_games = rated_game_pd.loc[~in_seed].copy()

    return remaining_games, 1 + (0.2 * shared_games)
            
def scoreUserWeight(rated_game_pd):
    """Return the user's non-seed games with the user's weight in a ``score`` column.

    The games and the weight both come from ``scoreUserWeightValue``; every
    returned row carries the same ``score``.
    """
    remaining_games, user_weight = scoreUserWeightValue(rated_game_pd)
    return remaining_games.assign(score = user_weight)
    
def scoreUsernameRatings(username):
    """Download a user's owned collection and score the games they rated in the window.

    Parameters
    ----------
    username : str
        User name exactly as it should appear in the URL path.  It is inserted
        verbatim, so it must already be URL-safe (names produced by
        ``getCommentInfo`` are percent-encoded).

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``name``, ``score`` for the user's rated games that are
        not in the seed collection.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails or does not answer within the timeout.
    ValueError
        If the response cannot be parsed as a collection.
    """
    time.sleep(2)  # pause between requests to the API
    response = requests.get(
        'https://boardgamegeek.com/xmlapi/collection/' + username + '?own=1',
        timeout = 30,  # a stalled request raises instead of hanging the whole run
    )
    rated_game_pd = createCollection(response.text, rated = True)
    modified_game_pd = scoreUserWeight(rated_game_pd)
    print("added username " + username)
    return modified_game_pd[['id', 'name', 'score']]

In [36]:
# Sanity check with the seed user: games that are in base_collection_df are removed
# by scoreUserWeightValue, so the result is expected to be empty here.
scoreUsernameRatings('TragicBandit')

added username TragicBandit


Unnamed: 0,id,name,score


In [39]:
# Spot-check another user: every remaining row carries that user's single weight in `score`.
scoreUsernameRatings('1000rpm')

added username 1000rpm


Unnamed: 0,id,name,score
0,432,6 nimmt!,1.4
2,204516,878 Vikings: Invasions of England,1.4
3,5,Acquire,1.4
4,22545,Age of Empires III: The Age of Discovery,1.4
5,48726,Alien Frontiers,1.4
...,...,...,...
129,193105,Utopia Engine: Beast Hunter,1.4
130,15364,Vegas Showdown,1.4
131,25821,The Werewolves of Miller&#039;s Hollow,1.4
132,33643,Whoowasit?,1.4


In [20]:
## create the game ranking matrix

# One scored frame per user; they are concatenated once after the loop instead of
# re-concatenating the growing result for every user.
user_score_frames = []


## get user id and associated board games
for index, value in username_df.items():

    for i in range(3):
        try:
            user_score_df = scoreUsernameRatings(value)
        except Exception:
            # Exception rather than a bare except, so a keyboard/kernel interrupt
            # still stops the run. Wait a little, then retry.
            time.sleep(3)
        else:
            user_score_frames.append(user_score_df)
            break
    else:
        # All attempts failed: skip this user. Nothing is appended, so a previous
        # user's frame can never be counted a second time.
        print("skipped username " + str(value))

# The leading empty frame guarantees the expected columns even if no user succeeded.
game_recommendation_df = pd.concat(
    [pd.DataFrame(columns = ['id', 'name', 'score']), *user_score_frames],
    ignore_index = True,
)

print(game_recommendation_df)

added username 0lexandr
added username 1000rpm
added username 100pcBlade
added username 144creations
added username 14cross
added username 16note
added username 1amgreg77
added username 1arska
added username 1awesomeguy
added username 1CHNUP
added username 1point21gigawatts
added username 1SkyCaptain
added username 216stitches
added username 234418803
added username 2d20
added username 2dTones
added username 2goofy
added username 2la_fr
added username 2ndPlace
added username 3davoli
added username 3dragonfly
added username 42n4
added username 4th%20stringer
added username 6element
added username 6EQUJ5
added username 700ravens
added username 7guard
added username 7Wonders500
added username 80sGirl
added username 8odoros
added username A1790794268
added username A1win
added username A3RKev
added username aaa37
added username aadrian13
added username aaj94
added username aalotr13
added username aardball
added username Aardvark%20Sam
added username aardwolf98
added username aaronandre41
a

In [41]:
## Drop every candidate whose id is already in the seed user's full collection
game_recommendation_df = game_recommendation_df[
    ~game_recommendation_df['id'].isin(full_collection_df['id'])
]

In [43]:
## Total each game's score across all users and rank the games, highest total first.
## Only `score` is aggregated: summing the `id` column would be meaningless.
## (The previous set_index('id') call discarded its result, so it had no effect.)
score_df = (
    game_recommendation_df
    .groupby(['name'])[['score']]
    .sum()
    .sort_values(by = ['score'], ascending = False)
)


print(score_df.head(35))

                                     score
name                                      
Terraforming Mars                    122.0
Carcassonne                          116.6
Azul                                 114.4
7 Wonders Duel                       110.2
Codenames                             90.8
Concordia                             89.4
Puerto Rico                           87.6
Race for the Galaxy                   86.6
Agricola                              86.2
The Quacks of Quedlinburg             86.0
Patchwork                             85.4
Jaipur                                81.0
Love Letter                           78.4
Lords of Waterdeep                    75.8
King of Tokyo                         69.6
Brass: Birmingham                     65.6
Five Tribes                           64.2
Cascadia                              63.6
The Crew: The Quest for Planet Nine   61.6
Everdell                              61.4
Power Grid                            61.2
Kingdomino 