In [1]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler
from IPython.display import display
import warnings
warnings.filterwarnings('ignore')

games = pd.read_csv('games.csv')
mechanics = pd.read_csv('mechanics.csv')
user_ratings = pd.read_csv('user_ratings.csv')

columns_to_drop = [
    'Name', 'Description', 'YearPublished', 'BayesAvgRating', 'StdDev',
    'LanguageEase', 'BestPlayers', 'GoodPlayers', 'NumWeightVotes',
    'Kickstarted', 'ImagePath', 'Rank:boardgame', 'Rank:strategygames',
    'Rank:abstracts', 'Rank:familygames', 'Rank:thematic', 'Rank:cgs',
    'Rank:wargames', 'Rank:partygames', 'Rank:childrensgames'
]
games_cleaned = games.drop(columns=columns_to_drop)


In [2]:
# Merge 'games' and 'mechanics' datasets on 'BGGId' column
games_mechanics_merged = pd.merge(games_cleaned, mechanics, on='BGGId', how='inner')

# Columns to be normalized
numeric_columns = ['GameWeight', 'AvgRating', 'MinPlayers', 'MaxPlayers', 'ComAgeRec', 'NumOwned',
                   'NumWant', 'NumWish', 'MfgPlaytime', 'ComMinPlaytime', 'ComMaxPlaytime', 'MfgAgeRec',
                   'NumUserRatings', 'NumComments', 'NumAlternates', 'NumExpansions', 'NumImplementations']

# Initializing MinMaxScaler
scaler = MinMaxScaler()
games_mechanics_merged[numeric_columns] = scaler.fit_transform(games_mechanics_merged[numeric_columns])


In [10]:
numeric_data = games_mechanics_merged.drop(columns=['Family', 'Cat:Thematic', 'Cat:Strategy', 'Cat:War', 'Cat:Family', 
                                                    'Cat:CGS', 'Cat:Abstract', 'Cat:Party', 'Cat:Childrens', 
                                                    'IsReimplementation'])

knn_model = NearestNeighbors(n_neighbors=50, metric='euclidean')

# Fill any NaN values with the mean of their columns
numeric_data_filled = numeric_data.fillna(numeric_data.mean())
knn_model.fit(numeric_data_filled.drop(columns=['BGGId']))


'''This was my first attempt at a recommendation function, it seems to work well for high ratings (>5) but not for low ratings (<5)'''
def high_rating_recommended_games(game_ratings, n_recommendations=5):
    # Convert game names to BGGIds and remember user ratings
    game_info = [(games[games['Name'] == name]['BGGId'].values[0], rating) 
                 for name, rating in game_ratings.items() if name in games['Name'].values]
    
    recommendations = pd.DataFrame(columns=['Boardgame Name', 'Average Rating', 'Predicted Player Rating'])
    for bggid, user_rating in game_info:
        game_features = numeric_data_filled[numeric_data_filled['BGGId'] == bggid].drop(columns=['BGGId'])
        distances, indices = knn_model.kneighbors(game_features, n_neighbors=n_recommendations+1) # Find similar games using knn
        recommended_ids = numeric_data_filled.iloc[indices.flatten()]['BGGId'].values[1:]  # Skip self
        recommended_ids = [id for id in recommended_ids if id != bggid]
        
        similarity_scores = 1 / (1 + distances.flatten())  # Create similarity scores from distances (closer to 0 is more similar)
        
        for idx, rec_id in enumerate(recommended_ids):
            if rec_id in games['BGGId'].values:
                rec_game = games[games['BGGId'] == rec_id][['Name', 'AvgRating']].iloc[0]
                name, avg_rating = rec_game
                
                # Calculate the predicted rating using the similarity score
                estimated_rating = (similarity_scores[idx] * user_rating + (1 - similarity_scores[idx]) * avg_rating)
                
                recommendations = recommendations.append({
                    'Boardgame Name': name, 
                    'Average Rating': avg_rating, 
                    'Predicted Player Rating': estimated_rating
                }, ignore_index=True)
    
    # Sort the recommendations and return the top N ratings
    recommendations = recommendations.sort_values(by='Predicted Player Rating', ascending=False).head(n_recommendations)
    recommendations['Predicted Player Rating'] = recommendations['Predicted Player Rating'].round(5)
    return recommendations

'''This was my second attempt at a recommendation function, it seems to work well for low ratings (<5) but not for high ratings (>5)'''
def low_rating_recommended_games(game_ratings, n_recommendations=5):
    # Convert game names to BGGIds and remember user ratings
    game_info = [(games[games['Name'] == name]['BGGId'].values[0], rating)
                 for name, rating in game_ratings.items() if name in games['Name'].values]

    recommendations = pd.DataFrame(columns=['Boardgame Name', 'Average Rating', 'Predicted Player Rating'])

    for bggid, user_rating in game_info:
        game_features = numeric_data_filled[numeric_data_filled['BGGId'] == bggid].drop(columns=['BGGId'])
        distances, indices = knn_model.kneighbors(game_features, n_neighbors=n_recommendations+1)
        recommended_ids = numeric_data_filled.iloc[indices.flatten()]['BGGId'].values[1:]  # Skip self

        for idx, rec_id in enumerate(recommended_ids):
            if rec_id in games['BGGId'].values:
                rec_game = games[games['BGGId'] == rec_id][['Name', 'AvgRating']].iloc[0]
                similarity = 1 / (1 + distances[0][idx+1])  # Normalized similarity score
                estimated_rating = user_rating * similarity + rec_game['AvgRating'] * (1 - similarity)
                
                new_row = pd.DataFrame([{
                    'Boardgame Name': rec_game['Name'],
                    'Average Rating': rec_game['AvgRating'],
                    'Predicted Player Rating': max(1, min(10, estimated_rating))
                }])
                recommendations = pd.concat([recommendations, new_row], ignore_index=True)

    recommendations = recommendations.sort_values(by='Predicted Player Rating', ascending=True).head(n_recommendations)
    recommendations['Predicted Player Rating'] = recommendations['Predicted Player Rating'].round(5)
    return recommendations


'''
I was planning on combining these 2 functions all into one, but due to time restrains, 
it seems easier to keep these functions separate, as the results seem to be just fine as is.
'''

'''Prints the most similar games for both high and low ratings'''

def print_recommendations(game_ratings, n_recommendations=10):
    # Determine the nature of the user ratings
    ratings = list(game_ratings.values())
    all_high = all(rating > 5 for rating in ratings)
    all_low = all(rating < 5 for rating in ratings)

    # If all game ratings are higher than 5
    if all_high:
        high_recommendations = high_rating_recommended_games(game_ratings, n_recommendations)
        high_recommendations = high_recommendations.style.hide_index()
        print("Similar games you may give a high rating:")
        display(high_recommendations)

    # If all game ratings are lower than 5
    elif all_low:
        low_recommendations = low_rating_recommended_games(game_ratings, n_recommendations)
        low_recommendations = low_recommendations.style.hide_index()
        print("Similar games you may give a low rating:")
        display(low_recommendations)

    # Ratings are above and below 5
    else:
        high_recommendations = high_rating_recommended_games(game_ratings, n_recommendations)
        low_recommendations = low_rating_recommended_games(game_ratings, n_recommendations)
        
        print("Similar games you may give a high rating:")
        high_recommendations = high_recommendations.style.hide_index()
        display(high_recommendations)
        
        print("\nSimilar games you may give a low rating:")
        low_recommendations = low_recommendations.style.hide_index()
        display(low_recommendations)



game_ratings = {"Apples to Apples": 6, "The Game of Life": 8, "UNO": 9, "Skip-Bo": 8, "Catan": 5, "Clue": 7, "Operation": 4}

games_and_ratings = pd.DataFrame(list(game_ratings.items()), columns=['Boardgame Name', 'Rating']).style.hide_index()
print("User Boardgame Ratings:")
display(games_and_ratings)

print_recommendations(game_ratings, n_recommendations=15)


User Boardgame Ratings:


Boardgame Name,Rating
Apples to Apples,6
The Game of Life,8
UNO,9
Skip-Bo,8
Catan,5
Clue,7
Operation,4


Similar games you may give a high rating:


Boardgame Name,Average Rating,Predicted Player Rating
UNO Dare!,4.9781,9.0
Mad Magazine Card Game,5.83031,8.19128
UNO Attack!,5.43512,8.03356
Elfer raus!,5.18873,8.0
The Game of Life (2013- Editions),4.8751,8.0
Star Wars: Episode 1 – Clash of the Lightsabers,6.21853,7.77983
11 nimmt!,6.2905,7.76358
Holiday Fluxx,6.27126,7.76049
Seven7s,6.17441,7.74482
Cartoon Network Fluxx,5.92052,7.74218



Similar games you may give a low rating:


Boardgame Name,Average Rating,Predicted Player Rating
Tiddledy Winks,3.9982,3.99909
Bed Bugs,4.38615,4.19469
Test Match,4.44165,4.22258
Jack Straws,4.55939,4.28171
Rock 'Em Sock 'Em Robots,4.58716,4.29572
Labyrinth,4.61668,4.31023
Marbles,4.69182,4.34886
LEGO Soccer,5.76986,4.38008
Pie Face,4.87533,4.44125
Compact Bowling,5.95,4.52701
