In [1]:
# Testing notebook: takes the V1 recommendation example and adapts it for inference in the app.

In [2]:
# Import the required modules
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np

# Pre-Processing
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

# Models
from sklearn.neighbors import NearestNeighbors

# suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [3]:
def make_recommendation(gamelist_length, game, data_path="resources/ml_clean.csv"):
    """Return the games most similar to ``game`` using a cosine nearest-neighbours search.

    The dataset is read from ``data_path``, rows containing any missing value
    are dropped, the feature columns are scaled / encoded, and a
    ``NearestNeighbors`` model fitted on every remaining game is queried with
    the target game's own feature vector. Because the target game is part of
    the fitted data, it is typically the first entry of the result.

    Parameters
    ----------
    gamelist_length : int
        Number of games to return. Capped at the number of usable rows so a
        large request on a small dataset does not fail.
    game : str
        Exact value of the ``name`` column identifying the target game.
    data_path : str, optional
        CSV file to load. Defaults to the notebook's ``resources/ml_clean.csv``.

    Returns
    -------
    list of dict
        One dict per recommended game holding every dataset column plus a
        ``"distance"`` key (cosine distance to the target), ordered from the
        most to the least similar.

    Raises
    ------
    IndexError
        If no row has ``name == game`` after incomplete rows are dropped.
    KeyError
        If the dataset lacks one or more of the required feature columns.
    ValueError
        If ``gamelist_length`` is smaller than 1.
    """
    # Step 1: Load the dataset and keep only complete rows.
    # NOTE: duplicate games are not removed here; if the CSV contains repeated
    # game_ids they can appear more than once in the recommendations.
    df = pd.read_csv(data_path).dropna(how="any").reset_index(drop=True)

    # Step 2: Declare the feature groups; each group gets its own preprocessing.
    numeric_features = ['max_players', 'max_playtime', 'min_age', 'min_players',
                        'min_playtime', 'playing_time',
                        'average_rating', 'users_rated', 'category_count', 'mechanic_count',
                        'len_description', 'description_sentiment']
    binary_features = ['has_expansion']
    categorical_features = ['category', 'mechanic', 'binned_playtime', 'binned_mechanics',
                            'binned_min_age', 'binned_category']
    feature_cols = numeric_features + binary_features + categorical_features

    # Step 3: Validate the request up front so failures are cheap and explicit.
    name_matches = df.loc[df["name"] == game, "game_id"]
    if name_matches.empty:
        raise IndexError(f"Game {game!r} was not found in {data_path!r}")
    game_id = name_matches.iloc[0]  # first match wins when a name is repeated

    missing_cols = [col for col in feature_cols if col not in df.columns]
    if missing_cols:
        raise KeyError(
            f"{missing_cols} not in index: {data_path!r} lacks required feature columns"
        )

    if gamelist_length < 1:
        raise ValueError(f"gamelist_length must be at least 1, got {gamelist_length!r}")
    # kneighbors() rejects n_neighbors larger than the number of fitted samples.
    k = min(gamelist_length, len(df))

    # Step 4: Build the preprocessing pipeline.
    # The imputers are a safety net only: dropna() above already removed every
    # missing value. They use the default missing-value marker, matching
    # make_recommendation_1.
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())])
    binary_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('label', OrdinalEncoder())])
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))])
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('binary', binary_transformer, binary_features),
            ('cat', categorical_transformer, categorical_features)])

    # Step 5: Fit the preprocessor and the neighbours model on every game.
    X_preprocessed = preprocessor.fit_transform(df.loc[:, feature_cols])
    model1 = NearestNeighbors(n_neighbors=k, metric="cosine")
    model1.fit(X_preprocessed)

    # Step 6: Encode the target game. Only the first row for the game_id is
    # queried because only the first query result is used below.
    game_features = df.loc[df["game_id"] == game_id, feature_cols].iloc[[0]]
    game_features_preprocessed = preprocessor.transform(game_features)

    # Step 7: Find the games most similar to the target game.
    distances, indices = model1.kneighbors(game_features_preprocessed)

    # Step 8: Collect the neighbours. .copy() gives an independent frame, so
    # adding the distance column does not write into a slice of df.
    games = df.iloc[indices[0]].copy()
    games["distance"] = distances[0]
    games = games.sort_values(by="distance")  # most similar first

    # Step 9: Return the recommended games as a list of dictionaries.
    return games.to_dict(orient="records")

In [4]:
# Example request: ten games similar to "Catan"
game, gamelist_length = "Catan", 10

# This is the call the Flask app relies on; it yields one dict per recommended game
response = make_recommendation(gamelist_length=gamelist_length, game=game)

# Show the records as a table for a quick visual check
pd.DataFrame(response)

KeyError: "['binned_playtime', 'binned_mechanics', 'binned_min_age', 'binned_category'] not in index"

In [14]:
## FOR THE APP

def make_recommendation_1(name, gamelist_length, max_players, max_playtime, min_age,
                          min_players, min_playtime, average_rating,
                          data_path="resources/ml_clean.csv"):
    """Recommend games similar to ``name`` among the games that pass the given filters.

    The dataset is read from ``data_path`` and rows with any missing value are
    dropped. It is then narrowed to the games satisfying every filter below,
    and a cosine ``NearestNeighbors`` model fitted on that subset is queried
    with the target game's feature vector. The target game must itself pass
    the filters.

    Parameters
    ----------
    name : str
        Exact value of the ``name`` column identifying the target game.
    gamelist_length : int
        Number of games to return. Capped at the number of games that pass
        the filters.
    max_players, max_playtime : number
        Upper bounds: a game is kept when its ``max_players`` /
        ``max_playtime`` is less than or equal to the given value.
    min_age, min_players, min_playtime, average_rating : number
        Lower bounds: a game is kept when its value in the column of the same
        name is greater than or equal to the given value.
    data_path : str, optional
        CSV file to load. Defaults to the notebook's ``resources/ml_clean.csv``.

    Returns
    -------
    list of dict
        One dict per recommended game holding every dataset column plus a
        ``"distance"`` key (cosine distance to the target), in the order
        returned by ``kneighbors``.

    Raises
    ------
    KeyError
        If the dataset lacks a required filter, feature or metadata column.
    ValueError
        If ``gamelist_length`` is smaller than 1 or no game passes the filters.
    IndexError
        If ``name`` is not in the dataset, or is excluded by the filters.
    """
    # Step 1: Load the dataset and keep only complete rows.
    df = pd.read_csv(data_path).dropna(how="any").reset_index(drop=True)

    # Step 2: Declare the feature groups; each group gets its own preprocessing.
    numeric_features = ['max_players', 'min_players',
                        'average_rating', 'users_rated', 'category_count', 'mechanic_count',
                        'len_description', 'description_sentiment']
    binary_features = ['has_expansion']
    categorical_features = ['binned_playtime', 'binned_mechanics',
                            'binned_min_age', 'binned_category']
    feature_cols = numeric_features + binary_features + categorical_features
    filter_cols = ['max_players', 'max_playtime', 'min_age', 'min_players',
                   'min_playtime', 'average_rating']

    # Step 3: Validate the request up front so failures are cheap and explicit.
    required_cols = ['game_id', 'name'] + filter_cols + feature_cols
    missing_cols = [col for col in dict.fromkeys(required_cols) if col not in df.columns]
    if missing_cols:
        raise KeyError(
            f"{missing_cols} not in index: {data_path!r} lacks required columns"
        )
    if gamelist_length < 1:
        raise ValueError(f"gamelist_length must be at least 1, got {gamelist_length!r}")

    # Step 4: Filter the dataset based on the input parameters.
    df_filtered = df[
        (df['max_players'] <= max_players) &
        (df['max_playtime'] <= max_playtime) &
        (df['min_age'] >= min_age) &
        (df['min_players'] >= min_players) &
        (df['min_playtime'] >= min_playtime) &
        (df['average_rating'] >= average_rating)
    ]
    if df_filtered.empty:
        raise ValueError("No games match the requested filters")

    # Step 5: Locate the target game by name inside the filtered set.
    name_matches = df_filtered.loc[df_filtered["name"] == name, "game_id"]
    if name_matches.empty:
        if (df["name"] == name).any():
            raise IndexError(f"Game {name!r} is excluded by the requested filters")
        raise IndexError(f"Game {name!r} was not found in {data_path!r}")
    game_id = name_matches.iloc[0]  # first match wins when a name is repeated

    # kneighbors() rejects n_neighbors larger than the number of fitted samples.
    k = min(gamelist_length, len(df_filtered))

    # Step 6: Build the preprocessing pipeline. The imputers are a safety net
    # only, since dropna() above already removed every missing value.
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())])
    binary_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('label', OrdinalEncoder())])
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))])
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('binary', binary_transformer, binary_features),
            ('cat', categorical_transformer, categorical_features)])

    # Step 7: Fit the preprocessor and the neighbours model on the filtered games.
    X_preprocessed = preprocessor.fit_transform(df_filtered.loc[:, feature_cols])
    model1 = NearestNeighbors(n_neighbors=k, metric="cosine")
    model1.fit(X_preprocessed)

    # Step 8: Encode the target game. Only the first row for the game_id is
    # queried because only the first query result is used below.
    game_features = df_filtered.loc[df_filtered["game_id"] == game_id, feature_cols].iloc[[0]]
    game_features_preprocessed = preprocessor.transform(game_features)

    # Step 9: Find the nearest neighbours (most similar games).
    distances, indices = model1.kneighbors(game_features_preprocessed)

    # Step 10: Collect the neighbours. kneighbors() returns positions within
    # the fitted matrix, hence iloc on df_filtered (its index labels were not
    # reset). .copy() avoids writing the new column into a slice.
    games = df_filtered.iloc[indices[0]].copy()
    games["distance"] = distances[0]

    # Step 11: Return the recommended games as a list of dictionaries.
    return games.to_dict(orient="records")


In [15]:
# Example request: games similar to Catan within the player / playtime / age / rating limits below
name = 'Catan'
gamelist_length = 10
min_players, max_players = 3, 4
min_playtime, max_playtime = 60, 120
min_age = 5
min_average_rating = 5

# This is the call the Flask app relies on; keywords make the mapping of each limit explicit
response = make_recommendation_1(
    name=name,
    gamelist_length=gamelist_length,
    max_players=max_players,
    max_playtime=max_playtime,
    min_age=min_age,
    min_players=min_players,
    min_playtime=min_playtime,
    average_rating=min_average_rating,
)

# Show the records as a table for a quick visual check
pd.DataFrame(response)

Unnamed: 0,game_id,max_players,max_playtime,min_age,min_players,min_playtime,name,playing_time,category,mechanic,average_rating,users_rated,category_count,mechanic_count,has_expansion,binned_playtime,binned_mechanics,binned_min_age,binned_category,len_description,description_sentiment,distance
0,13,4,120,10,3,60,Catan,120,negotiation,dice rolling,7.26569,67655,1,5,1,Long (61-120 minutes),Action & Movement Mechanics,Pre-Teen (10-12),Social/Party,457,0.098867,0.0
1,43111,4,120,13,3,60,Chaos in the Old World,120,fantasy,action point allowance system,7.6853,11213,5,8,1,Long (61-120 minutes),Action & Movement Mechanics,Teen (13+),Fantasy/Adventure,240,0.044097,0.629589
2,117985,4,75,10,3,75,Star Trek: Catan,75,movies / tv / radio theme,dice rolling,7.07678,2206,3,5,1,Long (61-120 minutes),Action & Movement Mechanics,Pre-Teen (10-12),Entertainment/Pop Culture,322,0.154146,0.707363
3,124,4,90,12,3,90,Primordial Soup,90,prehistoric,dice rolling,6.98124,4240,1,5,1,Long (61-120 minutes),Action & Movement Mechanics,Pre-Teen (10-12),History/War,99,0.043056,0.707484
4,67239,4,120,12,3,120,Catan Histories: Settlers of America – Trails ...,120,civilization,dice rolling,6.89457,1761,2,4,0,Long (61-120 minutes),Action & Movement Mechanics,Pre-Teen (10-12),Strategy,218,0.126449,0.768839
5,57998,4,120,12,3,120,Boże Igrzysko,120,civilization,area control / area influence,7.66667,228,4,4,0,Long (61-120 minutes),Strategic Mechanics,Pre-Teen (10-12),Strategy,899,0.138017,0.78364
6,38749,4,60,10,3,60,Catan Geographies: Germany,60,negotiation,dice rolling,6.9088,565,1,5,0,Medium (31-60 minutes),Action & Movement Mechanics,Pre-Teen (10-12),Social/Party,243,0.190693,0.787306
7,3800,4,60,8,3,60,Himalaya,60,economic,action / movement programming,7.03068,1947,3,5,1,Medium (31-60 minutes),Action & Movement Mechanics,Middle Childhood (7-9),Strategy,318,0.0825,0.7891
8,39927,4,120,13,3,120,Last Train to Wensleydale,120,trains,auction/bidding,7.15725,927,1,3,0,Long (61-120 minutes),Action & Movement Mechanics,Teen (13+),Strategy,324,-0.00958,0.797471
9,103091,4,120,12,3,120,Catan Histories: Merchants of Europe,120,civilization,dice rolling,6.79848,348,2,3,0,Long (61-120 minutes),Action & Movement Mechanics,Pre-Teen (10-12),Strategy,328,0.112758,0.808978
