In [1]:
# Import the required modules
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Machine Learning
from sklearn.model_selection import train_test_split

# Pre-Processing
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

# Models
from sklearn.neighbors import NearestNeighbors

# suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load the cleaned board-game dataset, print its (rows, columns) shape,
# and preview the first five rows.
df = pd.read_csv("resources/games_clean.csv")
print(df.shape)
df.head()

(9507, 17)


Unnamed: 0,game_id,max_players,max_playtime,min_age,min_players,min_playtime,name,playing_time,category,mechanic,average_rating,users_rated,category_count,mechanic_count,has_expansion,len_description,description_sentiment
0,1,5,240,14,3,240,Die Macher,240,economic,area control / area influence,7.66508,4498,3,5,0,222,0.091012
1,2,4,30,12,3,30,Dragonmaster,30,card game,trick-taking,6.60815,478,2,1,0,154,0.055291
2,3,4,60,10,2,30,Samurai,60,abstract strategy,area control / area influence,7.44119,12019,2,4,0,183,-0.05625
3,4,4,60,12,2,60,Tal der Könige,60,ancient,action point allowance system,6.60675,314,1,4,0,104,-0.007908
4,5,6,90,12,3,90,Acquire,90,economic,hand management,7.3583,15195,1,3,0,191,0.168056


In [4]:
# Summarise column dtypes and non-null counts to spot missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9507 entries, 0 to 9506
Data columns (total 17 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   game_id                9507 non-null   int64  
 1   max_players            9507 non-null   int64  
 2   max_playtime           9507 non-null   int64  
 3   min_age                9507 non-null   int64  
 4   min_players            9507 non-null   int64  
 5   min_playtime           9507 non-null   int64  
 6   name                   9507 non-null   object 
 7   playing_time           9507 non-null   int64  
 8   category               9507 non-null   object 
 9   mechanic               9507 non-null   object 
 10  average_rating         9507 non-null   float64
 11  users_rated            9507 non-null   int64  
 12  category_count         9507 non-null   int64  
 13  mechanic_count         9507 non-null   int64  
 14  has_expansion          9507 non-null   int64  
 15  len_

In [5]:
# Drop every row that has a missing value in any column, then rebuild a
# contiguous 0..n-1 index so positional and label-based lookups agree.
df = df.dropna(how="any").reset_index(drop=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9507 entries, 0 to 9506
Data columns (total 17 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   game_id                9507 non-null   int64  
 1   max_players            9507 non-null   int64  
 2   max_playtime           9507 non-null   int64  
 3   min_age                9507 non-null   int64  
 4   min_players            9507 non-null   int64  
 5   min_playtime           9507 non-null   int64  
 6   name                   9507 non-null   object 
 7   playing_time           9507 non-null   int64  
 8   category               9507 non-null   object 
 9   mechanic               9507 non-null   object 
 10  average_rating         9507 non-null   float64
 11  users_rated            9507 non-null   int64  
 12  category_count         9507 non-null   int64  
 13  mechanic_count         9507 non-null   int64  
 14  has_expansion          9507 non-null   int64  
 15  len_

In [6]:
# Keep only the text (object-dtype) columns
string_cols = df.select_dtypes(include="object")

# Number of distinct values per text column; drives the choice of encoding
unique_counts = string_cols.nunique()
unique_counts

name        9372
category      82
mechanic      51
dtype: int64

In [7]:
# Work on an independent copy so later steps never modify `df`
df2=df.copy()

In [8]:
# Confirm the copy has the same columns, dtypes and non-null counts
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9507 entries, 0 to 9506
Data columns (total 17 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   game_id                9507 non-null   int64  
 1   max_players            9507 non-null   int64  
 2   max_playtime           9507 non-null   int64  
 3   min_age                9507 non-null   int64  
 4   min_players            9507 non-null   int64  
 5   min_playtime           9507 non-null   int64  
 6   name                   9507 non-null   object 
 7   playing_time           9507 non-null   int64  
 8   category               9507 non-null   object 
 9   mechanic               9507 non-null   object 
 10  average_rating         9507 non-null   float64
 11  users_rated            9507 non-null   int64  
 12  category_count         9507 non-null   int64  
 13  mechanic_count         9507 non-null   int64  
 14  has_expansion          9507 non-null   int64  
 15  len_

In [9]:
# List the column names; used to build the meta/feature column lists
df2.columns

Index(['game_id', 'max_players', 'max_playtime', 'min_age', 'min_players',
       'min_playtime', 'name', 'playing_time', 'category', 'mechanic',
       'average_rating', 'users_rated', 'category_count', 'mechanic_count',
       'has_expansion', 'len_description', 'description_sentiment'],
      dtype='object')

In [10]:
# Identifier columns: kept for lookup/display, not part of the feature matrix
meta_cols = ["game_id", "name"]

# Model inputs: every remaining column, in the same order as the dataframe
feature_cols = [
    "max_players",
    "max_playtime",
    "min_age",
    "min_players",
    "min_playtime",
    "playing_time",
    "category",
    "mechanic",
    "average_rating",
    "users_rated",
    "category_count",
    "mechanic_count",
    "has_expansion",
    "len_description",
    "description_sentiment",
]

In [11]:
# Preprocessing: one small pipeline per feature family, combined column-wise.

# --- Column groups -----------------------------------------------------------
numeric_features = [
    'max_players', 'max_playtime', 'min_age', 'min_players',
    'min_playtime', 'playing_time',
    'average_rating', 'users_rated', 'category_count', 'mechanic_count',
    'len_description', 'description_sentiment',
]  # could also be derived with select_dtypes instead of being listed by hand
binary_features = ['has_expansion']
categorical_features = ['category', 'mechanic']

# --- Per-group transformers --------------------------------------------------
# Numeric: fill gaps with the column mean, then standardise
numeric_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ]
)

# Binary: fill gaps with the most frequent value, then ordinal-encode
binary_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='most_frequent', missing_values=pd.NA)),
        ('label', OrdinalEncoder()),
    ]
)

# Categorical: fill gaps with the most frequent value, then one-hot encode.
# handle_unknown='ignore' makes categories unseen during fit encode as all zeros.
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='most_frequent', missing_values=pd.NA)),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]
)

# --- Combined preprocessor ---------------------------------------------------
# The order (num, binary, cat) fixes the column order of the transformed matrix.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('binary', binary_transformer, binary_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

In [13]:
# Correlation Analysis: build a dataframe of the transformed features so we can
# see what the data looks like after preprocessing.
# Side effect: fit_transform() fits `preprocessor` on df2.

# Use only the preprocessing pipeline to transform the data
transformed = preprocessor.fit_transform(df2)

# ColumnTransformer returns a sparse matrix only when the stacked result is
# sparse enough (see its `sparse_threshold`), so densify only when needed
# instead of calling .toarray() unconditionally.
if hasattr(transformed, "toarray"):
    preprocessed_X_train = transformed.toarray()
else:
    preprocessed_X_train = np.asarray(transformed)

# Column names after encoding: numeric and binary columns keep their names,
# the one-hot encoder expands each categorical column into one column per level.
# Look the encoder up by name rather than by position in `transformers_`.
onehot_encoder = preprocessor.named_transformers_['cat']['onehot']
encoded_feature_names = (
    numeric_features
    + binary_features
    + list(onehot_encoder.get_feature_names_out(categorical_features))
)

# Reuse df2's index so the meta columns align row-for-row on assignment
df_final = pd.DataFrame(preprocessed_X_train, columns=encoded_feature_names, index=df2.index)
df_final[meta_cols] = df2.loc[:, meta_cols]
df_final.head()

Unnamed: 0,max_players,max_playtime,min_age,min_players,min_playtime,playing_time,average_rating,users_rated,category_count,mechanic_count,len_description,description_sentiment,has_expansion,category_abstract strategy,category_action / dexterity,category_adventure,category_age of reason,category_american civil war,category_american indian wars,category_american revolutionary war,category_american west,category_ancient,category_animals,category_arabian,category_aviation / flight,category_bluffing,category_book,category_card game,category_children's game,category_city building,category_civil war,category_civilization,category_collectible components,category_comic book / strip,category_deduction,category_dice,category_economic,category_educational,category_electronic,category_environmental,category_expansion for base-game,category_exploration,category_fantasy,category_farming,category_fighting,category_game system,category_horror,category_humor,category_industry / manufacturing,category_korean war,category_mafia,category_math,category_mature / adult,category_maze,category_medical,category_medieval,category_memory,category_miniatures,category_modern warfare,category_movies / tv / radio theme,category_murder/mystery,category_music,category_mythology,category_napoleonic,category_nautical,category_negotiation,category_novel-based,category_party game,category_pike and shot,category_pirates,category_political,category_post-napoleonic,category_prehistoric,category_print & play,category_puzzle,category_racing,category_real-time,category_religious,category_renaissance,category_science fiction,category_space exploration,category_spies/secret agents,category_sports,category_territory building,category_trains,category_transportation,category_travel,category_trivia,category_video game theme,category_vietnam war,category_wargame,category_word game,category_world war i,category_world war ii,category_zombies,mechanic_acting,mechanic_action / movement programming,mechanic_action point 
allowance system,mechanic_area control / area influence,mechanic_area enclosure,mechanic_area movement,mechanic_area-impulse,mechanic_auction/bidding,mechanic_betting/wagering,mechanic_campaign / battle card driven,mechanic_card drafting,mechanic_chit-pull system,mechanic_co-operative play,mechanic_commodity speculation,mechanic_crayon rail system,mechanic_deck / pool building,mechanic_dice rolling,mechanic_grid movement,mechanic_hand management,mechanic_hex-and-counter,mechanic_line drawing,mechanic_memory,mechanic_modular board,mechanic_paper-and-pencil,mechanic_partnerships,mechanic_pattern building,mechanic_pattern recognition,mechanic_pick-up and deliver,mechanic_player elimination,mechanic_point to point movement,mechanic_press your luck,mechanic_rock-paper-scissors,mechanic_role playing,mechanic_roll / spin and move,mechanic_route/network building,mechanic_secret unit deployment,mechanic_set collection,mechanic_simulation,mechanic_simultaneous action selection,mechanic_singing,mechanic_stock holding,mechanic_storytelling,mechanic_take that,mechanic_tile placement,mechanic_time track,mechanic_trading,mechanic_trick-taking,mechanic_variable phase order,mechanic_variable player powers,mechanic_voting,mechanic_worker placement,game_id,name
0,-0.025793,0.206217,1.216348,1.407807,0.230367,0.206217,1.484124,1.17858,0.253625,1.682974,0.173831,0.315755,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,Die Macher
1,-0.086458,-0.096368,0.633186,1.407807,-0.08257,-0.096368,0.218884,-0.152598,-0.494269,-1.013358,-0.310715,0.02933,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2,Dragonmaster
2,-0.086458,-0.053141,0.050023,-0.094496,-0.08257,-0.053141,1.216107,3.669074,-0.494269,1.008891,-0.10407,-0.86505,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,Samurai
3,-0.086458,-0.053141,0.633186,-0.094496,-0.037865,-0.053141,0.217208,-0.206905,-1.242162,1.008891,-0.666999,-0.477426,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,Tal der Könige
4,0.034873,-0.009915,0.633186,1.407807,0.00684,-0.009915,1.116881,4.720771,-1.242162,0.334808,-0.047065,0.933521,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,Acquire


In [14]:
# Pairwise correlations between the transformed features only
# (the identifier columns in meta_cols are masked out first)
is_feature_col = ~df_final.columns.isin(meta_cols)
corrs = df_final.loc[:, is_feature_col].corr()
corrs

Unnamed: 0,max_players,max_playtime,min_age,min_players,min_playtime,playing_time,average_rating,users_rated,category_count,mechanic_count,len_description,description_sentiment,has_expansion,category_abstract strategy,category_action / dexterity,category_adventure,category_age of reason,category_american civil war,category_american indian wars,category_american revolutionary war,category_american west,category_ancient,category_animals,category_arabian,category_aviation / flight,category_bluffing,category_book,category_card game,category_children's game,category_city building,category_civil war,category_civilization,category_collectible components,category_comic book / strip,category_deduction,category_dice,category_economic,category_educational,category_electronic,category_environmental,category_expansion for base-game,category_exploration,category_fantasy,category_farming,category_fighting,category_game system,category_horror,category_humor,category_industry / manufacturing,category_korean war,category_mafia,category_math,category_mature / adult,category_maze,category_medical,category_medieval,category_memory,category_miniatures,category_modern warfare,category_movies / tv / radio theme,category_murder/mystery,category_music,category_mythology,category_napoleonic,category_nautical,category_negotiation,category_novel-based,category_party game,category_pike and shot,category_pirates,category_political,category_post-napoleonic,category_prehistoric,category_print & play,category_puzzle,category_racing,category_real-time,category_religious,category_renaissance,category_science fiction,category_space exploration,category_spies/secret agents,category_sports,category_territory building,category_trains,category_transportation,category_travel,category_trivia,category_video game theme,category_vietnam war,category_wargame,category_word game,category_world war i,category_world war ii,category_zombies,mechanic_acting,mechanic_action / movement programming,mechanic_action point 
allowance system,mechanic_area control / area influence,mechanic_area enclosure,mechanic_area movement,mechanic_area-impulse,mechanic_auction/bidding,mechanic_betting/wagering,mechanic_campaign / battle card driven,mechanic_card drafting,mechanic_chit-pull system,mechanic_co-operative play,mechanic_commodity speculation,mechanic_crayon rail system,mechanic_deck / pool building,mechanic_dice rolling,mechanic_grid movement,mechanic_hand management,mechanic_hex-and-counter,mechanic_line drawing,mechanic_memory,mechanic_modular board,mechanic_paper-and-pencil,mechanic_partnerships,mechanic_pattern building,mechanic_pattern recognition,mechanic_pick-up and deliver,mechanic_player elimination,mechanic_point to point movement,mechanic_press your luck,mechanic_rock-paper-scissors,mechanic_role playing,mechanic_roll / spin and move,mechanic_route/network building,mechanic_secret unit deployment,mechanic_set collection,mechanic_simulation,mechanic_simultaneous action selection,mechanic_singing,mechanic_stock holding,mechanic_storytelling,mechanic_take that,mechanic_tile placement,mechanic_time track,mechanic_trading,mechanic_trick-taking,mechanic_variable phase order,mechanic_variable player powers,mechanic_voting,mechanic_worker placement
max_players,1.000000,-0.002636,-0.011429,0.075155,-0.003320,-0.002636,-0.028721,-0.000860,0.023899,-0.011697,-0.016640,0.011805,-0.001116,-0.026651,0.029578,-0.008589,-0.010722,-0.020231,-0.004714,-0.003053,-0.003097,-0.012994,-0.003098,-0.002499,0.016463,0.026038,0.007503,0.040385,-0.002335,-0.007187,-0.007305,-0.003512,-0.009442,0.005584,0.004132,0.016524,-0.003059,-0.001382,0.004124,-0.002018,-0.002131,-0.003966,-0.008165,-0.001270,-0.005952,0.033314,-0.001957,0.018042,-0.002577,-0.004332,-0.000917,0.025571,0.006828,-0.000749,-0.002347,-0.008946,0.024137,-0.008510,-0.020344,-0.003363,0.000792,0.002112,-0.002412,-0.019580,-0.011254,-0.000618,-0.001816,0.044176,-0.008220,-0.002151,-0.002237,-0.008463,-0.000790,-0.005406,-0.004731,0.006472,0.000938,-0.001254,-0.004110,-0.010783,-0.001151,-0.001641,0.000600,-0.001530,-0.002323,-0.001526,-0.001001,0.000254,-0.002255,-0.007028,-0.043569,0.012847,-0.000887,-0.002574,-0.000648,0.060814,0.007607,-0.019995,-0.019496,-0.009308,-0.015954,-0.006173,0.004544,0.002330,-0.014146,-0.007246,-0.016499,0.000831,0.000170,0.000600,-0.006440,-0.000414,-0.015624,-0.003916,-0.047724,0.027802,0.055924,-0.008174,0.017734,0.043948,0.000684,0.010435,-0.003226,0.013906,-0.007440,0.007741,-0.004695,0.047321,0.006787,-0.001100,-0.006055,-0.004503,0.000389,0.001606,0.004931,0.000624,0.018042,0.001023,-0.008788,-0.001463,0.001636,0.000330,-0.002806,0.007826,0.123393,-0.003841
max_playtime,-0.002636,1.000000,0.039816,0.064401,0.975326,1.000000,0.054524,-0.006217,-0.006235,0.006029,0.049684,-0.004955,0.001603,-0.025235,-0.019248,-0.005633,0.012784,0.025240,0.005273,0.002161,-0.004967,0.004411,-0.018004,-0.001470,-0.001710,-0.015251,-0.002700,-0.041111,-0.014343,-0.002145,0.010411,0.004983,-0.004647,-0.002936,-0.008169,-0.014262,0.015763,-0.004547,-0.004017,-0.000953,-0.000988,-0.002465,-0.003525,-0.002402,-0.001562,0.000336,-0.002820,-0.005790,-0.001646,0.009764,-0.001377,-0.001715,-0.002784,-0.002378,-0.001275,0.002300,-0.002594,0.001424,0.015464,-0.003467,-0.000288,-0.001934,-0.002122,0.022692,0.017709,0.004330,-0.001051,-0.007883,0.008134,-0.002800,0.004805,0.003438,-0.001621,-0.002888,-0.005014,-0.003204,-0.004792,-0.000928,-0.000357,-0.000124,0.001127,-0.000688,-0.001476,-0.002787,-0.001067,-0.002182,-0.002005,-0.002192,-0.001755,0.005282,0.132299,-0.005070,-0.000767,-0.001398,-0.001245,-0.008338,-0.004330,0.001066,-0.004392,-0.007288,0.022750,0.014295,-0.004222,-0.009975,0.011445,-0.017018,0.026523,-0.004997,-0.001041,0.004493,-0.007108,0.016352,-0.008350,-0.029471,0.089234,-0.004801,-0.014509,-0.008281,-0.005139,-0.009097,-0.013698,-0.011792,-0.003364,-0.004608,-0.005422,-0.006892,-0.004856,-0.001891,-0.010085,0.004736,-0.001895,-0.012217,-0.003042,-0.007091,-0.001456,-0.001653,-0.004729,-0.003020,-0.011189,-0.000942,-0.001023,-0.007682,-0.001210,-0.002786,-0.004532,-0.000725
min_age,-0.011429,0.039816,1.000000,0.037072,0.035911,0.039816,0.172478,0.070322,0.067825,0.166384,0.078679,-0.005046,0.146234,-0.118516,-0.137254,0.036529,0.027996,0.045085,0.013469,0.005881,0.004256,0.037632,-0.116344,0.006114,0.007942,0.027530,-0.000939,0.001390,-0.161918,0.032228,0.000772,0.044104,0.009050,0.012056,0.002472,-0.040133,0.069443,-0.006404,0.017032,0.011085,0.006494,0.012685,0.035893,0.000768,0.038862,-0.012926,0.041445,0.017047,0.008321,0.032012,0.014736,-0.014905,0.051109,-0.013357,-0.000904,0.012738,0.010719,0.001540,0.042784,0.013313,0.007798,0.005683,0.009193,0.009333,0.010595,0.012968,-0.005515,0.041470,0.011033,-0.011921,0.020008,0.010645,0.002117,-0.009167,-0.021931,-0.000520,-0.020012,-0.003504,0.011208,0.033604,0.020468,0.011534,0.004983,0.003712,0.009285,0.005795,-0.016411,0.001851,0.004342,-0.018953,0.049480,-0.021618,0.006494,0.000726,0.019576,0.014158,0.031253,0.079554,0.059019,-0.020801,0.021018,0.007280,0.026056,-0.013722,0.011193,0.033505,-0.015106,0.014785,0.020121,0.008491,0.055775,-0.053007,-0.027673,-0.037210,0.087878,-0.023882,-0.079572,-0.018343,-0.004406,0.016158,-0.092220,-0.094387,-0.014381,-0.028410,-0.029073,-0.038342,-0.004612,0.015701,-0.037910,0.033981,-0.019480,-0.029534,0.004627,-0.019902,-0.002565,-0.016341,0.024397,-0.007320,-0.048980,-0.000469,0.010151,-0.005690,0.025277,-0.006094,0.037983,0.014740
min_players,0.075155,0.064401,0.037072,1.000000,0.071222,0.064401,-0.124563,0.005561,0.028103,-0.028971,-0.060709,0.010233,-0.057994,-0.046103,0.014783,-0.091682,-0.027672,-0.048363,-0.021251,-0.020042,0.002649,-0.004855,0.017208,0.024245,-0.041951,0.196646,-0.011590,0.056090,-0.014028,-0.021155,-0.032622,0.009790,-0.013480,-0.004015,0.051421,-0.060347,0.041381,-0.013544,0.008771,-0.007941,-0.016378,-0.012807,-0.024626,0.003154,-0.035474,-0.001679,-0.021553,0.099713,-0.000890,-0.021162,0.018894,-0.015952,0.038215,-0.002742,-0.008391,0.013634,0.041701,-0.021014,-0.047075,-0.001883,0.035406,0.029956,-0.003066,-0.058614,-0.012468,0.054531,-0.021809,0.145680,-0.015445,0.012169,0.014831,-0.025926,0.018442,-0.019716,-0.033095,0.004123,-0.017520,-0.001371,0.012361,-0.026136,-0.017350,-0.014216,-0.022632,0.008213,0.002869,0.003830,0.002229,0.009332,-0.010576,-0.016711,-0.088903,0.003612,-0.000969,-0.012267,0.003917,0.221407,-0.021244,-0.059830,0.005552,-0.019117,-0.040293,-0.016921,0.097738,0.052745,-0.052836,-0.031365,-0.065838,-0.120899,0.015720,0.000780,-0.038053,-0.081425,-0.018227,0.048846,-0.095870,0.073598,0.031217,-0.021229,0.035339,0.208914,-0.039664,-0.001094,0.004067,0.032773,-0.008370,0.021530,0.008604,0.096014,0.003805,0.018090,-0.002362,0.017721,-0.033535,0.062222,0.025012,0.010729,0.061925,0.018442,-0.024711,-0.001939,0.022161,0.083334,0.006683,0.002351,0.098623,-0.014201
min_playtime,-0.003320,0.975326,0.035911,0.071222,1.000000,0.975326,0.035431,-0.009339,-0.007600,-0.002485,0.037698,-0.009182,-0.002689,-0.022573,-0.017116,-0.004936,0.013996,0.026591,0.004010,0.001659,-0.004342,0.005147,-0.016951,-0.000777,-0.000079,-0.014049,-0.001861,-0.037284,-0.012565,-0.002931,0.010620,0.003243,-0.003692,-0.002690,-0.007373,-0.013021,0.012961,-0.004108,-0.003737,-0.000939,-0.000847,-0.001236,-0.003514,-0.002064,-0.000633,-0.001114,-0.003200,-0.004904,-0.001687,0.005938,-0.000816,-0.001484,-0.002415,-0.001964,-0.001057,0.003209,-0.002129,0.002053,0.016011,-0.003050,0.000199,-0.001558,-0.001640,0.015626,0.009479,0.004998,-0.000459,-0.006322,0.009114,-0.002354,0.004201,0.003832,-0.001009,-0.002203,-0.004638,-0.002854,-0.004401,-0.000711,-0.000539,0.001028,0.001058,-0.001375,-0.000510,-0.001935,-0.000954,-0.001612,-0.001726,-0.002525,-0.001511,0.002958,0.118210,-0.004300,-0.000618,-0.001198,-0.001232,-0.007196,-0.004413,-0.003292,-0.004996,-0.006596,0.014306,0.015696,-0.002443,-0.008555,0.011341,-0.016354,0.012004,-0.005364,-0.000781,0.003498,-0.007399,0.016185,-0.007788,-0.026254,0.088665,-0.004123,-0.012950,-0.006819,-0.004376,-0.008035,-0.012162,-0.010453,-0.002727,-0.004890,-0.004219,-0.006253,-0.004231,-0.000917,-0.007843,0.005554,-0.001384,-0.010783,-0.001914,-0.006239,-0.001202,-0.001197,-0.004281,-0.003402,-0.009829,-0.000968,-0.000426,-0.006511,-0.001422,-0.001858,-0.003402,-0.001514
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
mechanic_trick-taking,0.000330,-0.007682,-0.005690,0.083334,-0.006511,-0.007682,-0.028222,-0.019104,-0.084955,-0.087334,-0.016484,-0.003160,-0.047007,-0.023475,-0.016530,-0.020846,-0.008211,-0.010223,-0.002226,-0.002404,0.002600,-0.017672,0.004713,-0.005970,-0.011514,-0.014734,-0.003637,0.159539,-0.013149,-0.012094,-0.004548,-0.007791,-0.006879,-0.005970,-0.011023,-0.018147,-0.021628,-0.005461,-0.004983,-0.002874,-0.000908,-0.009086,-0.014969,-0.003858,-0.010550,-0.001574,-0.006999,-0.007462,-0.004067,-0.003149,-0.003149,-0.002032,-0.002404,-0.002570,-0.002404,-0.010749,-0.003401,-0.007737,-0.010182,-0.008211,-0.002570,-0.002570,-0.002874,-0.010469,-0.008610,-0.005147,-0.003015,-0.009887,-0.003637,-0.003521,-0.004899,-0.003858,-0.003749,-0.004814,-0.004456,-0.007176,-0.004362,-0.001285,-0.004814,-0.010306,-0.001817,-0.002404,-0.004814,-0.004362,-0.006040,-0.003637,-0.002726,-0.003277,-0.001574,-0.003149,-0.021628,-0.004814,-0.000908,-0.001285,-0.002226,-0.009178,-0.012304,-0.022306,-0.024913,-0.008004,-0.017316,-0.004727,-0.021607,-0.011588,-0.012164,-0.022597,-0.008755,-0.016557,-0.008108,-0.003277,-0.010630,-0.035908,-0.011061,-0.029924,-0.024777,-0.004814,-0.012750,-0.012269,-0.005461,-0.010510,-0.011365,-0.009450,-0.008561,-0.004814,-0.007845,-0.006440,-0.004266,-0.005384,-0.014734,-0.006694,-0.004899,-0.013727,-0.005611,-0.008159,-0.001574,-0.003015,-0.005970,-0.003749,-0.012234,-0.001817,-0.003277,1.000000,-0.002874,-0.006243,-0.005384,-0.004456
mechanic_variable phase order,-0.002806,-0.001210,0.025277,0.006683,-0.001422,-0.001210,0.014823,0.046272,0.013084,-0.026321,-0.006475,0.032864,0.016041,0.004503,-0.006056,-0.007637,-0.003008,-0.003746,-0.000815,-0.000881,-0.003346,-0.006475,0.009058,-0.002187,-0.004218,0.007522,-0.001332,0.000055,-0.004817,0.043986,-0.001666,-0.002854,-0.002520,-0.002187,-0.004038,0.009854,0.006157,-0.002001,-0.001826,-0.001053,-0.000333,-0.003329,-0.005484,-0.001413,-0.003865,-0.000577,-0.002564,0.036057,-0.001490,-0.001154,-0.001154,-0.000744,-0.000881,-0.000942,-0.000881,-0.003938,-0.001246,-0.002835,-0.003730,-0.003008,-0.000942,-0.000942,-0.001053,-0.003836,-0.003154,-0.001886,-0.001104,-0.003622,-0.001332,-0.001290,-0.001795,-0.001413,-0.001373,-0.001764,-0.001632,-0.002629,-0.001598,-0.000471,-0.001764,-0.003776,-0.000666,-0.000881,-0.001764,-0.001598,-0.002213,-0.001332,-0.000999,-0.001201,-0.000577,-0.001154,-0.007924,-0.001764,-0.000333,-0.000471,-0.000815,-0.003363,-0.004508,-0.008172,-0.009127,-0.002932,-0.006344,-0.001732,-0.007916,-0.004246,-0.004457,-0.008279,-0.003208,-0.006066,-0.002970,-0.001201,-0.003895,-0.013155,-0.004052,-0.010963,-0.009078,-0.001764,-0.004671,-0.004495,-0.002001,-0.003850,-0.004164,-0.003462,-0.003137,-0.001764,-0.002874,-0.002359,-0.001563,-0.001973,-0.005398,-0.002453,-0.001795,-0.005029,-0.002056,-0.002989,-0.000577,-0.001104,-0.002187,-0.001373,-0.004482,-0.000666,-0.001201,-0.002874,1.000000,-0.002287,-0.001973,-0.001632
mechanic_variable player powers,0.007826,-0.002786,-0.006094,0.002351,-0.001858,-0.002786,-0.024177,-0.015864,0.026850,-0.070417,-0.026924,-0.012843,-0.003634,-0.006570,0.003476,-0.003140,-0.006534,-0.008136,-0.001771,-0.001913,-0.007267,0.009382,-0.014983,-0.004751,-0.009163,0.004602,0.070281,0.013633,-0.010464,0.001564,-0.003619,-0.006200,0.072232,0.039949,0.015702,-0.006814,-0.017212,-0.004346,-0.003966,-0.002287,-0.000723,-0.007231,-0.002785,-0.003070,0.004373,-0.001252,0.013526,-0.005938,-0.003236,-0.002506,-0.002506,-0.001617,-0.001913,-0.002046,-0.001913,-0.008555,-0.002707,0.011141,-0.008103,-0.006534,-0.002046,-0.002046,-0.002287,-0.008332,-0.006852,0.021797,-0.002399,-0.007868,-0.002894,-0.002802,0.023296,-0.003070,-0.002983,-0.003831,-0.003546,0.012921,-0.003471,-0.001022,-0.003831,0.017924,-0.001446,0.053376,-0.003831,-0.003471,-0.004806,-0.002894,-0.002170,-0.002608,-0.001252,-0.002506,-0.017212,-0.003831,-0.000723,-0.001022,-0.001771,-0.007304,-0.009792,-0.017751,-0.019826,-0.006369,-0.013780,-0.003762,-0.017195,-0.009222,-0.009680,-0.017983,-0.006968,-0.013176,-0.006452,-0.002608,-0.008460,-0.028576,-0.008803,-0.023814,-0.019718,-0.003831,-0.010146,-0.009764,-0.004346,-0.008364,-0.009045,-0.007520,-0.006813,-0.003831,-0.006243,-0.005125,-0.003395,-0.004285,-0.011725,-0.005327,-0.003899,-0.010924,-0.004465,-0.006493,-0.001252,-0.002399,-0.004751,-0.002983,-0.009736,-0.001446,-0.002608,-0.006243,-0.002287,1.000000,-0.004285,-0.003546
mechanic_voting,0.123393,-0.004532,0.037983,0.098623,-0.003402,-0.004532,-0.058275,-0.013891,-0.024850,-0.061599,-0.026263,-0.009840,-0.010636,-0.016111,-0.001715,-0.014307,-0.005635,-0.007016,-0.001528,-0.001650,-0.006267,-0.012129,-0.012921,-0.004097,-0.007902,0.058067,-0.002496,0.008797,-0.009024,-0.008300,-0.003121,0.014549,-0.004721,-0.004097,0.006607,-0.012454,-0.014844,0.024530,-0.003420,-0.001973,-0.000623,-0.006236,-0.010273,-0.002648,-0.007241,-0.001080,-0.004804,0.036403,-0.002791,-0.002161,-0.002161,-0.001394,0.126408,-0.001764,-0.001650,-0.007377,-0.002334,-0.005310,-0.006988,-0.005635,-0.001764,-0.001764,-0.001973,-0.007185,-0.005909,0.056440,-0.002069,0.071980,-0.002496,-0.002416,0.028132,-0.002648,-0.002573,-0.003304,-0.003058,-0.004925,-0.002994,-0.000882,-0.003304,-0.007073,-0.001247,-0.001650,-0.003304,-0.002994,-0.004145,-0.002496,-0.001871,-0.002249,-0.001080,-0.002161,-0.007307,-0.003304,-0.000623,-0.000882,-0.001528,-0.006299,-0.008445,-0.015309,-0.017098,-0.005493,-0.011884,-0.003244,-0.014829,-0.007953,-0.008348,-0.015509,-0.006009,-0.011363,-0.005564,-0.002249,-0.007296,-0.024644,-0.007592,-0.020537,-0.017005,-0.003304,-0.008750,-0.008421,-0.003748,-0.007213,-0.007800,-0.006485,-0.005876,-0.003304,-0.005384,-0.004420,-0.002928,-0.003695,-0.010112,-0.004594,-0.003362,-0.009421,-0.003851,-0.005600,-0.001080,-0.002069,-0.004097,-0.002573,-0.008397,-0.001247,-0.002249,-0.005384,-0.001973,-0.004285,1.000000,-0.003058


In [16]:
# plt.figure(figsize=(18,10))
# sns.heatmap(corrs, annot=True)
# plt.show()

In [17]:
# User inputs
playlist_length = 20  # number of nearest neighbours (recommendations) to retrieve
game = "Catan"        # exact title to look up in the `name` column

# Resolve the title to its game_id. Titles are not unique in this dataset, so
# the first matching row is used. Fail with a clear message when the title is
# absent instead of an opaque IndexError from `.values[0]`.
matching_ids = df2.loc[df2["name"] == game, "game_id"]
if matching_ids.empty:
    raise ValueError(f"No game named {game!r} found in the dataset")

game_id = matching_ids.values[0]
game_id

13

In [18]:
# Step 1: Get the data (NOTE: this is the original dataframe, not the one used for the correlations)
X = df2.loc[:, feature_cols]

# Fit the preprocessor on the feature matrix and transform it in one step.
# Fitting here means this cell does not depend on the correlation-analysis
# cell having been run first; a bare transform() raises NotFittedError when
# the preprocessor has not been fitted yet.
X_preprocessed = preprocessor.fit_transform(X)

# NO TARGET OR TRAIN TEST SPLIT
X.head()

Unnamed: 0,max_players,max_playtime,min_age,min_players,min_playtime,playing_time,category,mechanic,average_rating,users_rated,category_count,mechanic_count,has_expansion,len_description,description_sentiment
0,5,240,14,3,240,240,economic,area control / area influence,7.66508,4498,3,5,0,222,0.091012
1,4,30,12,3,30,30,card game,trick-taking,6.60815,478,2,1,0,154,0.055291
2,4,60,10,2,30,60,abstract strategy,area control / area influence,7.44119,12019,2,4,0,183,-0.05625
3,4,60,12,2,60,60,ancient,action point allowance system,6.60675,314,1,4,0,104,-0.007908
4,6,90,12,3,90,90,economic,hand management,7.3583,15195,1,3,0,191,0.168056


In [19]:
# Number of neighbours returned per query
k = playlist_length

# Nearest-neighbour index using Euclidean distance, fitted on the
# preprocessed feature matrix
model1 = NearestNeighbors(metric="euclidean", n_neighbors=k)
model1.fit(X_preprocessed)

In [20]:
# Extract the feature row(s) of the selected game and preprocess them
game_features = df2.loc[df2["game_id"] == game_id, feature_cols]
game_features_preprocessed = preprocessor.transform(game_features)

# Find the nearest neighbours; row 0 of each result belongs to the first query row
distances, indices = model1.kneighbors(game_features_preprocessed)

# Look up the neighbouring games by position and attach their distances.
# .assign() builds a new frame rather than writing into a slice of df2.
# The query game itself comes back as the closest match (distance 0).
games = df2.iloc[indices[0]].assign(distance=distances[0])

# filter columns (currently keeps every column; narrow `cols` to trim the display)
cols = games.columns

games = games.loc[:, cols]
games = games.sort_values(by="distance")
games.head(10)

Unnamed: 0,game_id,max_players,max_playtime,min_age,min_players,min_playtime,name,playing_time,category,mechanic,average_rating,users_rated,category_count,mechanic_count,has_expansion,len_description,description_sentiment,distance
12,13,4,120,10,3,60,Catan,120,negotiation,dice rolling,7.26569,67655,1,5,1,457,0.098867,0.0
4702,30549,4,45,8,2,45,Pandemic,45,medical,action point allowance system,7.67234,62377,1,7,1,237,0.114088,3.76861
544,822,5,45,8,2,30,Carcassonne,45,city building,area control / area influence,7.43536,67056,3,2,1,204,-0.013889,4.135336
5041,36218,4,30,13,2,30,Dominion,30,card game,card drafting,7.69995,55930,2,3,1,289,0.130288,5.126014
5937,68448,7,30,10,2,30,7 Wonders,30,ancient,card drafting,7.83595,51688,4,5,1,252,-0.012574,6.535561
3052,9209,5,60,8,2,30,Ticket to Ride,60,trains,hand management,7.48301,48227,2,3,1,229,0.042562,7.300253
4739,31260,5,150,12,1,30,Agricola,150,animals,card drafting,8.05581,48261,3,4,1,343,0.082667,7.673573
1540,3076,5,150,12,2,90,Puerto Rico,150,city building,variable phase order,8.09157,47789,3,1,1,381,-0.017889,7.844523
5348,40692,5,80,8,2,40,Small World,80,fantasy,area control / area influence,7.35735,43144,3,4,1,250,0.084914,8.796502
1369,2651,6,120,12,2,120,Power Grid,120,economic,auction/bidding,7.94499,42036,2,2,1,215,0.051769,9.328175


In [None]:
# Alternative distance metric: Manhattan (L1)

# Number of neighbours returned per query
k = playlist_length

# Nearest-neighbour index using Manhattan distance, fitted on the
# preprocessed feature matrix
model2 = NearestNeighbors(metric="manhattan", n_neighbors=k)
model2.fit(X_preprocessed)

In [None]:
# Extract the feature row(s) of the selected game and preprocess them.
# (The dataframe is keyed by `game_id`; there is no `track_id` column.)
game_features = df2.loc[df2["game_id"] == game_id, feature_cols]
game_features_preprocessed = preprocessor.transform(game_features)

# Find the nearest neighbours under the Manhattan metric
distances, indices = model2.kneighbors(game_features_preprocessed)

# Look up the neighbouring games by position and attach their distances.
# .assign() builds a new frame rather than writing into a slice of df2.
games_manhattan = df2.iloc[indices[0]].assign(distance=distances[0])

# Closest games first
games_manhattan = games_manhattan.sort_values(by="distance")
games_manhattan.head(10)

In [None]:
# try other distances

# define the number of nearest neighbors to consider
k = playlist_length

# Initialize the nearest neighbors model
model3 = NearestNeighbors(n_neighbors=k, metric="cosine")

# Fit the model to the preprocessed data
model3.fit(X_preprocessed)

In [None]:
# Extract the feature row(s) of the selected game and preprocess them.
# (The dataframe is keyed by `game_id`; there is no `track_id` column.)
game_features = df2.loc[df2["game_id"] == game_id, feature_cols]
game_features_preprocessed = preprocessor.transform(game_features)

# Find the nearest neighbours under the cosine metric
distances, indices = model3.kneighbors(game_features_preprocessed)

# Look up the neighbouring games by position and attach their distances.
# .assign() builds a new frame rather than writing into a slice of df2.
games_cosine = df2.iloc[indices[0]].assign(distance=distances[0])

# Closest games first
games_cosine = games_cosine.sort_values(by="distance")
games_cosine.head(10)