# Configuration

In [1]:
import yaml
import joblib
import pandas as pd
import numpy as np
from os.path import join

In [2]:
with open('config.yaml', 'r') as file:
    config = yaml.safe_load(file)

data_path = config['paths']['data']
model_path = config['paths']['model']

In [3]:
scaled_features = pd.read_csv(join(data_path, 'processed/scaled_features.csv'))
scaled_features.head(5)

Unnamed: 0,Metascore,User_Score,Release_Year,Platform_ Xbox,Platform_3DS,Platform_DC,Platform_DS,Platform_GBA,Platform_GC,Platform_Mobile,...,Genre_Role-Playing,Genre_Rythm,Genre_Sandbox,Genre_Shooter,Genre_Simulation,Genre_Sport,Genre_Sports,Genre_Strategy,Genre_Survival,Genre_Tactical
0,0.751179,0.210426,3.583417,-0.011916,-0.150493,-0.044626,-0.265856,-0.186593,-0.228317,-0.016853,...,-0.336375,-0.011916,-0.016853,-0.375877,-0.212011,-0.016853,-0.39483,-0.20308,-0.035768,-0.020642
1,0.607353,1.11453,3.583417,-0.011916,-0.150493,-0.044626,-0.265856,-0.186593,-0.228317,-0.016853,...,-0.336375,-0.011916,-0.016853,-0.375877,-0.212011,-0.016853,-0.39483,-0.20308,-0.035768,-0.020642
2,0.751179,-0.276399,3.583417,-0.011916,-0.150493,-0.044626,-0.265856,-0.186593,-0.228317,-0.016853,...,-0.336375,-0.011916,-0.016853,-0.375877,-0.212011,-0.016853,-0.39483,-0.20308,-0.035768,-0.020642
3,0.607353,0.001787,3.583417,-0.011916,-0.150493,-0.044626,-0.265856,-0.186593,-0.228317,-0.016853,...,-0.336375,-0.011916,-0.016853,-0.375877,-0.212011,-0.016853,-0.39483,-0.20308,-0.035768,-0.020642
4,1.829878,1.11453,3.143234,-0.011916,-0.150493,-0.044626,-0.265856,-0.186593,-0.228317,-0.016853,...,-0.336375,-0.011916,-0.016853,-0.375877,-0.212011,-0.016853,-0.39483,-0.20308,-0.035768,-0.020642


In [4]:
df = pd.read_csv(join(data_path, 'processed/processed_data.csv'))
df.head(5)

Unnamed: 0,Name,Platform,Publisher,Genre,Metascore,User_Score,Release_Year
0,Senua's Saga: Hellblade II,Xbox,Xbox,Action,81.0,7.5,2024.0
1,Zet Zillion,PC,Raw Fury,Card,79.0,8.8,2024.0
2,Duck Detective: The Secret Salami,PC,Happy Broccoli Games,Adventure,81.0,6.8,2024.0
3,Rakugaki,PC,Gearbox Publishing,Action,79.0,7.2,2024.0
4,Elden Ring,PS5,Bandai Namco Games,Action,96.0,8.8,2022.0


# Model

## Collaborative Filtering

In [5]:
from sklearn.neighbors import NearestNeighbors

nn_model = NearestNeighbors(n_neighbors=11, metric='cosine', algorithm='brute')
nn_model.fit(scaled_features)

In [6]:
vg_distances, vg_indices = nn_model.kneighbors(scaled_features)

print("List of indexes and distances for the first 5 games:\n")
print(vg_indices[:5], "\n")
print(vg_distances[:5])

List of indexes and distances for the first 5 games:

[[   0  124  125  119  123   78   64  139 6995    7 5806]
 [   1   78   64  139 5762 3093 6290 6769 6995 6597 5806]
 [   2 5208 6465 3875 5179 6377 6772 6863 2608 2280 7012]
 [   3   78   64 6995  139 3010 6853 5806 6597 4654 4173]
 [   4  118  100  101   99   74   97  127  103  138   90]] 

[[0.00000000e+00 3.16031201e-01 4.92328767e-01 6.77456977e-01
  7.40733504e-01 9.67059434e-01 9.72477693e-01 9.73308561e-01
  9.78140667e-01 9.78300194e-01 9.80146353e-01]
 [1.11022302e-16 9.71162433e-01 9.75339013e-01 9.76726598e-01
  9.76858533e-01 9.77238134e-01 9.77405486e-01 9.79179104e-01
  9.79445434e-01 9.79736921e-01 9.79788020e-01]
 [1.88737914e-15 9.34397365e-01 9.43484976e-01 9.44150705e-01
  9.47585337e-01 9.48045129e-01 9.48489222e-01 9.48843322e-01
  9.52593229e-01 9.52920948e-01 9.54502876e-01]
 [0.00000000e+00 9.50660013e-01 9.56660598e-01 9.59308862e-01
  9.59609422e-01 9.59838268e-01 9.60743912e-01 9.60759675e-01
  9.60891812e

In [7]:
joblib.dump(nn_model, join(model_path, 'Nearest_Neighbor.pkl'))

['D:/Workspace/Machine_Learning/Recommendation_System/Game_Recommendation_System/model\\Nearest_Neighbor.pkl']

## Content-Based Filtering

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer

game_names = df['Name'].drop_duplicates()
game_names = game_names.reset_index(drop=True)

vectorizer = TfidfVectorizer(use_idf=True).fit(game_names)
vectorizer

In [9]:
game_title_vectors = vectorizer.transform(game_names)

print("List of game title vectors for the first 5 games:\n")
print(pd.DataFrame(game_title_vectors.toarray()).head(5))

List of game title vectors for the first 5 games:

   0     1     2     3     4     5     6     7     8     9     ...  3892  \
0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   0.0   
1   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   0.0   
2   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   0.0   
3   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   0.0   
4   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   0.0   

   3893  3894  3895  3896  3897  3898  3899  3900  3901  
0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
1   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
2   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
3   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
4   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  

[5 rows x 3902 columns]


In [10]:
from sklearn.metrics.pairwise import cosine_similarity

query_vector = vectorizer.transform(['NieR: Automata'])
similarity_scores = cosine_similarity(query_vector, game_title_vectors)

print(similarity_scores)

[[0. 0. 0. ... 0. 0. 0.]]


In [11]:
closest_match_index = similarity_scores.argmax()
closest_match_index

6

In [12]:
closest_match_game_name = game_names[closest_match_index-1]
closest_match_game_name

'The Witcher 3: Wild Hunt - Complete Edition'

In [13]:
joblib.dump(vectorizer, join(model_path, 'Vectorizer.pkl'))

['D:/Workspace/Machine_Learning/Recommendation_System/Game_Recommendation_System/model\\Vectorizer.pkl']