# Configuration

In [1]:
import yaml
import joblib
import pandas as pd
import numpy as np
from os.path import join

In [2]:
with open('config.yaml', 'r') as file:
    config = yaml.safe_load(file)

data_path = config['paths']['data']
model_path = config['paths']['model']

In [3]:
scaled_features = pd.read_csv(join(data_path, 'processed/scaled_features.csv'))
scaled_features.head(5)

Unnamed: 0,Metascore,User_Score,Release_Year,Platform_ Xbox,Platform_3DS,Platform_DC,Platform_DS,Platform_GBA,Platform_GC,Platform_Mobile,...,Genre_Role-Playing,Genre_Rythm,Genre_Sandbox,Genre_Shooter,Genre_Simulation,Genre_Sport,Genre_Sports,Genre_Strategy,Genre_Survival,Genre_Tactical
0,0.751179,0.210426,3.583417,-0.011916,-0.150493,-0.044626,-0.265856,-0.186593,-0.228317,-0.016853,...,-0.336375,-0.011916,-0.016853,-0.375877,-0.212011,-0.016853,-0.39483,-0.20308,-0.035768,-0.020642
1,0.607353,1.11453,3.583417,-0.011916,-0.150493,-0.044626,-0.265856,-0.186593,-0.228317,-0.016853,...,-0.336375,-0.011916,-0.016853,-0.375877,-0.212011,-0.016853,-0.39483,-0.20308,-0.035768,-0.020642
2,0.751179,-0.276399,3.583417,-0.011916,-0.150493,-0.044626,-0.265856,-0.186593,-0.228317,-0.016853,...,-0.336375,-0.011916,-0.016853,-0.375877,-0.212011,-0.016853,-0.39483,-0.20308,-0.035768,-0.020642
3,0.607353,0.001787,3.583417,-0.011916,-0.150493,-0.044626,-0.265856,-0.186593,-0.228317,-0.016853,...,-0.336375,-0.011916,-0.016853,-0.375877,-0.212011,-0.016853,-0.39483,-0.20308,-0.035768,-0.020642
4,1.829878,1.11453,3.143234,-0.011916,-0.150493,-0.044626,-0.265856,-0.186593,-0.228317,-0.016853,...,-0.336375,-0.011916,-0.016853,-0.375877,-0.212011,-0.016853,-0.39483,-0.20308,-0.035768,-0.020642


In [4]:
df = pd.read_csv(join(data_path, 'processed/processed_data.csv'))
df.head(5)

Unnamed: 0,Name,Platform,Publisher,Genre,Metascore,User_Score,Release_Year
0,Senua's Saga: Hellblade II,Xbox,Xbox,Action,81.0,7.5,2024.0
1,Zet Zillion,PC,Raw Fury,Card,79.0,8.8,2024.0
2,Duck Detective: The Secret Salami,PC,Happy Broccoli Games,Adventure,81.0,6.8,2024.0
3,Rakugaki,PC,Gearbox Publishing,Action,79.0,7.2,2024.0
4,Elden Ring,PS5,Bandai Namco Games,Action,96.0,8.8,2022.0


# Model

## Collaborative Filtering

In [5]:
from sklearn.neighbors import NearestNeighbors

nn_model = NearestNeighbors(n_neighbors=11, metric='cosine', algorithm='brute')
nn_model.fit(scaled_features)

In [6]:
vg_distances, vg_indices = nn_model.kneighbors(scaled_features)

print("List of indexes and distances for the first 5 games:\n")
print(vg_indices[:5], "\n")
print(vg_distances[:5])

List of indexes and distances for the first 5 games:

[[   0  124  125  119  123   78   64  139 6995    7 5806]
 [   1   78   64  139 5762 3093 6290 6769 6995 6597 5806]
 [   2 5208 6465 3875 5179 6377 6772 6863 2608 2280 7012]
 [   3   78   64 6995  139 3010 6853 5806 6597 4654 4173]
 [   4  118  100  101   99   74   97  127  103  138   90]] 

[[0.00000000e+00 3.16031201e-01 4.92328767e-01 6.77456977e-01
  7.40733504e-01 9.67059434e-01 9.72477693e-01 9.73308561e-01
  9.78140667e-01 9.78300194e-01 9.80146353e-01]
 [1.11022302e-16 9.71162433e-01 9.75339013e-01 9.76726598e-01
  9.76858533e-01 9.77238134e-01 9.77405486e-01 9.79179104e-01
  9.79445434e-01 9.79736921e-01 9.79788020e-01]
 [1.88737914e-15 9.34397365e-01 9.43484976e-01 9.44150705e-01
  9.47585337e-01 9.48045129e-01 9.48489222e-01 9.48843322e-01
  9.52593229e-01 9.52920948e-01 9.54502876e-01]
 [0.00000000e+00 9.50660013e-01 9.56660598e-01 9.59308862e-01
  9.59609422e-01 9.59838268e-01 9.60743912e-01 9.60759675e-01
  9.60891812e

In [7]:
joblib.dump(nn_model, join(model_path, 'Nearest_Neighbor.pkl'))

['D:/Workspace/AI/Recommendation_System/Game_Recommendation_System/model\\Nearest_Neighbor.pkl']

## Content-Based Filtering

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer

game_names = df['Name'].drop_duplicates()
game_names = game_names.reset_index(drop=True)

vectorizer = TfidfVectorizer(use_idf=True).fit(game_names)
vectorizer

In [9]:
game_title_vectors = vectorizer.transform(game_names)

print("List of game title vectors for the first 5 games:\n")
print(pd.DataFrame(game_title_vectors.toarray()).head(5))

List of game title vectors for the first 5 games:

   0     1     2     3     4     5     6     7     8     9     ...  3892  \
0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   0.0   
1   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   0.0   
2   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   0.0   
3   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   0.0   
4   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   0.0   

   3893  3894  3895  3896  3897  3898  3899  3900  3901  
0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
1   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
2   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
3   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
4   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  

[5 rows x 3902 columns]


In [10]:
from sklearn.metrics.pairwise import cosine_similarity

query_vector = vectorizer.transform(['NieR: Automata'])
similarity_scores = cosine_similarity(query_vector, game_title_vectors)

print(similarity_scores)

[[0. 0. 0. ... 0. 0. 0.]]


In [11]:
closest_match_index = similarity_scores.argmax()
closest_match_index

np.int64(6)

In [12]:
closest_match_game_name = game_names[closest_match_index-1]
closest_match_game_name

'The Witcher 3: Wild Hunt - Complete Edition'

In [13]:
joblib.dump(vectorizer, join(model_path, 'Vectorizer.pkl'))

['D:/Workspace/AI/Recommendation_System/Game_Recommendation_System/model\\Vectorizer.pkl']

# Test

In [14]:
from fuzzywuzzy import process
from spellchecker import SpellChecker

import warnings
warnings.filterwarnings("ignore")



In [15]:
def VideoGameTitleRecommender(video_game_name):
    '''
    This function will recommend a game title that has the closest match to the input
    '''
    query_vector = vectorizer.transform([video_game_name])
    similarity_scores = cosine_similarity(query_vector, game_title_vectors)

    closest_match_index = similarity_scores.argmax()
    closest_match_game_name = game_names[closest_match_index]
    
   
    spell = SpellChecker(language='en')
    words = video_game_name.split()
    corrected_words = [spell.correction(word) for word in words]
    corrected_game_name = ' '.join(corrected_words)
    closest_match_game_name = process.extractOne(corrected_game_name, game_names)[0]

    return closest_match_game_name

def VideoGameRecommender_Genre(video_game_name, video_game_genre='Any'):
    '''
    This function will provide game recommendations based on various features of the game
    '''
    default_genre = 'Any'

    # User input: Game Title and Genre
    if video_game_genre != default_genre:
        video_game_idx = df.query("Name == @video_game_name & Genre == @video_game_genre").index
        
        if video_game_idx.empty:
            video_game_idx = df.query("Name == @video_game_name").index

            if not video_game_idx.empty:
                print(f"Note: Recommendations will be based on the title of the game as it is not available in the specified genre.\n")
                video_game_genre = default_genre
    
    # User input: Game Title only
    else:
        video_game_idx = df.query("Name == @video_game_name").index  
    
    if video_game_idx.empty:
        # If the game entered by the user doesn't exist in the records, the program will recommend a new game similar to the input
        closest_match_game_name = VideoGameTitleRecommender(video_game_name)

        print(f"'{video_game_name}' doesn't exist in the records.\n")
        print(f"You may want to try '{closest_match_game_name}', which is the closest match to the input.")
    
    else:
        # User input: Game Title only
        if video_game_genre == default_genre:

            # Place in a separate dataframe the indices and distances, then sort the record by distance in ascending order       
            vg_combined_dist_idx_df = pd.DataFrame()
            for idx in video_game_idx:
                # Remove from the list any game that shares the same name as the input
                vg_dist_idx_df = pd.concat([pd.DataFrame(vg_indices[idx][1:]), pd.DataFrame(vg_distances[idx][1:])], axis=1)
                vg_combined_dist_idx_df = pd.concat([vg_combined_dist_idx_df, vg_dist_idx_df])

            vg_combined_dist_idx_df = vg_combined_dist_idx_df.set_axis(['Index', 'Distance'], axis=1)
            vg_combined_dist_idx_df = vg_combined_dist_idx_df.reset_index(drop=True)
            vg_combined_dist_idx_df = vg_combined_dist_idx_df.sort_values(by='Distance', ascending=True)

            video_game_list = df.iloc[vg_combined_dist_idx_df['Index']]

            # Remove any duplicate game names to provide the user with a diverse selection of recommended games
            video_game_list = video_game_list.drop_duplicates(subset=['Name'], keep='first')
            
            # Get the first 10 games in the list
            video_game_list = video_game_list.head(10)

            # Get the distance of the games similar to the input
            recommended_distances = np.array(vg_combined_dist_idx_df['Distance'].head(10))

        # User input: Game Title and Genre
        else:
            # Remove from the list any game that shares the same name as the input
            recommended_idx = vg_indices[video_game_idx[0]][1:]
            video_game_list = df.iloc[recommended_idx]

            # Filter out games of other genres
            video_game_list = video_game_list[video_game_list['Genre'] == video_game_genre]

            # Get the distance of the games similar to the input
            recommended_distances = np.array(vg_distances[video_game_idx[0]][1:])

        print(f"Top 10 Recommended Video Games for '{video_game_name}' [genre:{video_game_genre}]")

        video_game_list = video_game_list.reset_index(drop=True)
        recommended_video_game_list = pd.concat([video_game_list, 
                                                 pd.DataFrame(recommended_distances, columns=['Similarity_Distance'])], axis=1)
        
        recommended_video_game_list['Metascore'].fillna(0, inplace=True)
        recommended_video_game_list['Release_Year'].fillna(0, inplace=True)

        # Convert Metascore and Release Year to integer type
        recommended_video_game_list['Metascore'] = recommended_video_game_list['Metascore'].astype(int)
        recommended_video_game_list['Release_Year'] = recommended_video_game_list['Release_Year'].astype(int)

        display(recommended_video_game_list.style.hide(axis=0))


def VideoGameRecommender_Platform(video_game_name, video_game_platform='Any'):
    '''
    This function will provide game recommendations based on various features of the game
    '''
    default_platform = 'Any'

    # User input: Game Title and Genre
    if video_game_platform != default_platform:
        video_game_idx = df.query("Name == @video_game_name & Platform == @video_game_platform").index
        
        if video_game_idx.empty:
            video_game_idx = df.query("Name == @video_game_name").index

            if not video_game_idx.empty:
                print(f"Note: Recommendations will be based on the title of the game as it is not available in the specified genre.\n")
                video_game_platform = default_platform
    
    # User input: Game Title only
    else:
        video_game_idx = df.query("Name == @video_game_name").index  
    
    if video_game_idx.empty:
        # If the game entered by the user doesn't exist in the records, the program will recommend a new game similar to the input
        closest_match_game_name = VideoGameTitleRecommender(video_game_name)

        print(f"'{video_game_name}' doesn't exist in the records.\n")
        print(f"You may want to try '{closest_match_game_name}', which is the closest match to the input.")
    
    else:
        # User input: Game Title only
        if video_game_platform == default_platform:

            # Place in a separate dataframe the indices and distances, then sort the record by distance in ascending order       
            vg_combined_dist_idx_df = pd.DataFrame()
            for idx in video_game_idx:
                # Remove from the list any game that shares the same name as the input
                vg_dist_idx_df = pd.concat([pd.DataFrame(vg_indices[idx][1:]), pd.DataFrame(vg_distances[idx][1:])], axis=1)
                vg_combined_dist_idx_df = pd.concat([vg_combined_dist_idx_df, vg_dist_idx_df])

            vg_combined_dist_idx_df = vg_combined_dist_idx_df.set_axis(['Index', 'Distance'], axis=1)
            vg_combined_dist_idx_df = vg_combined_dist_idx_df.reset_index(drop=True)
            vg_combined_dist_idx_df = vg_combined_dist_idx_df.sort_values(by='Distance', ascending=True)

            video_game_list = df.iloc[vg_combined_dist_idx_df['Index']]

            # Remove any duplicate game names to provide the user with a diverse selection of recommended games
            video_game_list = video_game_list.drop_duplicates(subset=['Name'], keep='first')
            
            # Get the first 10 games in the list
            video_game_list = video_game_list.head(10)

            # Get the distance of the games similar to the input
            recommended_distances = np.array(vg_combined_dist_idx_df['Distance'].head(10))

        # User input: Game Title and Genre
        else:
            # Remove from the list any game that shares the same name as the input
            recommended_idx = vg_indices[video_game_idx[0]][1:]
            video_game_list = df.iloc[recommended_idx]

            # Filter out games of other genres
            video_game_list = video_game_list[video_game_list['Platform'] == video_game_platform]

            # Get the distance of the games similar to the input
            recommended_distances = np.array(vg_distances[video_game_idx[0]][1:])

        print(f"Top 10 Recommended Video Games for '{video_game_name}' [platform:{video_game_platform}]")

        video_game_list = video_game_list.reset_index(drop=True)
        recommended_video_game_list = pd.concat([video_game_list, 
                                                 pd.DataFrame(recommended_distances, columns=['Similarity_Distance'])], axis=1)

        recommended_video_game_list['Metascore'].fillna(0, inplace=True)
        recommended_video_game_list['Release_Year'].fillna(0, inplace=True)

        # Convert Metascore and Release Year to integer type
        recommended_video_game_list['Metascore'] = recommended_video_game_list['Metascore'].astype(int)
        recommended_video_game_list['Release_Year'] = recommended_video_game_list['Release_Year'].astype(int)

        display(recommended_video_game_list.style.hide(axis=0))

def VideoGameRecommender(Type, *args):
    
    if Type == 'Genre':
        VideoGameRecommender_Genre(*args)
    elif Type == 'Platform':
        VideoGameRecommender_Platform(*args)
    else:
        VideoGameRecommender_Platform(Type)

In [16]:
VideoGameRecommender('NieR: Automata')

Top 10 Recommended Video Games for 'NieR: Automata' [platform:Any]


Name,Platform,Publisher,Genre,Metascore,User_Score,Release_Year,Similarity_Distance
Final Fantasy XIV: Heavensward,PC,Square Enix,Action,86,7.7,2015,0.006572
NieR Replicant,PC,Square Enix,Action,83,9.5,2021,0.007098
Sleeping Dogs,PC,Square Enix,Action,81,8.2,2012,0.012233
Just Cause 2,PC,Square Enix,Action,84,7.7,2010,0.023255
Hitman: Absolution,PC,Square Enix,Action,79,7.0,2012,0.024968
Murdered: Soul Suspect,PC,Square Enix,Action,59,7.4,2014,0.044139
Just Cause 3,PC,Square Enix,Action,74,5.3,2015,0.05595
Lara Croft and the Temple of Osiris,PC,Square Enix,Action,73,5.4,2014,0.056534
Deus Ex: Human Revolution,PC,Square Enix,Shooter,90,8.5,2011,0.121381
Lords of the Fallen,PC,Square Enix,Role-Playing,73,6.9,2014,0.146009


In [17]:
VideoGameRecommender('Neage of Negend')

'Neage of Negend' doesn't exist in the records.

You may want to try 'League of Legends', which is the closest match to the input.


In [18]:
VideoGameRecommender('Platform', 'God of War', 'PS2')

Top 10 Recommended Video Games for 'God of War' [platform:PS2]


Name,Platform,Publisher,Genre,Metascore,User_Score,Release_Year,Similarity_Distance
Shadow of the Colossus,PS2,Sony Computer Entertainment,Action,91,9.1,2005,0.000886
God of War II,PS2,Sony Computer Entertainment,Action,93,8.9,2007,0.002684
ICO,PS2,Sony Computer Entertainment,Action,90,8.8,2001,0.011502
The Mark of Kri,PS2,Sony Computer Entertainment,Action,80,7.9,2002,0.026511
Genji: Dawn of the Samurai,PS2,Sony Computer Entertainment,Action,74,8.3,2005,0.030648
Primal,PS2,Sony Computer Entertainment,Action,73,8.6,2003,0.034691
Ghosthunter,PS2,Sony Computer Entertainment,Action,69,8.7,2003,0.047679
Rise of the Kasai,PS2,Sony Computer Entertainment,Action,68,8.0,2005,0.054098
The Getaway,PS2,Sony Computer Entertainment,Action,72,6.8,2002,0.070712
Extermination,PS2,Sony Computer Entertainment,Action,67,7.8,2001,0.071138


In [19]:
VideoGameRecommender('Platform', 'Final Fantasy', 'PC')

Note: Recommendations will be based on the title of the game as it is not available in the specified genre.

Top 10 Recommended Video Games for 'Final Fantasy' [platform:Any]


Name,Platform,Publisher,Genre,Metascore,User_Score,Release_Year,Similarity_Distance
Final Fantasy II,PSP,Square Enix,JRPG,63,6.6,2007,0.000502
Final Fantasy III,PC,Square Enix,JRPG,77,8.5,2009,0.010227
Final Fantasy IV Pixel Remaster,PC,Square Enix,JRPG,83,7.7,2021,0.01368
Final Fantasy V Rebirth,PS5,Square Enix,JRPG,92,9.1,2024,0.113057
Final Fantasy Anniversary Edition,PSP,Square Enix,Role-Playing,67,7.7,2007,0.805436
Star Ocean: First Departure,PSP,Square Enix,Role-Playing,74,7.4,2007,0.806164
Final Fantasy II Anniversary Edition,PSP,Square Enix,Role-Playing,63,7.1,2007,0.806317
Valkyrie Profile: Lenneth,PSP,Square Enix,Role-Playing,80,8.4,2006,0.806321
Space Invaders Extreme,PSP,Square Enix,Shooter,84,6.8,2008,0.806734
Star Ocean: Second Evolution,PSP,Square Enix,Role-Playing,75,7.1,2008,0.806808


In [20]:
VideoGameRecommender('Genre', 'Valorant', 'FPS')

Top 10 Recommended Video Games for 'Valorant' [genre:FPS]


Name,Platform,Publisher,Genre,Metascore,User_Score,Release_Year,Similarity_Distance
Call of Duty: Modern Warfare II,PC,Activision,FPS,76,5.7,2022,0.360215
Tom Clancy's Rainbow Six Siege,PC,Ubisoft,FPS,79,7.0,2015,0.5737
Doom Eternal,PC,Bethesda Softworks,FPS,88,8.9,2020,0.574465
Counter-Strike 2,PC,Valve Software,FPS,82,4.6,2023,0.603582
Counter-Strike: Global Offensive,PC,Valve Software,FPS,83,7.3,2012,0.713012
Team Fortress 2,PC,Valve Software,FPS,92,8.6,2007,0.715384
Half-Life: Alyx,PC,Valve,FPS,93,9.1,2020,0.716963
Overwatch,PC,Blizzard Entertainment,FPS,91,7.7,2016,0.81567
,,,,0,,0,0.815738
,,,,0,,0,0.956958


In [21]:
VideoGameRecommender('Genre', 'Street Fighter IV', 'Adventure')

Note: Recommendations will be based on the title of the game as it is not available in the specified genre.

Top 10 Recommended Video Games for 'Street Fighter IV' [genre:Any]


Name,Platform,Publisher,Genre,Metascore,User_Score,Release_Year,Similarity_Distance
Marvel vs. Capcom 3: Fate of Two Worlds,PS3,Capcom,Fighting,84,7.3,2011,0.005454
Darkstalkers Resurrection,PS3,Capcom,Fighting,80,7.3,2013,0.013809
Ultimate Marvel vs. Capcom 3,PS3,Capcom,Fighting,80,6.2,2011,0.01385
Street Fighter X Tekken,PS3,Capcom,Fighting,84,5.3,2012,0.0141
Street Fighter IV,X360,Capcom,Fighting,93,7.3,2009,0.022043
Street Fighter Anniversary Collection,PS2,Capcom,Fighting,78,8.7,2004,0.038179
Capcom vs. SNK 2: Mark of the Millennium 2001,PS2,Capcom,Fighting,81,7.9,2001,0.151263
,,,,0,,0,0.151263
,,,,0,,0,0.15779
,,,,0,,0,0.159357
