In [5]:
# Importing the Libraries

import joblib
import pandas as pd
import os
from sklearn.feature_extraction.text import TfidfVectorizer

In [6]:
# Loading the dataset
from google.colab import files

uploader = files.upload()

Saving game.csv to game.csv


In [7]:
# Accessing the dataset

data = pd.read_csv('game.csv')

In [9]:
# Checking the data head

data.head()

Unnamed: 0,userId,game,rating
0,3,The Legend of Zelda: Ocarina of Time,4.0
1,6,Tony Hawk's Pro Skater 2,5.0
2,8,Grand Theft Auto IV,4.0
3,10,SoulCalibur,4.0
4,11,Grand Theft Auto IV,4.5


In [10]:
# Checking the missing values 
data.isna().sum()

userId    0
game      0
rating    0
dtype: int64

In [11]:
# Create a Tfidf Vectorizer to remove all stop words

tfidf = TfidfVectorizer(stop_words = "english")


In [12]:
# Transform a count matrix to a normalized tf-idf representation
tfidf_matrix = tfidf.fit(data.game)

In [13]:
# Save the Pipeline for tfidf matrix

joblib.dump(tfidf_matrix, 'matrix')

os.getcwd()

mat = joblib.load("matrix")

tfidf_matrix = mat.transform(data.game) 


tfidf_matrix.shape # (51, 90)

(5000, 3068)

In [14]:
# cosine(x, y)= (x.y⊺) / (||x||.||y||)
# Computing the cosine similarity on Tfidf matrix
from sklearn.metrics.pairwise import cosine_similarity

cosine_sim_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

joblib.dump(cosine_sim_matrix, 'cosine_matrix')

['cosine_matrix']

In [15]:
data_index = pd.Series(data.index, index = data.game).drop_duplicates()

In [17]:
# Example
data_id = data_index["Shovel Knight"]

data_id

game
Shovel Knight     372
Shovel Knight     422
Shovel Knight     737
Shovel Knight    1124
Shovel Knight    1486
dtype: int64

In [23]:
def get_recommendations(game, topN):    
     topN = 10
    # Getting the movie index using its title 
     data_id = data_index[1]
    
    # Getting the pair wise similarity score for all the anime's with that 
    # anime
     cosine_scores = list(enumerate(cosine_sim_matrix[data_id]))
    
    # Sorting the cosine_similarity scores based on scores 
     cosine_scores = sorted(cosine_scores, key = lambda x:x[1], reverse = True)
    
    # Get the scores of top N most similar games 
     cosine_scores_N = cosine_scores[0: topN + 1]
    
    # Getting the movie index 
     data_idx  =  [i[0] for i in cosine_scores_N]
     data_scores =  [i[1] for i in cosine_scores_N]
    
    # Similar Games 
     df_similar_show = pd.DataFrame(columns = ["game", "Score"])
     df_similar_show['game'] = data.loc[data_idx, 'game']
     df_similar_show["Score"] = data_scores
     df_similar_show.reset_index(inplace = True)  
     df_similar_show.drop(["index"], axis=1, inplace=True)
     return(df_similar_show.iloc[1:, ])




In [24]:

rec = get_recommendations("Grand Theft Auto IV", topN = 10)
print(rec)

                        game  Score
1   Tony Hawk's Pro Skater 2    1.0
2   Tony Hawk's Pro Skater 3    1.0
3   Tony Hawk's Pro Skater 2    1.0
4   Tony Hawk's Pro Skater 4    1.0
5   Tony Hawk's Pro Skater 3    1.0
6     Tony Hawk's Pro Skater    1.0
7   Tony Hawk's Pro Skater 4    1.0
8   Tony Hawk's Pro Skater 2    1.0
9   Tony Hawk's Pro Skater 3    1.0
10  Tony Hawk's Pro Skater 3    1.0
