In [1]:
import pandas as pd

In [2]:
# Load the game ratings dataset; the relative path is resolved against the working directory
game_data = pd.read_csv("game.csv")

In [3]:
game_data.shape

(5000, 3)

In [4]:
game_data.columns

Index(['userId', 'game', 'rating'], dtype='object')

In [5]:
game_data.head()

Unnamed: 0,userId,game,rating
0,3,The Legend of Zelda: Ocarina of Time,4.0
1,6,Tony Hawk's Pro Skater 2,5.0
2,8,Grand Theft Auto IV,4.0
3,10,SoulCalibur,4.0
4,11,Grand Theft Auto IV,4.5


In [6]:
# Importing the TfidfVectorizer from sklearn
from sklearn.feature_extraction.text import TfidfVectorizer

# Create a TF-IDF vectorizer that ignores English stop words
Tfidf = TfidfVectorizer(stop_words="english")

In [7]:
# Count the missing (NaN) values in the rating column.
# NOTE(review): the TF-IDF step that follows vectorizes the `game` column, not
# `rating` — consider checking `game` for NaN as well.
game_data["rating"].isnull().sum()

0

In [8]:
# Fit the vectorizer on the game titles and build the TF-IDF matrix
# (one row per record in game_data, one column per vocabulary term)
tfidf_matrix = Tfidf.fit_transform(game_data["game"])
tfidf_matrix.shape

(5000, 3068)

In [9]:
# To find the similarity scores we import linear_kernel from sklearn
from sklearn.metrics.pairwise import linear_kernel

In [11]:
# Pairwise similarity between the TF-IDF vectors of every pair of rows.
# linear_kernel computes plain dot products; TfidfVectorizer L2-normalises each
# row by default (norm="l2"), so these dot products equal cosine similarities.

cos_sin_matrix = linear_kernel(tfidf_matrix, tfidf_matrix)

In [12]:
cos_sin_matrix

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [13]:
# Map each userId to the row position of its (first) record in game_data.
# Series.drop_duplicates() compares the *values* (the row positions, which are
# already unique), so it never removed repeated userIds. De-duplicate on the
# index instead so a lookup by userId always yields a single row position.
game_data_index = pd.Series(game_data.index, index=game_data["userId"])
game_data_index = game_data_index[~game_data_index.index.duplicated(keep="first")]

In [14]:
game_data_index.head(10)

userId
3     0
6     1
8     2
10    3
11    4
12    5
13    6
14    7
16    8
19    9
dtype: int64

In [15]:
# Sanity check: look up the row position stored for userId 269
game_data_id = game_data_index[269]

game_data_id

89

In [16]:
# We will have to create a user defined function for generating recommendations for the games as under
def get_recommendations(UserId, topN):
    """Print and return the games whose titles are most similar to a user's game.

    The lookup goes userId -> row position (via ``game_data_index``) -> the
    matching row of the title-similarity matrix ``cos_sin_matrix``.

    Parameters
    ----------
    UserId : label present in ``game_data_index``
        userId whose game is used as the query.
    topN : int
        Number of neighbours wanted. ``topN + 1`` rows are produced because
        the best match is normally the query row itself.

    Returns
    -------
    pandas.DataFrame
        Columns ``index`` (row label in ``game_data``), ``game`` (title) and
        ``rating``. Note that ``rating`` holds the similarity score, not the
        user's rating; the column name is kept so existing output is unchanged.

    Raises
    ------
    KeyError
        If ``UserId`` is not present in ``game_data_index``.
    """
    # Row position of the game associated with this userId.
    game_data_id = game_data_index[UserId]

    # A userId that occurs on several rows yields a Series here; fall back to
    # its first row so the matrix lookup below stays one-dimensional.
    if isinstance(game_data_id, pd.Series):
        game_data_id = game_data_id.iloc[0]

    # (row position, similarity) pairs, best match first. sorted() is stable,
    # so tied scores keep their original row order.
    # The matrix is called `cos_sin_matrix` where it is built; the previous
    # `cos_sim_matrix` spelling raised NameError on a fresh kernel.
    cosine_scores = sorted(
        enumerate(cos_sin_matrix[game_data_id]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    cosine_scores_N = cosine_scores[0:topN + 1]

    # Split the pairs into row positions and their similarity scores.
    game_data_idx = [pair[0] for pair in cosine_scores_N]
    game_data_scores = [pair[1] for pair in cosine_scores_N]

    # reset_index() turns the row labels into an explicit "index" column.
    games_similar = (
        game_data.loc[game_data_idx, ["game"]]
        .assign(rating=game_data_scores)
        .reset_index()
    )

    print(games_similar)
    return games_similar

In [17]:
# Use the function defined above to list the games whose titles are most similar
# to the game on the row recorded for userId 285. topN=10 yields 11 rows because
# the function slices topN + 1 entries from the ranked scores.

get_recommendations(285, topN=10)

game_data_index[285]


    index                         game    rating
0      95          Burnout 3: Takedown  1.000000
1     108          Burnout 3: Takedown  1.000000
2    4315                      Burnout  0.621807
3    4585                      Burnout  0.621807
4    1102              Burnout Legends  0.456389
5     405              Burnout Revenge  0.428381
6     496              Burnout Revenge  0.428381
7     577              Burnout Revenge  0.428381
8     654             Burnout Paradise  0.425606
9     855             Burnout Paradise  0.425606
10   2814  Burnout Paradise Remastered  0.357666


95