In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
game = pd.read_csv("game.csv")
print(game.columns)

game.game

Index(['userId', 'game', 'rating'], dtype='object')


0       The Legend of Zelda: Ocarina of Time
1                   Tony Hawk's Pro Skater 2
2                        Grand Theft Auto IV
3                                SoulCalibur
4                        Grand Theft Auto IV
                        ...                 
4995                            Donut County
4996                  MotorStorm: Apocalypse
4997                            The Last Guy
4998           Valiant Hearts: The Great War
4999                           Mothergunship
Name: game, Length: 5000, dtype: object

In [3]:
# Creating a Tfidf Vectorizer to remove all stop words
tfidf = TfidfVectorizer(stop_words = "english")   

# Preparing the Tfidf matrix by fitting and transforming
tfidf_matrix = tfidf.fit_transform(game.game)   #Transform a count matrix to a normalized tf or tf-idf representation
tfidf_matrix.shape 

(5000, 3068)

In [4]:
#we have to find similarity score , here we will calculate the cosine simalarity.
#cosine(x,y)= (x.y⊺)/(||x||.||y||)

# Computing the cosine similarity on Tfidf matrix
cosine_sim_matrix = linear_kernel(tfidf_matrix, tfidf_matrix)
cosine_sim_matrix

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [15]:
#creating mapping
game_index = pd.Series(game.index, index = game['game']).drop_duplicates()
game_index
userId = game_index["Grand Theft Auto IV"]
userId

game
Grand Theft Auto IV      2
Grand Theft Auto IV      4
Grand Theft Auto IV    474
dtype: int64

In [16]:
def get_recommendations(Name, topN):    
    
    userId = game_index[Name]
    
    cosine_scores = list(enumerate(cosine_sim_matrix[userId]))
    
    
    cosine_scores = sorted(cosine_scores, key=lambda x:x[1], reverse = True)
    
    
    cosine_scores_N = cosine_scores[0: topN+1]
    
     
    game_idx  =  [i[0] for i in cosine_scores_N]
    game_scores =  [i[1] for i in cosine_scores_N]
    
    # Similar movies and scores
    game_similar_show = pd.DataFrame(columns=["game", "Score"])
    game_similar_show["game"] = game.loc[game_idx, "game"]
    game_similar_show["Score"] = game_scores
    game_similar_show.reset_index(inplace = True)  
   
    print (game_similar_show)
    

In [18]:
# Enter your anime and number of anime's to be recommended 
get_recommendations("Super Mario Galaxy 2", topN = 5)


   index                                        game     Score
0      5                          Super Mario Galaxy  1.000000
1      6                        Super Mario Galaxy 2  1.000000
2    202    Super Mario World: Super Mario Advance 2  0.623041
3     90  Super Mario Advance 4: Super Mario Bros. 3  0.600099
4   1856                         Super Mario Advance  0.544555
5    106                              Super Mario 64  0.525323
