In [1]:
# Import all required libraries and modules
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import joblib

In [2]:
# Load the raw games dataset from the CSV file next to this notebook
game = pd.read_csv("game.csv", encoding='utf8')

In [3]:
# Drop duplicate titles, keeping the first occurrence of each game
game = game.drop_duplicates(subset="game", keep="first")

In [4]:
# Renumber rows 0..n-1 so row labels line up with positional indices
game = game.reset_index(drop=True)

In [6]:
# Build a TF-IDF vectorizer that ignores English stop words
tfidf = TfidfVectorizer(stop_words="english")

# Learn the vocabulary and IDF weights from the game titles.
# NOTE: fit() returns the vectorizer itself, so despite its name
# `tfidf_matrix` holds the fitted vectorizer here, not a matrix; a later
# cell rebinds the name to the actual TF-IDF matrix.
tfidf_matrix = tfidf.fit(game["game"])

In [7]:
# Persist the fitted TF-IDF vectorizer to disk so it can be reloaded later
joblib.dump(value=tfidf_matrix, filename='matrix')

['matrix']

In [8]:
# Vectorize the game titles into a sparse (n_games x n_terms) TF-IDF matrix.
# Transform with `tfidf` (the fitted vectorizer) rather than with
# `tfidf_matrix` itself: the self-referential form only works once, because
# after the first run `tfidf_matrix` is a sparse matrix with no .transform(),
# so re-running the cell would raise AttributeError.
tfidf_matrix = tfidf.transform(game.game)

tfidf_matrix.shape 

(3438, 3068)

In [9]:
# cosine(x, y) = (x . y^T) / (||x|| . ||y||)
# Pairwise cosine similarity between every pair of rows of the TF-IDF matrix
# (with a single argument, cosine_similarity compares X against itself)

cosine_sim_matrix = cosine_similarity(tfidf_matrix)

# Lookup table: game title -> row index in `game` / `cosine_sim_matrix`
game_index = pd.Series(data=game.index, index=game["game"])

In [10]:
# Example: look up the row index of a single title
game_id = game_index.loc['SoulCalibur']

game_id

3

In [12]:
# Custom function to find the top-N games to recommend

def get_recommendations(Tittle, topN=10):
    """Return the ``topN`` games whose titles are most similar to ``Tittle``.

    Similarity is the cosine similarity stored in the notebook-level
    ``cosine_sim_matrix``; titles are resolved to row indices through
    ``game_index`` and mapped back to names through ``game``.

    Parameters
    ----------
    Tittle : str
        Exact title of the game to find recommendations for. It must be a
        label of ``game_index``.
    topN : int, default 10
        Maximum number of recommendations to return. Values below zero are
        treated as zero.

    Returns
    -------
    pandas.DataFrame
        Columns ``index`` (row index of the recommended game in ``game``),
        ``game`` (its title) and ``rating`` (cosine similarity to
        ``Tittle``), sorted by descending ``rating`` and labelled 1..N.

    Raises
    ------
    KeyError
        If ``Tittle`` is not present in ``game_index``.
    """
    if Tittle not in game_index.index:
        raise KeyError(f"Game title not found in dataset: {Tittle!r}")

    # Row index of the query game
    game_id = game_index[Tittle]

    # Pairwise similarity of every *other* game with the query game.
    # The query game is excluded explicitly instead of dropping whatever
    # sorts first: a game with an identical TF-IDF vector ties with the
    # query and can sort ahead of it, and a title made only of stop words
    # has self-similarity 0, so "first row == query game" is not guaranteed.
    cosine_scores = [
        (idx, score)
        for idx, score in enumerate(cosine_sim_matrix[game_id])
        if idx != game_id
    ]

    # Highest similarity first (the sort is stable, so ties keep row order)
    cosine_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Keep only the N most similar games
    cosine_scores_N = cosine_scores[:max(topN, 0)]
    game_idx = [idx for idx, _ in cosine_scores_N]
    game_scores = [score for _, score in cosine_scores_N]

    # Similar games and their scores, labelled 1..N
    game_similar_show = pd.DataFrame(
        {
            "index": game_idx,
            "game": game.loc[game_idx, "game"].tolist(),
            "rating": game_scores,
        },
        columns=["index", "game", "rating"],
        index=pd.RangeIndex(1, len(game_idx) + 1),
    )

    return game_similar_show

In [13]:
# Ten titles most similar to "Perfect Dark"
rec = get_recommendations(Tittle="Perfect Dark", topN=10)
rec

Unnamed: 0,index,game,rating
1,2233,Perfect Dark Zero,0.839272
2,1207,TimeSplitters: Future Perfect,0.47248
3,406,Dark Souls,0.381355
4,1162,Radiant Historia: Perfect Chronology,0.378601
5,190,Dark Souls II,0.337531
6,2865,Enslaved: Odyssey to the West - Pigsy's Perfec...,0.325176
7,753,Dark Cloud 2,0.324736
8,2599,Dark Cloud,0.324736
9,377,Dark Souls III,0.319866
10,947,Dark Souls Remastered,0.30251
