In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [4]:
games_df = pd.read_csv('games_of_all_time.csv')
games_df.head(5)

Unnamed: 0,game_name,meta_score,user_score,platform,description,url,developer,genre,type,rating
0,The Legend of Zelda: Ocarina of Time,99.0,91.0,['nintendo-64'],"As a young boy, Link is tricked by Ganondorf, ...",https://www.metacritic.com/game/nintendo-64/th...,Nintendo,"['Action Adventure', 'Fantasy']",singleplayer,E
1,Super Mario Galaxy,97.0,91.0,['wii'],[Metacritic's 2007 Wii Game of the Year] The u...,https://www.metacritic.com/game/wii/super-mari...,Nintendo,"['Action', 'Platformer', '3D']",singleplayer,E
2,Super Mario Galaxy 2,97.0,91.0,['wii'],"Super Mario Galaxy 2, the sequel to the galaxy...",https://www.metacritic.com/game/wii/super-mari...,Nintendo EAD Tokyo,"['Action', 'Platformer', '3D']",singleplayer,E
3,Metroid Prime,97.0,89.0,['gamecube'],Samus returns in a new mission to unravel the ...,https://www.metacritic.com/game/gamecube/metro...,Retro Studios,"['Action', 'Shooter', 'First-Person', 'Sci-Fi']",singleplayer,T
4,Super Mario Odyssey,97.0,89.0,['switch'],New Evolution of Mario Sandbox-Style Gameplay....,https://www.metacritic.com/game/switch/super-m...,Nintendo,"['Action', 'Platformer', '3D']",singleplayer,E10+


In [5]:
games_df.shape

(8831, 10)

In [6]:
games_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8831 entries, 0 to 8830
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   game_name    8831 non-null   object 
 1   meta_score   8831 non-null   float64
 2   user_score   8831 non-null   float64
 3   platform     8831 non-null   object 
 4   description  8831 non-null   object 
 5   url          8831 non-null   object 
 6   developer    8821 non-null   object 
 7   genre        8827 non-null   object 
 8   type         6727 non-null   object 
 9   rating       7691 non-null   object 
dtypes: float64(2), object(8)
memory usage: 690.0+ KB


In [7]:
columns = ['game_name','platform' , 'description' , 'developer' , 'genre']

In [8]:
games_df[columns].head(5)

Unnamed: 0,game_name,platform,description,developer,genre
0,The Legend of Zelda: Ocarina of Time,['nintendo-64'],"As a young boy, Link is tricked by Ganondorf, ...",Nintendo,"['Action Adventure', 'Fantasy']"
1,Super Mario Galaxy,['wii'],[Metacritic's 2007 Wii Game of the Year] The u...,Nintendo,"['Action', 'Platformer', '3D']"
2,Super Mario Galaxy 2,['wii'],"Super Mario Galaxy 2, the sequel to the galaxy...",Nintendo EAD Tokyo,"['Action', 'Platformer', '3D']"
3,Metroid Prime,['gamecube'],Samus returns in a new mission to unravel the ...,Retro Studios,"['Action', 'Shooter', 'First-Person', 'Sci-Fi']"
4,Super Mario Odyssey,['switch'],New Evolution of Mario Sandbox-Style Gameplay....,Nintendo,"['Action', 'Platformer', '3D']"


In [9]:
games_df[columns].isna().sum()

game_name       0
platform        0
description     0
developer      10
genre           4
dtype: int64

In [10]:
games_df['developer'] = games_df['developer'].fillna('Unknown')
games_df['genre'] = games_df['genre'].fillna('Action')

In [19]:
games_df['important_features'] = games_df['game_name']+''+games_df['platform']+''+games_df['description']+''+games_df['developer']+''+games_df['genre']

games_df.head(5)

Unnamed: 0,game_name,meta_score,user_score,platform,description,url,developer,genre,type,rating,important_features
0,The Legend of Zelda: Ocarina of Time,99.0,91.0,['nintendo-64'],"As a young boy, Link is tricked by Ganondorf, ...",https://www.metacritic.com/game/nintendo-64/th...,Nintendo,"['Action Adventure', 'Fantasy']",singleplayer,E,The Legend of Zelda: Ocarina of Time['nintendo...
1,Super Mario Galaxy,97.0,91.0,['wii'],[Metacritic's 2007 Wii Game of the Year] The u...,https://www.metacritic.com/game/wii/super-mari...,Nintendo,"['Action', 'Platformer', '3D']",singleplayer,E,Super Mario Galaxy['wii'][Metacritic's 2007 Wi...
2,Super Mario Galaxy 2,97.0,91.0,['wii'],"Super Mario Galaxy 2, the sequel to the galaxy...",https://www.metacritic.com/game/wii/super-mari...,Nintendo EAD Tokyo,"['Action', 'Platformer', '3D']",singleplayer,E,Super Mario Galaxy 2['wii']Super Mario Galaxy ...
3,Metroid Prime,97.0,89.0,['gamecube'],Samus returns in a new mission to unravel the ...,https://www.metacritic.com/game/gamecube/metro...,Retro Studios,"['Action', 'Shooter', 'First-Person', 'Sci-Fi']",singleplayer,T,Metroid Prime['gamecube']Samus returns in a ne...
4,Super Mario Odyssey,97.0,89.0,['switch'],New Evolution of Mario Sandbox-Style Gameplay....,https://www.metacritic.com/game/switch/super-m...,Nintendo,"['Action', 'Platformer', '3D']",singleplayer,E10+,Super Mario Odyssey['switch']New Evolution of ...


Now Lets create Recommendation System based on Game's important features

First we'll compute Term Frequency-Inverse Document Frequency (TF-IDF) vectors for each overview.

In [23]:
#Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

#Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(games_df['important_features'])

#Output the shape of tfidf_matrix
tfidf_matrix.shape

(8831, 34246)

In [37]:
# Compute the cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [25]:
#Construct a reverse map of indices and game names
indices = pd.Series(games_df.index, index=games_df['game_name']).drop_duplicates()

In [38]:
def get_recommendations(game_name, cosine_sim=cosine_sim):
    # Get the index of the game that matches the title
    idx = indices[game_name]

    # Get the pairwsie similarity scores of all games with that game
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the games based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar games
    sim_scores = sim_scores[1:11]

    # Get the game indices
    game_indices = [i[0] for i in sim_scores]

    # Return the top 10 most similar games
    return games_df['game_name'].iloc[game_indices]

We are done with all coding let's call recommendation engine fucntion to get similar games

In [27]:
get_recommendations('The Legend of Zelda: Breath of the Wild')

157            The Legend of Zelda: A Link Between Worlds
840     The Legend of Zelda: Breath of the Wild - The ...
18                The Legend of Zelda: A Link to the Past
4917    Classic NES Series: Zelda II: The Adventure of...
57                     The Legend of Zelda: Skyward Sword
16                The Legend of Zelda Collector's Edition
4792    Cadence of Hyrule: Crypt of the NecroDancer Fe...
8126            The Legend of Zelda: Twilight Princess HD
471               Classic NES Series: The Legend of Zelda
1222                     Hyrule Warriors: Age of Calamity
Name: game_name, dtype: object

In [35]:
get_recommendations('Far Cry 3')

2808                 Castaway Paradise
4205                 Far Cry Vengeance
3583                    Lost in Blue 3
5579           Far Cry 5: Lost on Mars
5708         Friday the 13th: The Game
5124                       Dead Island
5576           Far Cry 3: Blood Dragon
6344                   Lost: Via Domus
1581    Friday the 13th: Killer Puzzle
8191       The Sims 3: Island Paradise
Name: game_name, dtype: object

In [36]:
get_recommendations('Grand Theft Auto V')

1057                     Grand Theft Auto Online
252       Grand Theft Auto: Liberty City Stories
13                  Grand Theft Auto Double Pack
5824            Grand Theft Auto: Chinatown Wars
177     Grand Theft Auto IV: The Lost and Damned
5822                         Grand Theft Auto IV
6436                                     Manhunt
286          Grand Theft Auto: Vice City Stories
5825               Grand Theft Auto: San Andreas
8476                       Tycoon City: New York
Name: game_name, dtype: object

In [39]:
get_recommendations('Resident Evil')

7281                     Resident Evil HD Remaster
7277         Resident Evil Archives: Resident Evil
7278    Resident Evil Archives: Resident Evil Zero
7276                    Resident Evil 7: biohazard
7284         Resident Evil: Operation Raccoon City
7282                Resident Evil Outbreak File #2
2485                 Resident Evil: Deadly Silence
7272                            Resident Evil 4 HD
7283                         Resident Evil Village
7268                               Resident Evil 2
Name: game_name, dtype: object