In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("Dataset.csv", lineterminator='\n') 
df.rename( columns={'Unnamed: 0':'Index'}, inplace=True )
df.head()

Unnamed: 0,Index,Name,Link,Summary,Genres,Developer,Img_Link
0,0,Disco Elysium: The Final Cut,https://www.metacritic.com/game/pc/disco-elysi...,Disco Elysium - The Final Cut is the definitiv...,"Role-Playing,General,Western-Style",ZA/UM,https://static.metacritic.com/images/products/...
1,1,Half-Life 2,https://www.metacritic.com/game/pc/half-life-2,[Metacritic's 2004 PC Game of the Year] By ta...,"Action,Shooter,Shooter,First-Person,Sci-Fi,Sci...",ValveSoftware,https://static.metacritic.com/images/products/...
2,2,Grand Theft Auto V,https://www.metacritic.com/game/pc/grand-theft...,Los Santos: a sprawling sun-soaked metropolis ...,"Modern,ActionAdventure,Open-World",RockstarNorth,https://static.metacritic.com/images/products/...
3,3,Out of the Park Baseball 2007,https://www.metacritic.com/game/pc/out-of-the-...,[Metacritic's 2007 PC Game of the Year] OOTP ...,"Sports,Traditional,Team,Baseball,Management,Ma...",SportsInteractive,https://static.metacritic.com/images/products/...
4,4,The Orange Box,https://www.metacritic.com/game/pc/the-orange-box,Games included in The Orange Box compilation: ...,"Action,Miscellaneous,Shooter,Compilation,First...",ValveSoftware,https://static.metacritic.com/images/products/...


In [3]:
df.describe()

Unnamed: 0,Index
count,7122.0
mean,3560.5
std,2056.088641
min,0.0
25%,1780.25
50%,3560.5
75%,5340.75
max,7121.0


In [4]:
df.isnull().sum()

Index         0
Name          0
Link          0
Summary      79
Genres        0
Developer    19
Img_Link      0
dtype: int64

In [5]:
df["Developer"].fillna("N/A", inplace=True)
df.isnull().sum()

Index         0
Name          0
Link          0
Summary      79
Genres        0
Developer     0
Img_Link      0
dtype: int64

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7122 entries, 0 to 7121
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Index      7122 non-null   int64 
 1   Name       7122 non-null   object
 2   Link       7122 non-null   object
 3   Summary    7043 non-null   object
 4   Genres     7122 non-null   object
 5   Developer  7122 non-null   object
 6   Img_Link   7122 non-null   object
dtypes: int64(1), object(6)
memory usage: 389.6+ KB


In [7]:
df['Summary'].head()

0    Disco Elysium - The Final Cut is the definitiv...
1    [Metacritic's 2004 PC Game of the Year]  By ta...
2    Los Santos: a sprawling sun-soaked metropolis ...
3    [Metacritic's 2007 PC Game of the Year]  OOTP ...
4    Games included in The Orange Box compilation: ...
Name: Summary, dtype: object

In [8]:
df['tags'] = df["Summary"] + df["Genres"] + df["Developer"]

In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf = TfidfVectorizer(stop_words='english')
df['tags'] = df['tags'].fillna('')
tfidf_matrix = tfidf.fit_transform(df['tags'])
tfidf_matrix.shape

(7122, 33634)

In [10]:
tfidf.get_feature_names()[6000:6010]



['charlie',
 'charlotte',
 'charm',
 'charmaine',
 'charmed',
 'charming',
 'charms',
 'charnel',
 'charr',
 'chart']

In [11]:
from sklearn.metrics.pairwise import linear_kernel

cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [12]:
cosine_sim.shape

(7122, 7122)

In [13]:
indices = pd.Series(df.index, index=df['Name']).drop_duplicates()

In [14]:
indices[:10]

Name
Disco Elysium: The Final Cut                      0
Half-Life 2                                       1
Grand Theft Auto V                                2
Out of the Park Baseball 2007                     3
The Orange Box                                    4
Half-Life                                         5
BioShock                                          6
Baldur's Gate II: Shadows of Amn                  7
Divinity: Original Sin II - Definitive Edition    8
Portal 2                                          9
dtype: int64

In [15]:
def get_rec(name, cosine_sim=cosine_sim):
    idx = indices[name]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:21]
    game_indices = [i[0] for i in sim_scores]
    return df['Name'].iloc[game_indices]

In [16]:
get_rec("The Witcher 3: Wild Hunt")

625              Thronebreaker: The Witcher Tales
375                           Monster Hunter Rise
275                         Monster Hunter: World
396                 The Witcher: Enhanced Edition
1161      Monster Hunter Stories 2: Wings of Ruin
4762                   The Witcher Adventure Game
364              Monster Hunter: World - Iceborne
293             The Witcher 2: Assassins of Kings
2640                              Aegis Defenders
2490               Borderlands 3: Bounty of Blood
59      The Witcher 3: Wild Hunt - Blood and Wine
401                 Monster Hunter Rise: Sunbreak
1329                               Hunt: Showdown
354                        Divinity: Original Sin
668           Horizon Zero Dawn: Complete Edition
27                      Divinity: Original Sin II
2592                                 Eldest Souls
6710                             Archangel (2002)
5916      Hindsight 20/20: Wrath of the Raakshasa
3664                               Book of Demons


In [17]:
get_rec("Need for Speed: Underground")

5639                                 Need for Speed World
786                                    Shift 2: Unleashed
1194                   Need for Speed: Most Wanted (2005)
4171                            Need for Speed: ProStreet
2727                               Need for Speed: Rivals
5753                          Juiced 2: Hot Import Nights
6745                        Ford Bold Moves Street Racing
4659                                       World Racing 2
1990       Need for Speed: Most Wanted - A Criterion Game
4405                              Need for Speed: The Run
1424    TOCA Race Driver 2: The Ultimate Racing Simulator
1111                        Need for Speed: Underground 2
5829                                           Test Drive
6651                                          Ford Racing
1825          Street Fighter: 30th Anniversary Collection
939                           RACE 07: Official WTCC Game
870                                 Need for Speed: Shift
311           

In [18]:
get_rec("Overwatch")

1214                    Overwatch 2
5754                  Breach (2011)
6778         Special Forces: Team X
7023                      The Flock
2410           Section 8: Prejudice
7059                New World Order
2759               ShootMania Storm
7057                         Rekoil
7078                 Sniper Rust VR
4045                    Combat Arms
5344                        Ravaged
897                        Paladins
6545                          Purge
6731                  Ace of Spades
6670               Empire Earth III
4890            The Showdown Effect
3301      Delta Force: Land Warrior
1101            Monday Night Combat
1409                  Battlefield V
7044    Shadow Harvest: Phantom Ops
Name: Name, dtype: object

In [20]:
get_rec("Firewatch")

1746       Fallout 3: Point Lookout
1673                        Proteus
6590             Escape Dead Island
4676                        Pathway
1929                   Don't Starve
5134    The Magnificent Trufflepigs
5471                 Summer in Mara
5742                       Betrayer
5826              Ryse: Son of Rome
6262                Paradise (2006)
6774                   Unknown Fate
4352                The Red Lantern
2808                      Outlast 2
5810                Summer Catchers
3049                       Event[0]
2283                  Tower of Time
6718                 We Are Chicago
2149              Song in the Smoke
6114            Secrets of Raetikon
6695               Railroad Pioneer
Name: Name, dtype: object

In [19]:
get_rec("Grand Theft Auto V")

2310                    Lords of the Realm II
2945                                  Manhunt
2686                                  Neo Cab
39              Grand Theft Auto: San Andreas
6101                APB (All Points Bulletin)
6463                        INSOMNIA: The Ark
3542                      Hitman: Codename 47
6023                            Ignite (2011)
4926                    Ministry of Broadcast
4928                    Tycoon City: New York
1043    Gemini Rue: Verschworung auf Barracus
5481                         Just Die Already
514                   Dragon Age: Inquisition
3820                      Need for Speed Heat
534                  Aliens Versus Predator 2
5641                  Hood: Outlaws & Legends
5753              Juiced 2: Hot Import Nights
1074              Need for Speed: Underground
6658                                 Postal 2
3858                                 The Crew
Name: Name, dtype: object