In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [6]:
# Load the Steam games dataset from a CSV file located relative to the working
# directory, then preview the first rows to check that it parsed as expected.
df = pd.read_csv(r'steam_games.csv')
df.head()

Unnamed: 0,url,types,name,desc_snippet,recent_reviews,all_reviews,release_date,developer,publisher,popular_tags,game_details,languages,achievements,genre,game_description,mature_content,minimum_requirements,recommended_requirements,original_price,discount_price
0,https://store.steampowered.com/app/379720/DOOM/,app,DOOM,Now includes all three premium DLC packs (Unto...,"Very Positive,(554),- 89% of the 554 user revi...","Very Positive,(42,550),- 92% of the 42,550 use...","May 12, 2016",id Software,"Bethesda Softworks,Bethesda Softworks","FPS,Gore,Action,Demons,Shooter,First-Person,Gr...","Single-player,Multi-player,Co-op,Steam Achieve...","English,French,Italian,German,Spanish - Spain,...",54.0,Action,"About This Game Developed by id software, the...",,"Minimum:,OS:,Windows 7/8.1/10 (64-bit versions...","Recommended:,OS:,Windows 7/8.1/10 (64-bit vers...",$19.99,$14.99
1,https://store.steampowered.com/app/578080/PLAY...,app,PLAYERUNKNOWN'S BATTLEGROUNDS,PLAYERUNKNOWN'S BATTLEGROUNDS is a battle roya...,"Mixed,(6,214),- 49% of the 6,214 user reviews ...","Mixed,(836,608),- 49% of the 836,608 user revi...","Dec 21, 2017",PUBG Corporation,"PUBG Corporation,PUBG Corporation","Survival,Shooter,Multiplayer,Battle Royale,PvP...","Multi-player,Online Multi-Player,Stats","English,Korean,Simplified Chinese,French,Germa...",37.0,"Action,Adventure,Massively Multiplayer",About This Game PLAYERUNKNOWN'S BATTLEGROUND...,Mature Content Description The developers de...,"Minimum:,Requires a 64-bit processor and opera...","Recommended:,Requires a 64-bit processor and o...",$29.99,
2,https://store.steampowered.com/app/637090/BATT...,app,BATTLETECH,Take command of your own mercenary outfit of '...,"Mixed,(166),- 54% of the 166 user reviews in t...","Mostly Positive,(7,030),- 71% of the 7,030 use...","Apr 24, 2018",Harebrained Schemes,"Paradox Interactive,Paradox Interactive","Mechs,Strategy,Turn-Based,Turn-Based Tactics,S...","Single-player,Multi-player,Online Multi-Player...","English,French,German,Russian",128.0,"Action,Adventure,Strategy",About This Game From original BATTLETECH/Mec...,,"Minimum:,Requires a 64-bit processor and opera...","Recommended:,Requires a 64-bit processor and o...",$39.99,
3,https://store.steampowered.com/app/221100/DayZ/,app,DayZ,The post-soviet country of Chernarus is struck...,"Mixed,(932),- 57% of the 932 user reviews in t...","Mixed,(167,115),- 61% of the 167,115 user revi...","Dec 13, 2018",Bohemia Interactive,"Bohemia Interactive,Bohemia Interactive","Survival,Zombies,Open World,Multiplayer,PvP,Ma...","Multi-player,Online Multi-Player,Steam Worksho...","English,French,Italian,German,Spanish - Spain,...",,"Action,Adventure,Massively Multiplayer",About This Game The post-soviet country of Ch...,,"Minimum:,OS:,Windows 7/8.1 64-bit,Processor:,I...","Recommended:,OS:,Windows 10 64-bit,Processor:,...",$44.99,
4,https://store.steampowered.com/app/8500/EVE_On...,app,EVE Online,EVE Online is a community-driven spaceship MMO...,"Mixed,(287),- 54% of the 287 user reviews in t...","Mostly Positive,(11,481),- 74% of the 11,481 u...","May 6, 2003",CCP,"CCP,CCP","Space,Massively Multiplayer,Sci-fi,Sandbox,MMO...","Multi-player,Online Multi-Player,MMO,Co-op,Onl...","English,German,Russian,French",,"Action,Free to Play,Massively Multiplayer,RPG,...",About This Game,,"Minimum:,OS:,Windows 7,Processor:,Intel Dual C...","Recommended:,OS:,Windows 10,Processor:,Intel i...",Free,


In [7]:
# List every available column so the relevant features can be picked from it.
df.columns

Index(['url', 'types', 'name', 'desc_snippet', 'recent_reviews', 'all_reviews',
       'release_date', 'developer', 'publisher', 'popular_tags',
       'game_details', 'languages', 'achievements', 'genre',
       'game_description', 'mature_content', 'minimum_requirements',
       'recommended_requirements', 'original_price', 'discount_price'],
      dtype='object')

In [11]:
#Making dataframe with relevant features
features = ['name','developer','publisher','genre','mature_content']
# Take an explicit copy of the selection: later cells modify this frame and add
# a column to it, and mutating a selection of another frame is the situation
# pandas reports as SettingWithCopyWarning (hidden in this notebook because all
# warnings are silenced in the import cell).
df = df[features].copy()
df

Unnamed: 0,name,developer,publisher,genre,mature_content
0,DOOM,id Software,"Bethesda Softworks,Bethesda Softworks",Action,
1,PLAYERUNKNOWN'S BATTLEGROUNDS,PUBG Corporation,"PUBG Corporation,PUBG Corporation","Action,Adventure,Massively Multiplayer",Mature Content Description The developers de...
2,BATTLETECH,Harebrained Schemes,"Paradox Interactive,Paradox Interactive","Action,Adventure,Strategy",
3,DayZ,Bohemia Interactive,"Bohemia Interactive,Bohemia Interactive","Action,Adventure,Massively Multiplayer",
4,EVE Online,CCP,"CCP,CCP","Action,Free to Play,Massively Multiplayer,RPG,...",
...,...,...,...,...,...
40828,Rocksmith® 2014 Edition – Remastered – Sabaton...,Ubisoft - San Francisco,,"Casual,Simulation",
40829,Rocksmith® 2014 Edition – Remastered – Stone T...,Ubisoft - San Francisco,,"Casual,Simulation",
40830,Fantasy Grounds - Quests of Doom 4: A Midnight...,"SmiteWorks USA, LLC",,"Indie,RPG,Strategy",
40831,Mega Man X5 Sound Collection,"CAPCOM CO., LTD","CAPCOM CO., LTD,CAPCOM CO., LTD",Action,


In [12]:
# Number of missing values in each selected column
df.isna().sum()

name                 16
developer           343
publisher          5100
genre               438
mature_content    37936
dtype: int64

In [15]:
#drop games with no names
# Rebind instead of mutating in place, and renumber the rows 0..n-1 afterwards:
# dropping rows leaves gaps in the index labels, which would otherwise stop
# matching the row positions of the count/similarity matrices that are built
# from this frame further down.
df = df.dropna(subset=['name']).reset_index(drop=True)

In [16]:
# Re-check missing values per column now that unnamed games are gone
df.isna().sum()

name                  0
developer           327
publisher          5085
genre               423
mature_content    37920
dtype: int64

In [17]:
#Replace the remaining missing values with empty strings
df = df.fillna('')
# Every column should now report zero missing values
df.isna().sum()

name              0
developer         0
publisher         0
genre             0
mature_content    0
dtype: int64

In [18]:
#Combine all features
# Join only the columns listed in `features` (space-separated, as strings).
# Joining every column of `df` would also pick up `combined_features` itself
# when this cell is run a second time, duplicating the text on each re-run.
df['combined_features'] = df[features].astype(str).agg(' '.join, axis=1)
df.head()

Unnamed: 0,name,developer,publisher,genre,mature_content,combined_features
0,DOOM,id Software,"Bethesda Softworks,Bethesda Softworks",Action,,"DOOM id Software Bethesda Softworks,Bethesda S..."
1,PLAYERUNKNOWN'S BATTLEGROUNDS,PUBG Corporation,"PUBG Corporation,PUBG Corporation","Action,Adventure,Massively Multiplayer",Mature Content Description The developers de...,PLAYERUNKNOWN'S BATTLEGROUNDS PUBG Corporation...
2,BATTLETECH,Harebrained Schemes,"Paradox Interactive,Paradox Interactive","Action,Adventure,Strategy",,BATTLETECH Harebrained Schemes Paradox Interac...
3,DayZ,Bohemia Interactive,"Bohemia Interactive,Bohemia Interactive","Action,Adventure,Massively Multiplayer",,"DayZ Bohemia Interactive Bohemia Interactive,B..."
4,EVE Online,CCP,"CCP,CCP","Action,Free to Play,Massively Multiplayer,RPG,...",,"EVE Online CCP CCP,CCP Action,Free to Play,Mas..."


In [19]:
#Convert text into matrix of token counts
# Each row of the resulting sparse matrix corresponds to one row of `df`
# (in the same order); each column corresponds to one token of the vocabulary
# learned from `combined_features`.
cv = CountVectorizer()
count_matrix = cv.fit_transform(df['combined_features'])
count_matrix

<40817x39055 sparse matrix of type '<class 'numpy.int64'>'
	with 459635 stored elements in Compressed Sparse Row format>

In [20]:
#Calculate cosine similarity
# Compares every row of count_matrix with every other row; entry [i, j] is the
# similarity between the i-th and j-th game (by row position).
# NOTE(review): the result is a dense n x n array (n = 40817 in the recorded
# run); assuming 8-byte floats that is roughly 13 GB of memory - confirm the
# machine can hold it before re-running this cell.
cosine_sim = cosine_similarity(count_matrix)
cosine_sim

array([[1.        , 0.03077287, 0.07715167, ..., 0.06804138, 0.05025189,
        0.        ],
       [0.03077287, 1.        , 0.05698029, ..., 0.        , 0.01855674,
        0.02585438],
       [0.07715167, 0.05698029, 1.        , ..., 0.06299408, 0.04652421,
        0.06482037],
       ...,
       [0.06804138, 0.        , 0.06299408, ..., 1.        , 0.        ,
        0.0571662 ],
       [0.05025189, 0.01855674, 0.04652421, ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.02585438, 0.06482037, ..., 0.0571662 , 0.        ,
        1.        ]])

In [21]:
#function that takes in a game title and returns the most similar games
def get_recommendations(title, no_of_recommendations):
    """Return the games most similar to ``title``.

    Relies on the module-level ``df`` (one row per game) and ``cosine_sim``
    (square similarity matrix whose i-th row/column corresponds to the i-th
    row of ``df`` *by position*).

    Parameters
    ----------
    title : str
        Exact value of the ``name`` column of the game to look up. If several
        rows share the name, the first one is used.
    no_of_recommendations : int
        How many similar games to return.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` for the most similar games, most similar first. The
        queried game itself is never included.

    Raises
    ------
    IndexError
        If no row of ``df`` has ``name == title``.
    """
    # Look up the row *position* of the game. The index label must not be used
    # here: rows were dropped from df, so labels and positions can differ,
    # while cosine_sim is addressed purely by position.
    matches = np.flatnonzero((df['name'] == title).to_numpy())
    if matches.size == 0:
        raise IndexError(f"No game named {title!r} found in df['name']")
    pos = int(matches[0])

    # Similarity of the queried game to every game, keyed by row position.
    score_series = pd.Series(cosine_sim[pos])

    # Remove the queried game explicitly rather than assuming it sorts first:
    # exact duplicates (or float rounding) can tie with or outrank the
    # self-similarity score.
    score_series = score_series.drop(pos).sort_values(ascending=False)

    # Positions of the most similar games
    top_positions = list(score_series.iloc[:no_of_recommendations].index)

    #return most similar games
    return df.iloc[top_positions]

In [24]:
# Example: the five games most similar to DOOM
get_recommendations('DOOM', no_of_recommendations=5)

Unnamed: 0,name,developer,publisher,genre,mature_content,combined_features
788,DOOM VFR,id Software,"Bethesda Softworks,Bethesda Softworks",Action,,"DOOM VFR id Software Bethesda Softworks,Bethes..."
839,Doom 3: BFG Edition,id Software,"Bethesda Softworks,Bethesda Softworks",Action,,Doom 3: BFG Edition id Software Bethesda Softw...
1823,RAGE,id Software,"Bethesda Softworks,Bethesda Softworks",Action,,"RAGE id Software Bethesda Softworks,Bethesda S..."
1763,Wolfenstein 3D,id Software,"Bethesda-Softworks,Bethesda-Softworks",Action,,"Wolfenstein 3D id Software Bethesda-Softworks,..."
1303,Quake Live™,id Software,"Bethesda Softworks,Bethesda Softworks",Action,,"Quake Live™ id Software Bethesda Softworks,Bet..."
