# imports

In [1]:
# imports
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from ast import literal_eval
from sklearn.model_selection import KFold, train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
from surprise import Reader, Dataset, SVD # for collabrotative filtering and matrix factorization
from surprise.model_selection import cross_validate
import string
import re
from nltk.corpus import stopwords

import warnings; warnings.simplefilter('ignore')
pd.set_option('display.max_columns', None)

In [2]:
# Load the Steam dataset from a path relative to the working directory and preview it.
df = pd.read_csv('Steam Dataset.csv')
df.head()

Unnamed: 0,Positive,Negative,Game_id,Game_name,Positive_Rate,links,Game_description
0,285421,3487,620,Portal 2,0.98793,https://store.steampowered.com/app/620,"<div class=""game_area_description"" id=""game_ar..."
1,102805,1640,400,Portal,0.984298,https://store.steampowered.com/app/400,"<div class=""game_area_description"" id=""game_ar..."
2,439665,8619,413150,Stardew Valley,0.980773,https://store.steampowered.com/app/413150,"<div class=""game_area_description"" id=""game_ar..."
3,476762,9701,431960,Wallpaper Engine,0.980058,https://store.steampowered.com/app/431960,"<div class=""game_area_description"" id=""game_ar..."
4,910629,19342,105600,Terraria,0.979202,https://store.steampowered.com/app/105600,"<div class=""game_area_description"" id=""game_ar..."


In [8]:
# NOTE(review): the saved output under this cell is already-cleaned text (it matches
# the output shown after the preprocessing cell), so this cell was evidently last run
# after preprocessing; on a fresh top-to-bottom run it would show the raw HTML instead.
df['Game_description'][5]

"game play world's number 1 online action game. engage incredibly realistic brand terrorist warfare wildly popular team-based game. ally teammates complete strategic missions. take enemy sites. rescue hostages. role affects team's success. team's success affects role."

# Stopwords

In [2]:
# Tokens to discard: NLTK's English stop words plus each single punctuation character.
punctuation = list(string.punctuation)
stop = set(stopwords.words('english')).union(punctuation)

# Preprocessing


In [6]:
def remove_stopwords(text):
    """Return ``text`` with every stop token removed.

    The text is split on whitespace; a token is dropped when its lowercase
    form is a member of the module-level ``stop`` set. Surviving tokens keep
    their original casing and are re-joined with single spaces.
    """
    tokens = (raw.strip() for raw in text.split())
    return " ".join(token for token in tokens if token.lower() not in stop)

def processPost(text:str):
    """Turn a raw (HTML) game description into lowercase, stop-word-free text.

    Steps, in order:
      1. replace HTML tags with a space,
      2. replace URLs (``www.…`` or ``http(s)://…``) with a space,
      3. collapse runs of whitespace,
      4. lowercase,
      5. drop stop words via ``remove_stopwords``.

    Args:
        text: The raw description. Non-string values (e.g. ``None`` or the NaN
            pandas uses for a missing cell) are treated as an empty description.

    Returns:
        The cleaned description as a single space-separated string.
    """
    # Missing descriptions would otherwise make re.sub raise a TypeError.
    if not isinstance(text, str):
        return ""
    # Tags go first: a URL inside an attribute (href="https://…">Label</a>) would
    # otherwise be matched up to the next whitespace and swallow the visible label.
    # [^>]* also spans newlines, so multi-line tags are handled.
    text = re.sub(r'<[^>]*>', ' ', text)
    text = re.sub(r'(www\.\S+)|(https?://\S+)', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    text = text.lower()
    return remove_stopwords(text)
# Overwrite the raw descriptions with their cleaned form, one value at a time.
df['Game_description'] = df['Game_description'].map(processPost)

In [9]:
# Spot-check one cleaned description (row label 5).
df.loc[5, 'Game_description']

"game play world's number 1 online action game. engage incredibly realistic brand terrorist warfare wildly popular team-based game. ally teammates complete strategic missions. take enemy sites. rescue hostages. role affects team's success. team's success affects role."

In [10]:
# Show the frame after cleaning; Game_description now holds the processed text.
df

Unnamed: 0,Positive,Negative,Game_id,Game_name,Positive_Rate,links,Game_description
0,285421,3487,620,Portal 2,0.987930,https://store.steampowered.com/app/620,game portal 2 draws award-winning formula inno...
1,102805,1640,400,Portal,0.984298,https://store.steampowered.com/app/400,game portal™ new single player game valve. set...
2,439665,8619,413150,Stardew Valley,0.980773,https://store.steampowered.com/app/413150,game stardew valley open-ended country-life rp...
3,476762,9701,431960,Wallpaper Engine,0.980058,https://store.steampowered.com/app/431960,software wallpaper engine enables use live wal...
4,910629,19342,105600,Terraria,0.979202,https://store.steampowered.com/app/105600,"game dig, fight, explore, build: world fingert..."
...,...,...,...,...,...,...,...
95,1108250,875814,578080,PUBG: BATTLEGROUNDS,0.558576,https://store.steampowered.com/app/578080,"game land, loot, survive! play pubg: battlegro..."
96,115650,93024,433850,Z1 Battle Royale,0.554214,https://store.steampowered.com/app/433850,"game z1 battle royale free play, fast-paced, a..."
97,1315,1176,439700,Z1 Battle Royale: Test Server,0.527900,https://store.steampowered.com/app/439700,"game test server z1 battle royale free play, f..."
98,30501,29952,1089350,NBA 2K20,0.504541,https://store.steampowered.com/app/1089350,game note: multiplayer servers nba 2k20 shutdo...


# TF-IDF

In [12]:
# Vectorise the cleaned descriptions as TF-IDF weights over unigrams and bigrams.
# min_df=1 keeps every term, exactly as the integer 0 did, but integer values
# below 1 are rejected by parameter validation in recent scikit-learn releases.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(df['Game_description'])

In [13]:
# (number of descriptions, number of distinct unigram/bigram features)
tfidf_matrix.shape

(100, 17449)

# Cosine Similarity

In [14]:
# Pairwise similarity between all descriptions. TfidfVectorizer L2-normalises each
# row by default (norm='l2'), so the plain dot product is the cosine similarity.
# With no second argument, linear_kernel compares the matrix against itself.
cosine_sim = linear_kernel(tfidf_matrix)

In [15]:
# Sanity check: the matrix is square (one row and one column per game) ...
print(cosine_sim.shape)
# ... and row 0 holds the similarity of game 0 to every game, itself included.
print(cosine_sim[0])

(100, 100)
[1.         0.11926131 0.03270857 0.01515316 0.02097364 0.02882757
 0.01162177 0.05256713 0.05242094 0.01769296 0.0246597  0.06152613
 0.01531197 0.00898642 0.0216172  0.03370947 0.02356805 0.01538263
 0.02061917 0.05367107 0.04740144 0.01053809 0.02692721 0.02176376
 0.01389294 0.01369197 0.04568278 0.01862118 0.01867454 0.01596634
 0.0086455  0.07947391 0.03524796 0.01245734 0.00992491 0.04309343
 0.02870796 0.00243108 0.06675055 0.01801148 0.02570804 0.04427376
 0.01591303 0.01492361 0.03136281 0.02145427 0.00653059 0.02380225
 0.02900405 0.01929094 0.02611634 0.01270843 0.01577796 0.00332088
 0.02278299 0.01541152 0.02863519 0.0061923  0.01781712 0.03442806
 0.02861987 0.02915476 0.04982444 0.02169729 0.02139933 0.01932556
 0.07750882 0.01395743 0.00734137 0.01853358 0.01506764 0.01394257
 0.01244654 0.02841873 0.01604578 0.02356259 0.00664962 0.07750882
 0.0108122  0.01778489 0.01723673 0.02415425 0.02509614 0.02259877
 0.01701886 0.01515388 0.01045817 0.01680468 0.0135

# Recommendations from cosine similarity scores

In [19]:
# Copy of the frame with the index moved into a column (not used by the cells shown here).
smd = df.reset_index()
# Game titles, in the same row order as the similarity matrix.
titles = df['Game_name']
# Reverse lookup: game title -> row label in df.
indices = pd.Series(df.index, index=titles)

In [25]:
def get_recommendations(Game_name, top_n=30):
    """Return the titles most similar to ``Game_name``, best match first.

    Similarity is read from the module-level ``cosine_sim`` matrix; the title
    is resolved to a row through ``indices`` and results are taken from
    ``titles``.

    Args:
        Game_name: Title to look up in ``indices``.
        top_n: Maximum number of recommendations to return (default 30).
            Values below zero are treated as zero.

    Returns:
        A pandas Series of game titles, ordered by descending similarity.
        Ties keep their original row order.

    Raises:
        KeyError: If ``Game_name`` is not present in ``indices``.
    """
    idx = indices[Game_name]
    # A title that occurs more than once makes the lookup return a Series;
    # fall back to its first occurrence so the row lookup below stays 1-D.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)
    # Exclude the query game by position rather than assuming it sorts first:
    # another game with an identical description also scores 1.0 and may precede it.
    Gameindices = [pos for pos, _ in ranked if pos != idx][:max(top_n, 0)]
    return titles.iloc[Gameindices]

In [32]:
# Ten closest matches for Terraria, best first.
get_recommendations('Terraria').head(10)

87                  Lost Ark
47             7 Days to Die
18                   Valheim
73                 Fallout 4
2             Stardew Valley
89                 New World
72                     Trove
29          Cities: Skylines
35    A Story About My Uncle
56              Apex Legends
Name: Game_name, dtype: object

In [33]:
# Ten closest matches for Portal, best first.
get_recommendations('Portal').head(10)

0                           Portal 2
66      Fall Guys: Ultimate Knockout
77                NARAKA: BLADEPOINT
83      Half-Life Deathmatch: Source
7                        Half-Life 2
8                      Left 4 Dead 2
24                  Human: Fall Flat
31    Call of Duty: Modern Warfare 2
20                       Dying Light
99                  Battlefield 2042
Name: Game_name, dtype: object