In [1]:
# Importing libraries

import pandas as pd
import numpy as np

In [2]:
# Load the main Steam games table. Columns used later in this notebook:
# name, release_date, developer, publisher, categories, genres, owners.
# The path is relative to the notebook's working directory.

prime = pd.read_csv("../DATA/steam.csv")

In [3]:
# Sanity check: count the missing values in every column of `prime`.
# The dataset was cleaned beforehand, so every count is expected to be 0.

prime.isna().sum()

appid               0
name                0
release_date        0
english             0
developer           0
publisher           0
platforms           0
required_age        0
categories          0
genres              0
steamspy_tags       0
achievements        0
positive_ratings    0
negative_ratings    0
average_playtime    0
median_playtime     0
owners              0
price               0
dtype: int64

In [4]:
# Load the SteamSpy tag table: an appid column followed by one column per tag.
# The path is relative to the notebook's working directory.

tags = pd.read_csv("../DATA/steamspy_tag_data.csv")

In [5]:
# Sanity check: count the missing values in every column of `tags`.
# The dataset was cleaned beforehand, so every count is expected to be 0.

tags.isna().sum()

appid           0
1980s           0
1990s           0
2.5d            0
2d              0
               ..
world_war_i     0
world_war_ii    0
wrestling       0
zombies         0
e_sports        0
Length: 372, dtype: int64

In [6]:
# Splitting the ';'-separated text columns into lists of labels for encoding

cats, gens, devs, pubs = (
    prime[column].str.split(';')
    for column in ('categories', 'genres', 'developer', 'publisher')
)

In [7]:
from sklearn.preprocessing import MultiLabelBinarizer
# One binarizer instance; it is refitted for every feature that gets encoded below.
mlb = MultiLabelBinarizer()

In [8]:
# Using MultiLabel Binarizer for One Hot Encoding
#
# Every table is encoded straight from `prime` instead of overwriting the
# split lists in place, so re-running this cell cannot feed an already
# encoded frame back into the binarizer.

def _one_hot(column):
    """One-hot encode a ';'-separated column of `prime`, one output column per label."""
    encoded = mlb.fit_transform(prime[column].str.split(';'))
    # classes_ belongs to the fit that just ran, so it labels this frame only
    return pd.DataFrame(encoded, columns = mlb.classes_)

cats = _one_hot('categories') #Categories
gens = _one_hot('genres')     #Genres
devs = _one_hot('developer')  #Devs
pubs = _one_hot('publisher')  #Publishers

In [9]:
# Tags, one row per row of `prime` and in the same order.
# Similarity() takes a row position from `prime` and reads that same position
# here, so the tag rows are matched to `prime` by appid rather than trusting
# both CSV files to list the games in the same order. Games without a tag row
# get all-zero tags.
tagsf = (
    prime[['appid']]
    .merge(tags.drop_duplicates('appid'), on = 'appid', how = 'left')
    .drop('appid', axis = 1)
    .fillna(0)
)

In [10]:
# Obtaining year from date
#
# The year is the text before the first '-' of release_date (kept as a string).
# The split is done by the vectorised .str accessor; building one pd.Series
# per row with .apply(pd.Series) gives the same column but is far slower.

year = prime['release_date'].str.split('-', n = 1, expand = True)[0]

In [11]:
# Estimating the owners of each game as the mean of the numbers in its
# '-'-separated owners range (used below to keep games with mean owners > 10,000)
#
# The result is a Series, so it is named with .rename(); assigning to
# `.columns` on a Series only sets an unused attribute.

own = (
    prime['owners']
    .str.split('-', expand = True)   # one column per '-'-separated part
    .apply(pd.to_numeric)
    .mean(axis = 1)
    .rename('mean')
)

In [12]:
# Game title next to its estimated owner count
st = pd.DataFrame({'name': prime['name'], 'own': own})

In [13]:
# Candidate titles for recommendation: games with mean owners > 10,000
names = st.loc[st['own'] > 10000, 'name']

In [14]:
from sklearn.metrics.pairwise import cosine_similarity as cs

In [15]:
# Using cosine similarity to find games with closest features

def Similarity(game1, game2):
    """Return the weighted cosine similarity between two games.

    Each game is looked up by exact title in ``prime``; when a title occurs
    more than once, its first row is used. For each of the five feature
    tables (tags, categories, genres, developers, publishers) the cosine
    similarity of the two games' rows is computed, and the five scores are
    combined as a weighted average.

    Parameters
    ----------
    game1, game2 : str
        Exact game titles as stored in ``prime.name``.

    Returns
    -------
    numpy.ndarray
        The combined score as returned by the pairwise cosine similarity of
        two single-row inputs; callers read the value with ``[0][0]``.

    Raises
    ------
    IndexError
        If either title is not present in ``prime``.
    """

    def row_position(title):
        # Positional row number, not index label: the feature tables are
        # addressed with .iloc, so this stays correct even when `prime` does
        # not carry a default 0..n-1 index.
        hits = np.flatnonzero((prime.name == title).values)
        if hits.size == 0:
            raise IndexError("game not found in database: {!r}".format(title))
        return hits[0]

    ind1 = row_position(game1)
    ind2 = row_position(game2)

    # (feature table, weight) - each score is multiplied by a value depending
    # on importance (chosen arbitrarily)
    weighted_features = (
        (tagsf, 10),   #Tags
        (cats, 5),     #Categories
        (gens, 7.5),   #Genres
        (devs, 5),     #Developers
        (pubs, 2.5),   #Publishers
    )

    total = 0
    weight_sum = 0
    for table, weight in weighted_features:
        a = table.iloc[ind1].values.reshape(1, -1)
        b = table.iloc[ind2].values.reshape(1, -1)
        total = total + weight * cs(a, b)
        weight_sum += weight

    # weight_sum is 30 for the weights above
    return total / weight_sum

In [16]:
import operator

In [17]:
# Interactive recommender: pick a game by title, then rank the most similar games

def predict(top_n=10):
    """Interactively recommend the games most similar to a chosen title.

    Prompts for a search term, prints every title in ``prime`` that contains
    it, then prompts for the exact title to use. Every title in ``names``
    other than the chosen one is scored against it with ``Similarity``.

    Parameters
    ----------
    top_n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    list of (title, score) tuples
        Highest score first, at most ``top_n`` entries.

    Raises
    ------
    IndexError
        If the entered title does not exactly match a name in ``prime``.
    """
    text = input('Enter a game title : ')
    print("Games that match in database: ")
    # regex=False: the search term is user input and must be matched
    # literally; a term containing regex metacharacters such as '+', '('
    # or '?' would otherwise raise or match the wrong titles.
    game = prime[prime['name'].str.contains(text, regex = False)]['name']
    print(game.to_string(index = False))
    new = input('Enter the game : ')
    print('Selected game : ', new)

    # Fail fast with a readable message instead of an out-of-bounds error
    # from the first Similarity call. IndexError is kept as the exception
    # type so existing handlers keep working.
    if not (prime['name'] == new).any():
        raise IndexError("game not found in database: {!r}".format(new))

    sims = [(title, Similarity(new, title)[0][0]) for title in names if title != new]
    sims.sort(key=operator.itemgetter(1), reverse = True)

    return sims[:top_n]

In [18]:
# Interactive run: asks for a search term, then for the exact title to get recommendations for
predict()

Enter a game title : Counter-Strike
Games that match in database: 
                  Counter-Strike
  Counter-Strike: Condition Zero
          Counter-Strike: Source
Counter-Strike: Global Offensive
   Counter-Strike Nexon: Zombies
Enter the game : Counter-Strike: Global Offensive
Selected game :  Counter-Strike: Global Offensive


[('Team Fortress 2', 0.7868367712364619),
 ('Counter-Strike: Source', 0.778155172500101),
 ('Day of Defeat: Source', 0.7395063367131635),
 ('Counter-Strike: Condition Zero', 0.7248721395324492),
 ('Counter-Strike', 0.7167997167102086),
 ('Day of Defeat', 0.7042400938515111),
 ('Half-Life 2: Deathmatch', 0.6952429304712673),
 ('Left 4 Dead 2', 0.6950004981825599),
 ('Half-Life Deathmatch: Source', 0.6940970613741658),
 ('Team Fortress Classic', 0.6861884852288639)]

In [19]:
# Interactive run: asks for a search term, then for the exact title to get recommendations for
predict()

Enter a game title : Grand Theft Auto
Games that match in database: 
                        Grand Theft Auto III
                 Grand Theft Auto: Vice City
               Grand Theft Auto: San Andreas
                            Grand Theft Auto
                          Grand Theft Auto 2
                         Grand Theft Auto IV
Grand Theft Auto: Episodes from Liberty City
                          Grand Theft Auto V
Enter the game : Grand Theft Auto: San Andreas
Selected game :  Grand Theft Auto: San Andreas


[('Grand Theft Auto: Vice City', 0.7357589653104039),
 ('Max Payne 2: The Fall of Max Payne', 0.7333415107812489),
 ('Half-Life: Blue Shift', 0.6847258860472657),
 ('Half-Life: Source', 0.6746247955111944),
 ('Grand Theft Auto III', 0.6632664456366444),
 ('Call of Duty: United Offensive', 0.6481993376850743),
 ("Tom Clancy's Rainbow Six® 3 Gold", 0.6397891462676494),
 ('Ultimate Doom', 0.6392875731650906),
 ('Half-Life 2: Lost Coast', 0.6377978193680611),
 ('Grand Theft Auto', 0.6345243310289337)]

In [20]:
# Interactive run: asks for a search term, then for the exact title to get recommendations for
predict()

Enter a game title : Hotline Miami
Games that match in database: 
                Hotline Miami
Hotline Miami 2: Wrong Number
Enter the game : Hotline Miami 2: Wrong Number
Selected game :  Hotline Miami 2: Wrong Number


[('RUINER', 0.6843389701100918),
 ('Disastr_Blastr', 0.659673599738897),
 ('Red Game Without A Great Name', 0.6477818075876812),
 ('Daedalus - No Escape', 0.6443008907726375),
 ('Visitors', 0.6434853460477419),
 ('HitBox', 0.6433748610186377),
 ('Bullshot', 0.6359348493936086),
 ('Hotline Miami', 0.6358444214915816),
 ('Katana ZERO', 0.6348377853324283),
 ('Ellipsis', 0.6335969604531034)]

In [21]:
# Interactive run: asks for a search term, then for the exact title to get recommendations for
predict()

Enter a game title : Sleeping Dogs
Games that match in database: 
Sleeping Dogs: Definitive Edition
Enter the game : Sleeping Dogs: Definitive Edition
Selected game :  Sleeping Dogs: Definitive Edition


[('Life is Strange: Before the Storm', 0.6058569040484315),
 ('Spirits of Xanadu', 0.6038657522696406),
 ('Resident Evil 6 / Biohazard 6', 0.6031368099953286),
 ('The LEGO® Movie - Videogame', 0.5962080199362043),
 ('LEGO® Jurassic World', 0.5799315809292589),
 ('Mega Man Legacy Collection 2 / ロックマン クラシックス コレクション 2', 0.5780984653467898),
 ('Insecticide Part 1', 0.5774534799994158),
 ('Spaceport Hope', 0.5757674567795924),
 ('NARUTO SHIPPUDEN: Ultimate Ninja STORM 4', 0.5567201119537109),
 ('Shutshimi', 0.5534459170136924)]

In [22]:
# Interactive run: asks for a search term, then for the exact title to get recommendations for
predict()

Enter a game title : Batman
Games that match in database: 
                      LEGO® Batman™: The Videogame
    Batman: Arkham Asylum Game of the Year Edition
    Batman: Arkham City - Game of the Year Edition
                            Batman™: Arkham Knight
                           Batman™: Arkham Origins
                  LEGO® Batman™ 2: DC Super Heroes
Batman™: Arkham Origins Blackgate - Deluxe Edition
                    LEGO® Batman™ 3: Beyond Gotham
                      Batman - The Telltale Series
                                Batman™: Arkham VR
    Batman: The Enemy Within - The Telltale Series
Enter the game : Batman: Arkham City - Game of the Year Edition
Selected game :  Batman: Arkham City - Game of the Year Edition


[('Batman: Arkham Asylum Game of the Year Edition', 0.6666666666666665),
 ('Life is Strange: Before the Storm', 0.6349127452820155),
 ('LEGO® Jurassic World', 0.6315894325021372),
 ('Mad Max', 0.6284674208099924),
 ('Disney Epic Mickey 2:  The Power of Two', 0.6258782020493653),
 ('Thief', 0.6087852752813592),
 ('The LEGO® Movie - Videogame', 0.5805690111248356),
 ('LEGO® Star Wars™ - The Complete Saga', 0.578694089242577),
 ('Batman™: Arkham Knight', 0.5783348996486836),
 ('Hitman: Absolution™', 0.577935791788991)]