In [1]:
import numpy as np
import pandas as pd

import sklearn
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
# Load the Steam catalogue, skipping malformed rows instead of aborting the load.
# `on_bad_lines='skip'` replaces `error_bad_lines=False`, which was deprecated in
# pandas 1.3 and removed in pandas 2.0 (it raises TypeError there).
games = pd.read_csv('steamID.csv', encoding='unicode_escape', on_bad_lines='skip')
# Columns are renamed positionally: pandas raises ValueError if the file does not
# contain exactly as many columns as names listed here.
games.columns = [
    'appid', 'name', 'release_date', 'english', 'developer', 'publisher',
    'platforms', 'required_age', 'categories', 'genres', 'steamspy_tags',
    'achievements', 'positive_ratings', 'negative_ratings',
    'average_playtime', 'median_playtime', 'owners', 'price',
]
games.head()

Unnamed: 0,appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price
0,0,Counter-Strike,2000-11-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0.0,124534.0,3339.0,17612.0,317,10000000-20000000,7.19
1,1,Team Fortress Classic,1999-04-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0.0,3318.0,633.0,277.0,62,5000000-10000000,3.99
2,2,Day of Defeat,2003-05-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Valve Anti-Cheat enabled,Action,FPS;World War II;Multiplayer,0.0,3416.0,398.0,187.0,34,5000000-10000000,3.99
3,3,Deathmatch Classic,2001-06-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0.0,1273.0,267.0,258.0,184,5000000-10000000,3.99
4,4,Half-Life: Opposing Force,1999-11-01,1,Gearbox Software,Valve,windows;mac;linux,0,Single-player;Multi-player;Valve Anti-Cheat en...,Action,FPS;Action;Sci-fi,0.0,5250.0,288.0,624.0,415,5000000-10000000,3.99


In [3]:
# Names of the text columns that describe a game
columns = [
    'name',
    'developer',
    'genres',
]

In [4]:
#Creating a function to combine all data i want in 1 column
def combine_features(data, columns=('name', 'developer', 'genres')):
    """Build one space-separated text string per row from the given columns.

    Rows are read in positional order, so the frame may carry any index
    (for example after filtering or dropping rows); the previous version
    looked rows up by the labels 0..n-1 and raised KeyError otherwise.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing every column named in ``columns``.
    columns : sequence of str, optional
        Columns to join, in order. Defaults to name, developer and genres.

    Returns
    -------
    list of str
        One string per row of ``data``, in row order. Each value is passed
        through ``str()``, so a missing value contributes the text ``'nan'``.

    Raises
    ------
    KeyError
        If a requested column is not present in ``data``.
    """
    # zip walks the selected columns in lockstep, yielding one tuple per row
    return [
        ' '.join(str(value) for value in row)
        for row in zip(*(data[column] for column in columns))
    ]

In [5]:
# Attach the joined text of each game as an extra column on the frame
games['combined_features'] = combine_features(data=games)

# Display the frame so the added column can be checked

games

Unnamed: 0,appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price,combined_features
0,0,Counter-Strike,2000-11-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0.0,124534.0,3339.0,17612.0,317,10000000-20000000,7.19,Counter-Strike Valve Action
1,1,Team Fortress Classic,1999-04-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0.0,3318.0,633.0,277.0,62,5000000-10000000,3.99,Team Fortress Classic Valve Action
2,2,Day of Defeat,2003-05-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Valve Anti-Cheat enabled,Action,FPS;World War II;Multiplayer,0.0,3416.0,398.0,187.0,34,5000000-10000000,3.99,Day of Defeat Valve Action
3,3,Deathmatch Classic,2001-06-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0.0,1273.0,267.0,258.0,184,5000000-10000000,3.99,Deathmatch Classic Valve Action
4,4,Half-Life: Opposing Force,1999-11-01,1,Gearbox Software,Valve,windows;mac;linux,0,Single-player;Multi-player;Valve Anti-Cheat en...,Action,FPS;Action;Sci-fi,0.0,5250.0,288.0,624.0,415,5000000-10000000,3.99,Half-Life: Opposing Force Gearbox Software Action
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27071,27070,Room of Pandora,2019-04-24,1,SHEN JIAWEI,SHEN JIAWEI,windows,0,Single-player;Steam Achievements,Adventure;Casual;Indie,Adventure;Indie;Casual,7.0,3.0,0.0,0.0,0,0-20000,2.09,Room of Pandora SHEN JIAWEI Adventure;Casual;I...
27072,27071,Cyber Gun,2019-04-23,1,Semyon Maximov,BekkerDev Studio,windows,0,Single-player,Action;Adventure;Indie,Action;Indie;Adventure,0.0,8.0,1.0,0.0,0,0-20000,1.69,Cyber Gun Semyon Maximov Action;Adventure;Indie
27073,27072,Super Star Blast,2019-04-24,1,EntwicklerX,EntwicklerX,windows,0,Single-player;Multi-player;Co-op;Shared/Split ...,Action;Casual;Indie,Action;Indie;Casual,24.0,0.0,1.0,0.0,0,0-20000,3.99,Super Star Blast EntwicklerX Action;Casual;Indie
27074,27073,New Yankee 7: Deer Hunters,2019-04-17,1,Yustas Game Studio,Alawar Entertainment,windows;mac,0,Single-player;Steam Cloud,Adventure;Casual;Indie,Indie;Casual;Adventure,0.0,2.0,0.0,0.0,0,0-20000,5.19,New Yankee 7: Deer Hunters Yustas Game Studio ...


In [6]:
# Turn the combined text into a token-count matrix (one row per game),
# stored as float32
cm = (
    CountVectorizer()
    .fit_transform(games['combined_features'])
    .astype(np.float32)
)

In [7]:
# Pairwise cosine similarity between every pair of rows of the count matrix;
# row i of `cs` holds the similarity of game i to every game
cs = cosine_similarity(X=cm)
# Uncomment to inspect the raw scores:
#print(cs)

In [8]:
# Query title: the name stored in the row labelled 0
Name = games.loc[0, 'name']

Name

# To pick a title by hand instead:
#Name = 'Counter-Strike'

'Counter-Strike'

In [9]:
# Find the row *position* of the query game. The rows of `cs` follow the row
# order of `games`, so the position is what must index the similarity matrix;
# the `appid` column value is not guaranteed to equal it (the displayed frame
# shows row 27071 carrying appid 27070).
name_matches = (games['name'] == Name).to_numpy().nonzero()[0]
if name_matches.size == 0:
    # Same exception type as the previous `.values[0]` lookup, with a clearer message
    raise IndexError('No game named ' + repr(Name) + ' in the dataset')
# When several rows share the title, the first one is used
game_id = int(name_matches[0])
#show the game id
game_id

0

In [10]:
# Pair each row position with its similarity to the query game
scores = [
    (position, similarity)
    for position, similarity in enumerate(cs[game_id])
]
print(scores)

[(0, 1.0), (1, 0.4472136), (2, 0.4472136), (3, 0.5), (4, 0.18898223), (5, 0.57735026), (6, 0.5), (7, 0.8164966), (8, 0.18898223), (9, 0.5), (10, 0.8944272), (11, 0.4472136), (12, 0.4082483), (13, 0.4472136), (14, 0.4082483), (15, 0.4082483), (16, 0.4082483), (17, 0.57735026), (18, 0.4082483), (19, 0.37796447), (20, 0.5), (21, 0.5), (22, 0.37796447), (23, 0.5), (24, 0.5), (25, 0.57735026), (26, 0.0), (27, 0.17677669), (28, 0.2236068), (29, 0.20412415), (30, 0.0), (31, 0.0), (32, 0.0), (33, 0.0), (34, 0.0), (35, 0.0), (36, 0.0), (37, 0.0), (38, 0.0), (39, 0.0), (40, 0.0), (41, 0.18898223), (42, 0.0), (43, 0.0), (44, 0.16666667), (45, 0.20412415), (46, 0.20412415), (47, 0.2236068), (48, 0.2236068), (49, 0.2236068), (50, 0.2236068), (51, 0.25), (52, 0.2236068), (53, 0.16666667), (54, 0.16666667), (55, 0.18898223), (56, 0.20412415), (57, 0.16666667), (58, 0.16666667), (59, 0.16666667), (60, 0.20412415), (61, 0.28867513), (62, 0.0), (63, 0.2236068), (64, 0.18898223), (65, 0.28867513), (66, 0

In [11]:
# Rank every *other* game by similarity to the query game, highest first.
# The query game is removed by its index instead of by dropping the first
# sorted entry: a lower-indexed game with identical features also scores 1.0
# and sorts ahead of it, in which case dropping the first entry would discard
# that game and leave the query game in its own recommendations.
sorted_scores = sorted(
    (pair for pair in scores if pair[0] != game_id),
    key=lambda pair: pair[1],
    reverse=True,
)
sorted_scores

[(10, 0.8944272),
 (7, 0.8164966),
 (5, 0.57735026),
 (17, 0.57735026),
 (25, 0.57735026),
 (24262, 0.53033006),
 (3, 0.5),
 (6, 0.5),
 (9, 0.5),
 (20, 0.5),
 (21, 0.5),
 (23, 0.5),
 (24, 0.5),
 (23748, 0.5),
 (14618, 0.47434163),
 (1, 0.4472136),
 (2, 0.4472136),
 (11, 0.4472136),
 (13, 0.4472136),
 (2502, 0.4472136),
 (23438, 0.41602516),
 (12, 0.4082483),
 (14, 0.4082483),
 (15, 0.4082483),
 (16, 0.4082483),
 (18, 0.4082483),
 (1880, 0.4082483),
 (2263, 0.4082483),
 (4952, 0.4082483),
 (9065, 0.4082483),
 (14483, 0.4082483),
 (16152, 0.4082483),
 (18240, 0.4082483),
 (19, 0.37796447),
 (22, 0.37796447),
 (4915, 0.37796447),
 (5190, 0.37796447),
 (9238, 0.37796447),
 (10202, 0.37796447),
 (15983, 0.37796447),
 (16741, 0.37796447),
 (2201, 0.35355338),
 (2788, 0.35355338),
 (3932, 0.35355338),
 (5809, 0.35355338),
 (6385, 0.35355338),
 (7721, 0.35355338),
 (8060, 0.35355338),
 (14347, 0.35355338),
 (16700, 0.35355338),
 (16911, 0.35355338),
 (17534, 0.35355338),
 (20498, 0.35355338),


In [13]:
# Print the top-N most similar games together with their (row position, score) pair
top_n = 10
print('The ' + str(top_n) + ' most recommended games to ' + Name + ' are:\n')

# enumerate supplies the rank and the slice caps the list, replacing the
# hand-maintained counter and break
for rank, item in enumerate(sorted_scores[:top_n], start=1):
    # item[0] is a row position in the similarity matrix, so the title is
    # looked up positionally rather than by index label
    game_title = games['name'].iloc[item[0]]
    print(rank, game_title, ' ', item)
        


The 10 most recommended games to Counter-Strike are:

1 Counter-Strike: Source   (10, 0.8944272)
2 Counter-Strike: Condition Zero   (7, 0.8164966)
3 Ricochet   (5, 0.57735026)
4 Portal   (17, 0.57735026)
5 Counter-Strike: Global Offensive   (25, 0.57735026)
6 Conjure Strike   (24262, 0.53033006)
7 Deathmatch Classic   (3, 0.5)
8 Half-Life   (6, 0.5)
9 Half-Life 2   (9, 0.5)
10 Left 4 Dead   (20, 0.5)
