In [1]:
import pandas as pd
import numpy as np
import scipy.sparse as sparse
import operator

In [2]:
# Load the raw Steam interaction log (the CSV has no header row, so column
# names are supplied here), keep only the "purchase" rows and discard the two
# columns that are no longer needed.
#
# The result is built in a single non-mutating chain: filtering first and then
# calling drop(..., inplace=True) on the filtered frame mutates a slice of the
# original, which is what triggers pandas' SettingWithCopyWarning.
# The original row labels are kept (no reset_index).
data_recommender = (
    pd.read_csv("../data/steam-200k.csv",
                header=None,
                names=['user_id', 'game_id', 'type', 'purchased', 'idk'])
    .loc[lambda frame: frame['type'] == "purchase"]
    .drop(columns=['idk', 'type'])
)

In [3]:
# Preview the first five rows to sanity-check the filtered purchase data.
data_recommender.head(5)

Unnamed: 0,user_id,game_id,purchased
0,151603712,The Elder Scrolls V Skyrim,1.0
2,151603712,Fallout 4,1.0
4,151603712,Spore,1.0
6,151603712,Fallout New Vegas,1.0
8,151603712,Left 4 Dead 2,1.0


In [4]:
# Distinct game titles and distinct user ids found in the purchase data.
# Their positions in these lists later serve as dense integer indices.
unique_games = list(pd.unique(data_recommender['game_id']))
unique_users = list(pd.unique(data_recommender['user_id']))

In [5]:
# Report how many distinct users and games are present in the purchase data.
print("Users: ", len(unique_users), "Games: ", len(unique_games))

Users:  12393 Games:  5155


In [6]:
# Lookup tables between raw identifiers and dense 0-based positions:
#   users_index:          raw user id -> position in unique_users
#   games_index:          game title  -> position in unique_games
#   reverse_games_index:  position    -> game title
users_index = dict(zip(unique_users, range(len(unique_users))))
games_index = dict(zip(unique_games, range(len(unique_games))))
reverse_games_index = dict(enumerate(unique_games))

In [7]:
# Replace raw user ids and game titles with their dense integer indices.
# NOTE: both columns are overwritten, so this cell is not safe to re-run on
# its own; the lookups would fail once the columns already hold indices.
# Re-run the load cell first.
data_recommender['user_id'] = data_recommender['user_id'].map(users_index.__getitem__)
data_recommender['game_id'] = data_recommender['game_id'].map(games_index.__getitem__)

In [8]:
# Preview the frame again to confirm both id columns now hold integer indices.
data_recommender.head()

Unnamed: 0,user_id,game_id,purchased
0,0,0,1.0
2,0,1,1.0
4,0,2,1.0
6,0,3,1.0
8,0,4,1.0


In [9]:
import implicit

In [10]:
# Build the item x user interaction matrix in CSR form: rows are game
# indices, columns are user indices, values are the purchase flags as floats.
# No explicit shape is given, so scipy infers it from the largest indices.
purchase_values = data_recommender['purchased'].astype(float)
game_rows = data_recommender['game_id']
user_cols = data_recommender['user_id']
sparse_item_user = sparse.csr_matrix((purchase_values, (game_rows, user_cols)))

In [11]:
# Fit an ALS matrix-factorisation model (30 latent factors, 20 iterations)
# on the item x user matrix built above.
# NOTE(review): no random seed is passed, so the learned factors (and the
# similarity scores shown below) presumably differ between runs; confirm
# whether the installed `implicit` version accepts a seed argument.
model = implicit.als.AlternatingLeastSquares(factors=30,iterations=20)
model.fit(sparse_item_user)



HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




In [12]:
# User x item view of the same interactions: the transpose of the
# item x user matrix, converted back to CSR.
user_items = sparse_item_user.transpose().tocsr()

In [13]:
def find_similar(game_name, n=10):
    """Print the games the fitted ALS model rates as most similar to a game.

    Relies on the notebook-level names ``model``, ``games_index`` and
    ``reverse_games_index``.

    Parameters
    ----------
    game_name : str
        Game title, exactly as it appears as a key of ``games_index``.
    n : int, optional
        Number of results requested from the model. Defaults to 10, the
        value that used to be hard-coded.

    Returns
    -------
    None
        Results are printed, one ``Game: <title> similarity: <score>`` line
        per item, in the order the model returns them.

    Raises
    ------
    KeyError
        If ``game_name`` is not a key of ``games_index``.
    """
    game_id = games_index[game_name]
    # The model works with integer item ids; translate them back to titles
    # before printing anything.
    named_items = [
        (reverse_games_index[item_id], score)
        for item_id, score in model.similar_items(itemid=game_id, N=n)
    ]
    for title, score in named_items:
        print("Game:", title, "similarity:", score)

In [14]:
# Example query: items the ALS model ranks closest to Skyrim.
find_similar("The Elder Scrolls V Skyrim")

Game: The Elder Scrolls V Skyrim similarity: 0.8172805
Game: The Elder Scrolls V Skyrim - Hearthfire similarity: 0.79042995
Game: The Elder Scrolls V Skyrim - Dawnguard similarity: 0.7880612
Game: The Elder Scrolls V Skyrim - Dragonborn similarity: 0.78765875
Game: Skyrim High Resolution Texture Pack similarity: 0.7381079
Game: Skyrim Script Extender (SKSE) similarity: 0.6412391
Game: RPG Maker MV similarity: 0.63550746
Game: RPG Maker MV Cover Art Characters Pack similarity: 0.63550746
Game: RPG Maker MV Essentials Add-On similarity: 0.6355074
Game: Mind Snares Alice's Journey similarity: 0.58780074


In [15]:
# Pull the learned latent-factor arrays for users and items out of the
# fitted model.
users_latent, items_latent = model.user_factors, model.item_factors

In [16]:
# Key each latent vector by its original identifier (game title / raw user
# id) instead of by its integer index. Despite the names, both are dicts.
games_list = {title: items_latent[row] for title, row in games_index.items()}
user_list = {user_id: users_latent[row] for user_id, row in users_index.items()}

In [84]:
# Show only the first three entries: the dict holds one latent vector per
# game, so displaying it whole floods the notebook output.
{title: games_list[title] for title in list(games_list)[:3]}

{'The Elder Scrolls V Skyrim': array([ 0.10123789, -0.12386379,  0.03026488, -0.1291546 , -0.01814792,
        -0.14741747,  0.31552407, -0.16554683, -0.09263043,  0.26319835,
         0.0759095 , -0.00630583,  0.03303558,  0.13791938,  0.10652603,
         0.42741308, -0.06048607, -0.16107737,  0.16401868,  0.04510145,
        -0.04772431, -0.02106058,  0.11481655,  0.18756506, -0.04553388,
         0.08417933,  0.13242187,  0.12988311, -0.08829109,  0.0227983 ],
       dtype=float32),
 'Fallout 4': array([-0.0034915 ,  0.03923044,  0.01381542,  0.02605645,  0.03037376,
         0.05000309,  0.10435114, -0.01716608,  0.03098231,  0.02084804,
        -0.01295871, -0.00389987,  0.01985668,  0.01269321,  0.00487309,
         0.09148005,  0.01319617, -0.02516556,  0.10484453,  0.03705217,
         0.06591884,  0.01580344, -0.02190168,  0.05008338,  0.00775857,
        -0.01509525,  0.05262512,  0.00266028, -0.00843693, -0.02975995],
       dtype=float32),
 'Spore': array([ 0.00641723,  0.

In [85]:
def find_similar_game(game_name, games, n=10):
    """Print the games whose latent vectors lie closest to ``game_name``.

    Closeness is the Euclidean (L2) distance between vectors, so *smaller*
    values mean more alike, and the queried game itself shows up with a
    distance of 0.0. The printed label is therefore "distance" (it used to
    read "similarity", which suggested the opposite ordering).

    Parameters
    ----------
    game_name : str
        Key of the reference game in ``games``.
    games : dict
        Mapping of game title to its latent vector (a NumPy array).
    n : int, optional
        How many of the closest games to print. Defaults to 10, the value
        that used to be hard-coded.

    Returns
    -------
    None
        Results are printed, closest first.

    Raises
    ------
    KeyError
        If ``game_name`` is not a key of ``games``.
    """
    game_vector = games[game_name]
    distances = [
        (title, np.linalg.norm(game_vector - vector))
        for title, vector in games.items()
    ]
    # Ascending sort: the smallest distance is the nearest neighbour. The sort
    # is stable, so ties keep the dict's insertion order.
    distances.sort(key=operator.itemgetter(1))
    for title, distance in distances[:n]:
        print("Game:", title, "distance:", distance)

In [86]:
# Example query: nearest neighbours of Half-Life by Euclidean distance
# between latent vectors (lower values are closer).
find_similar_game("Half-Life", games_list)

Game: Half-Life similarity: 0.0
Game: Half-Life Blue Shift similarity: 0.03944826
Game: Half-Life Opposing Force similarity: 0.0406978
Game: Team Fortress Classic similarity: 0.046254657
Game: Deathmatch Classic similarity: 0.46659175
Game: Ricochet similarity: 0.46659175
Game: Day of Defeat similarity: 0.46953994
Game: Half-Life Source similarity: 0.5163845
Game: Half-Life Deathmatch Source similarity: 0.5678053
Game: Left 4 Dead similarity: 0.60427004


In [21]:
def recommend_to_user(user, user_list, games_list):
    """Placeholder for per-user recommendations; no logic is implemented here.

    The visible body is only ``pass``, so as written a call does nothing and
    returns ``None``.

    TODO: presumably this should score the vectors in ``games_list`` against
    the ``user`` entry of ``user_list`` and report the best matches; confirm
    the intended behaviour before implementing.
    """
    pass