In [12]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors

In [13]:
# load dataframe and remove doubles
df = pd.read_csv('data/game_data.csv', 
                       usecols=['id','title', 'use','hours'],
                      dtype={'id': 'int32','title': 'str', 'use': 'str', 'hours':'float32'})
df = df[df.use != "purchase"]

In [14]:
# create id for games
games = df['title']
games = games.drop_duplicates()
df_games = pd.DataFrame(games)
df_games.insert(0, 'game_id', range(1, df_games.size + 1))
df_games

Unnamed: 0,game_id,title
1,1,The Elder Scrolls V Skyrim
3,2,Fallout 4
5,3,Spore
7,4,Fallout New Vegas
9,5,Left 4 Dead 2
...,...,...
199584,3596,Space Colony
199662,3597,Life is Hard
199666,3598,Executive Assault
199734,3599,O.R.B.


In [15]:
# normalize data
df_copy = df.copy()
df_copy['hours'] = MinMaxScaler().fit_transform(np.array(df_copy['hours']).reshape(-1,1))
df_copy.head()

Unnamed: 0,id,title,use,hours
1,151603712,The Elder Scrolls V Skyrim,play,0.023218
3,151603712,Fallout 4,play,0.007393
5,151603712,Spore,play,0.001259
7,151603712,Fallout New Vegas,play,0.001021
9,151603712,Left 4 Dead 2,play,0.000749


In [16]:
df_games_tmp = df_games[["game_id","title"]]
df_copy = pd.merge(df_copy, df_games_tmp, left_on="title", right_on="title", how="left")
df_copy

Unnamed: 0,id,title,use,hours,game_id
0,151603712,The Elder Scrolls V Skyrim,play,0.023218,1
1,151603712,Fallout 4,play,0.007393,2
2,151603712,Spore,play,0.001259,3
3,151603712,Fallout New Vegas,play,0.001021,4
4,151603712,Left 4 Dead 2,play,0.000749,5
...,...,...,...,...,...
70484,128470551,Fallen Earth,play,0.000196,1350
70485,128470551,Magic Duels,play,0.000179,1290
70486,128470551,Titan Souls,play,0.000119,1275
70487,128470551,Grand Theft Auto Vice City,play,0.000119,1005


In [17]:
df_game_features = df_copy.pivot_table(index='game_id',
                                   columns='id',
                                   values='hours').fillna(0)
mat_game_features = csr_matrix(df_game_features.values)

In [18]:
model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(mat_game_features)

NearestNeighbors(algorithm='brute', metric='cosine')

In [19]:
#query_index = np.random.choice(df_game_features.shape[0])
#query_index = 0

print("what games do you like?")
name = input()

index = df_copy.index[df_copy['title'] == name][0]
ind_col = df_copy['game_id'][index] - 1

query_index = ind_col

distances, indices = model.kneighbors(df_game_features.iloc[query_index, :].values.reshape(1, -1), n_neighbors = 6)



for i in range(0, len(distances.flatten())):
    if i == 0:
        print('Recommendations for {0}:\n'.format(df_copy[df_copy['game_id'] == df_game_features.index[query_index]]['title'].head(1)))
    else:
        print('{0}: {1}, with distance of {2}:'.format(i, df_copy[df_copy['game_id'] == df_game_features.index[indices.flatten()[i]]]['title'].head(1) , distances.flatten()[i]))

what games do you like?


 Grand Theft Auto Vice City


Recommendations for 2618    Grand Theft Auto Vice City
Name: title, dtype: object:

1: 1147    Grand Theft Auto San Andreas
Name: title, dtype: object, with distance of 0.6232748031616211:
2: 14561    Cossacks II Napoleonic Wars
Name: title, dtype: object, with distance of 0.7205737829208374:
3: 2616    Grand Theft Auto III
Name: title, dtype: object, with distance of 0.8225887417793274:
4: 296    Max Payne 2 The Fall of Max Payne
Name: title, dtype: object, with distance of 0.8384779691696167:
5: 12682    Final DOOM
Name: title, dtype: object, with distance of 0.8556896448135376:
