In [1]:
import pandas as pd
import difflib
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
def load_data(path="raw_data/rawg_user_games.csv"):
    """Read the raw user/game table from a CSV file.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the path this notebook
        originally hard-coded, so ``load_data()`` behaves exactly as before.

    Returns
    -------
    pandas.DataFrame
        The file contents as parsed by ``pd.read_csv``.
    """
    return pd.read_csv(path)

In [3]:
def get_ratings(df):
    """Return the user/game/rating triples for rows with a positive rating.

    Rows whose ``user_rating`` is not greater than zero are dropped. The
    result keeps only the ``user_id``, ``game_id`` and ``user_rating``
    columns and retains the original row labels.
    """
    rated_rows = df['user_rating'] > 0
    wanted_columns = ['user_id', 'game_id', 'user_rating']
    return df.loc[rated_rows, wanted_columns]

In [4]:
# Read the raw CSV, then keep only the rows that carry a positive rating
# (reduced to the user_id / game_id / user_rating columns).
data = load_data()
rating_df = get_ratings(data)

In [5]:
rating_df

Unnamed: 0,user_id,game_id,user_rating
0,rasha1,366889,1
1,rasha1,33,4
3,rasha1,3727,4
4,rasha1,58084,3
5,rasha1,10533,4
...,...,...,...
104809,Guigsan,857,4
104810,Guigsan,58616,4
104811,Guigsan,28153,5
104812,Guigsan,4166,5


In [7]:
rating_df[rating_df['game_id'] == 366889]

Unnamed: 0,user_id,game_id,user_rating
0,rasha1,366889,1
7552,609,366889,4
17263,1372,366889,5
32165,2709,366889,4
38540,3214,366889,4
42691,3499,366889,5


In [8]:
rating_df.head(20)

Unnamed: 0,user_id,game_id,user_rating
0,rasha1,366889,1
1,rasha1,33,4
3,rasha1,3727,4
4,rasha1,58084,3
5,rasha1,10533,4
6,rasha1,3387,5
7,rasha1,2551,5
8,rasha1,11973,5
9,rasha1,3070,4
10,rasha1,802,4


In [92]:
aa = rating_df[rating_df['user_id'] == '24']
aa.shape

(19, 3)

In [89]:
aa = aa[['game_id', 'user_rating']]
aa

Unnamed: 0,game_id,user_rating
0,366889,1
1,33,4
3,3727,4
4,58084,3
5,10533,4
6,3387,5
7,2551,5
8,11973,5
9,3070,4
10,802,4


In [90]:
aa_dict = aa.to_dict("records")
aa_dict

[{'game_id': 366889, 'user_rating': 1},
 {'game_id': 33, 'user_rating': 4},
 {'game_id': 3727, 'user_rating': 4},
 {'game_id': 58084, 'user_rating': 3},
 {'game_id': 10533, 'user_rating': 4},
 {'game_id': 3387, 'user_rating': 5},
 {'game_id': 2551, 'user_rating': 5},
 {'game_id': 11973, 'user_rating': 5},
 {'game_id': 3070, 'user_rating': 4},
 {'game_id': 802, 'user_rating': 4},
 {'game_id': 5679, 'user_rating': 4},
 {'game_id': 3328, 'user_rating': 5}]

In [14]:
def transform_df(df):
    """Pivot long-format ratings into a user x game rating matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``user_id``, ``game_id`` and ``user_rating``.

    Returns
    -------
    pandas.DataFrame
        One row per ``user_id`` and one column per ``game_id``; cells hold
        ``user_rating`` and pairs with no rating are filled with 0.

    Notes
    -----
    ``DataFrame.pivot`` raises ``ValueError`` when the same
    (user_id, game_id) pair appears more than once. Such duplicates are
    collapsed first, keeping the last occurrence, so one repeated row cannot
    abort the whole transformation. Input without duplicates produces exactly
    the same matrix as a plain pivot.
    """
    deduplicated = df.drop_duplicates(subset=['user_id', 'game_id'], keep='last')
    game_matrix_df = deduplicated.pivot(
        index='user_id', columns='game_id', values='user_rating'
    ).fillna(0)
    return game_matrix_df

In [15]:
rating_matrix = transform_df(rating_df)

In [86]:
rating_matrix.shape

(2245, 10406)

In [17]:
u_id = rating_matrix.index

In [28]:
u_id.shape

(2245,)

In [19]:
rating_matrix.shape

(2245, 10406)

In [20]:
game_id_matrix = rating_matrix.columns

In [27]:
game_id_matrix.shape

(10406,)

In [25]:
real_sample = rating_matrix.iloc[0].values
real_sample

array([0., 0., 0., ..., 0., 0., 0.])

In [29]:
# Small hand-written rating collection: one record per game, each with a
# "game_id" and a "user_rating".
new_user = [
    {"game_id": 26, "user_rating": 4},
    {"game_id": 28, "user_rating": 5},
    {"game_id": 256, "user_rating": 0},
]

In [31]:
X_matrix = pd.DataFrame(game_id_matrix)

In [32]:
X_matrix.shape

(10406, 1)

In [33]:
X_matrix

Unnamed: 0,game_id
0,2
1,7
2,12
3,14
4,15
...,...
10401,522978
10402,527389
10403,537744
10404,544024


In [34]:
X_matrix['ratings'] = 0

In [35]:
X_matrix = X_matrix.set_index('game_id')

In [36]:
X_matrix.shape

(10406, 1)

In [40]:
X_matrix

Unnamed: 0_level_0,ratings
game_id,Unnamed: 1_level_1
2,0
7,0
12,0
14,0
15,0
...,...
522978,0
527389,0
537744,0
544024,0


In [37]:
256 in X_matrix.index

False

In [38]:
new_user[0]["user_rating"]

4

In [39]:
new_user

[{'game_id': 26, 'user_rating': 4},
 {'game_id': 28, 'user_rating': 5},
 {'game_id': 256, 'user_rating': 0}]

In [41]:
# Copy each rating from the collection into the 'ratings' column of X_matrix,
# whose index holds the game ids taken from the rating matrix columns.
for game in aa_dict:
    game_id, ratings = game["game_id"], game["user_rating"]
    if game_id not in X_matrix.index:
        # No row for this game in X_matrix, so there is nowhere to store it.
        continue
    X_matrix.loc[game_id, 'ratings'] = ratings

In [43]:
X_matrix[X_matrix.index == 284]

Unnamed: 0_level_0,ratings
game_id,Unnamed: 1_level_1
284,5


In [49]:
X = X_matrix['ratings'].values

In [50]:
X

array([0, 0, 0, ..., 0, 0, 0])

In [53]:
pd.DataFrame(X).value_counts()

0    10387
4        8
3        6
5        5
dtype: int64

In [None]:
X_matrix.shape

In [None]:
X

## Import preprocessor

In [54]:
import pickle5 as pickle

In [55]:
# Load the pickled preprocessor object from the current working directory.
# NOTE(review): unpickling can execute arbitrary code, so 'preproc.pickle'
# should only ever come from a trusted source.
with open('preproc.pickle', "rb") as input_file:
    proc = pickle.load(input_file)

In [58]:
preproc_matrix = proc.rating_matrix

In [59]:
preproc_matrix

game_id,2,7,12,14,15,20,21,22,24,25,...,514897,516113,516940,517303,520354,522978,527389,537744,544024,548148
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1007,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10083,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10092,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9982,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Guigsan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
! ls ..

In [60]:
u_id = preproc_matrix.index

In [61]:
u_id

Index(['1', '1002', '1007', '10083', '10092', '101', '10108', '1024', '1034',
       '10433',
       ...
       '970', '9744', '978', '9813', '984', '997', '998', '9982', 'Guigsan',
       'rasha1'],
      dtype='object', name='user_id', length=2245)

In [62]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
samples = preproc_matrix

In [63]:
samples.shape

(2245, 10406)

In [65]:
# Fit a nearest-neighbour index over the rows of `samples` (one row per user
# in the rating matrix). n_neighbors=1 makes each query return only the
# single closest row by default; the distance metric is left at the
# scikit-learn default.
neigh = NearestNeighbors(n_neighbors=1)
neigh.fit(samples)

NearestNeighbors(n_neighbors=1)

In [66]:
user_collection1 = [{"game_id": 10646, "user_rating": 4},
 {"game_id": 19279, "user_rating": 5},
 {"game_id": 18099, "user_rating": 3},
 {"game_id": 19495, "user_rating": 4},
 {"game_id": 264828, "user_rating": 3},
 {"game_id": 11593, "user_rating": 5},
 {"game_id": 21371, "user_rating": 4},
 {"game_id": 19442, "user_rating": 4},
 {"game_id": 51487, "user_rating": 4},
 {"game_id": 2536, "user_rating": 4},
 {"game_id": 284, "user_rating": 5},
 {"game_id": 3955, "user_rating": 4},
 {"game_id": 59637, "user_rating": 3},
 {"game_id": 19458, "user_rating": 5},
 {"game_id": 2830, "user_rating": 4},
 {"game_id": 1682, "user_rating": 3},
 {"game_id": 4223, "user_rating": 3},
 {"game_id": 39, "user_rating": 3},
 {"game_id": 2454, "user_rating": 5}]

In [67]:
X = proc.get_X_vector(user_collection1)

In [81]:
pd.DataFrame(X).value_counts()

0    10387
4        8
3        6
5        5
dtype: int64

In [76]:
X.reshape(-1, 1).transpose()

(1, 10406)

In [78]:
# Query the fitted index with X laid out as a single row; with
# return_distance=False only the neighbour's row position is returned.
y = neigh.kneighbors(X.reshape(1, -1), n_neighbors=1, return_distance=False)

In [79]:
y

array([[990]])

In [82]:
y_index = y[0][0]

In [83]:
y_index

990

In [87]:
u_id

Index(['1', '1002', '1007', '10083', '10092', '101', '10108', '1024', '1034',
       '10433',
       ...
       '970', '9744', '978', '9813', '984', '997', '998', '9982', 'Guigsan',
       'rasha1'],
      dtype='object', name='user_id', length=2245)

In [84]:
y_final = u_id[y_index]

In [85]:
y_final

'24'

In [None]:
user_collection2 = [
      {"game_id":546464, "user_rating": 4},
      {"game_id":30933, "user_rating": 4},
      {"game_id":35971, "user_rating": 4}
    ]

In [None]:
X2 = proc.get_X_vector(user_collection2)

In [None]:
# Query the fitted index with X2 laid out as a single row; with
# return_distance=False only the neighbour's row position is returned.
y2 = neigh.kneighbors(X2.reshape(1, -1), n_neighbors=1, return_distance=False)

In [None]:
y2_index = y2[0][0]

In [None]:
y2_index

In [None]:
y2_final = u_id[y2_index]
y2_final