# Import Library

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD, NMF
from sklearn.preprocessing import MinMaxScaler

# Get Data

In [2]:
def load_data():
    user_game_df = pd.read_csv("../raw_data/rawg_user_games.csv")
    user_game_df['purchase'] = 1
    return user_game_df

In [3]:
data = load_data()
data

Unnamed: 0.1,Unnamed: 0,user_id,game_id,game_slug,game_name,user_rating,metacritic,rawg_rating,released,play_time,purchase
0,0,1,446900,high-rise-a-puzzle-cityscape,High Rise - A Puzzle Cityscape,4,,0.00,2020-05-16,0,1
1,1,1,264830,twinfold,Twinfold,5,,0.00,2018-12-05,0,1
2,2,1,58443,pocket-run-pool,Pocket-Run Pool,4,,0.00,2018-05-24,0,1
3,3,1,51288,cinco-paus,Cinco Paus,0,,0.00,2017-12-25,0,1
4,4,1,21889,desert-fox-the-battle-of-el-alamein,Desert Fox: The Battle of El Alamein,4,,0.00,2014-06-26,0,1
...,...,...,...,...,...,...,...,...,...,...,...
255361,255361,999,4062,bioshock-infinite,BioShock Infinite,0,94.0,4.39,2013-03-26,12,1
255362,255362,999,802,borderlands-2,Borderlands 2,0,89.0,4.05,2012-09-18,10,1
255363,255363,999,5679,the-elder-scrolls-v-skyrim,The Elder Scrolls V: Skyrim,0,94.0,4.42,2011-11-11,45,1
255364,255364,999,3328,the-witcher-3-wild-hunt,The Witcher 3: Wild Hunt,0,92.0,4.67,2015-05-18,50,1


In [4]:
data[['user_rating']].value_counts()

user_rating
0              230988
4                9723
3                6435
5                5798
1                2422
dtype: int64

# Rating filtering Matrix

In [5]:
R = data[['user_id', 'game_name', 'purchase']]
R

Unnamed: 0,user_id,game_name,purchase
0,1,High Rise - A Puzzle Cityscape,1
1,1,Twinfold,1
2,1,Pocket-Run Pool,1
3,1,Cinco Paus,1
4,1,Desert Fox: The Battle of El Alamein,1
...,...,...,...
255361,999,BioShock Infinite,1
255362,999,Borderlands 2,1
255363,999,The Elder Scrolls V: Skyrim,1
255364,999,The Witcher 3: Wild Hunt,1


In [6]:
R_train, R_test = train_test_split(R, test_size=0.01, random_state=37)

In [7]:
R_train

Unnamed: 0,user_id,game_name,purchase
5613,29,Too Human,1
251908,983,Endless Space,1
116195,450,PAYDAY 2,1
18624,105,Batman: Arkham City - Game of the Year Edition,1
74432,283,Tomb Raider IV: The Last Revelation,1
...,...,...,...
197596,722,Anoxemia,1
162043,590,Zen Pinball 2,1
205644,750,Alpha Prime,1
20843,112,Pid,1


In [8]:
R_test

Unnamed: 0,user_id,game_name,purchase
131748,507,Medusa's Labyrinth,1
215720,807,Age of Empires II HD,1
40266,176,Salt and Sanctuary,1
71223,277,Need For Speed: Hot Pursuit,1
189604,685,LEGO Batman,1
...,...,...,...
218670,821,Vampire: The Masquerade - Bloodlines,1
110275,434,Nethergate: Resurrection,1
77907,295,Hostile Waters: Antaeus Rising,1
227100,862,Gothic 3,1


In [9]:
def transform_df(df, val):
    game_matrix_df = df.pivot(index = 'game_name', columns ='user_id', values = val).fillna(0)
    return game_matrix_df


In [10]:
R_matrix = transform_df(R_train, 'purchase')
R_matrix

user_id,1,2,3,4,5,6,7,8,9,11,...,986,988,989,991,993,994,995,997,998,999
game_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
! That Bastard Is Trying To Steal Our Gold !,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
!AnyWay!,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
!Dead Pixels Adventure!,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""BUTTS: The VR Experience""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Glow Ball"" - The billiard puzzle game",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
龙魂时刻,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
신기록: A Compendium of Ghosts,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ｄｒｅａｍ Ｄｅａｌｅｒ △,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
🐰RabbiruN🐰,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
def svd_inverse(df, nb_components):
    svd = TruncatedSVD(n_components=nb_components)
    games_factors = svd.fit_transform(df) 
    r = svd.inverse_transform(games_factors)
    return pd.DataFrame(r, index=df.index, columns=df.columns), 

In [12]:
R_pred_svd_inv = svd_inverse(R_matrix, 200)
R_pred_svd_inv

(user_id                                            1         2         3    \
 game_name                                                                    
 ! That Bastard Is Trying To Steal Our Gold ! -0.000068 -0.005655 -0.004681   
 !AnyWay!                                      0.005344 -0.002141 -0.001481   
 !Dead Pixels Adventure!                      -0.001167  0.003326 -0.003892   
 "BUTTS: The VR Experience"                   -0.000886 -0.000838  0.000336   
 "Glow Ball" - The billiard puzzle game        0.002732 -0.008931 -0.010066   
 ...                                                ...       ...       ...   
 龙魂时刻                                         -0.001904 -0.002658  0.000242   
 신기록: A Compendium of Ghosts                  -0.001904 -0.002658  0.000242   
 Ｄｒｅａｍ Ｄｅａｌｅｒ △                                0.000077  0.000152 -0.005657   
 🐰RabbiruN🐰                                   -0.000133  0.001095 -0.000324   
 🔴 Circles                                    -0.007

In [14]:
#'Armada: Modern Tanks' in R_pred_svd_inv.index

In [17]:
def scale_pred_matrix(df):
    scaler = MinMaxScaler()
    df = pd.DataFrame(scaler.fit_transform(df), index=df.index)
    return df

In [18]:
R_pred_svd_inv_scaled = scale_pred_matrix(R_pred_svd_inv)
R_pred_svd_inv_scaled.shape

ValueError: Found array with dim 3. MinMaxScaler expected <= 2.

In [None]:
R_pred_svd_inv_scaled

In [None]:
def get_predicted_ratings(matrix, user_id, game_name):
    try:
        pred = matrix[matrix.index == game_name][user_id][0]
    except:
        #print(f"could not find game {game_name} or user {user_id}")
        pred = 0
    return pred

In [None]:
R_pred_svd_inv_scaled[R_pred_svd_inv_scaled.index == '🔴 Circles'][3]

In [None]:
get_predicted_ratings(R_pred_svd_inv_scaled, 205, 'Armada: Modern Tanks')

In [None]:
get_predicted_ratings(R_pred_svd_inv_scaled, 203, 'Armada: Modern Tanks')

In [None]:
def make_y_pred(test_df, matrix):
    test_df_ = test_df.copy()
    y_pred = []
    for _, row in test_df_.iterrows():
        user_id = row['user_id']
        game_name = row['game_name']
        y_pred.append(get_predicted_ratings(matrix, user_id, game_name))
    test_df_['y_pred'] = y_pred
    return test_df_

In [None]:
df_final = make_y_pred(R_test, R_pred_svd_inv_scaled)
df_final

In [None]:
#mae

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
mean_absolute_error(df_final['purchase'], df_final['y_pred'])

## metrics with ratings

In [None]:
ratings_df = data[['user_id', 'game_name', 'user_rating']]
ratings_df = ratings_df[ratings_df['user_rating'] > 0]
ratings_df

In [None]:
X_train, X_test = train_test_split(ratings_df, test_size=0.01, random_state=37)

In [None]:
X_train['user_rating'].value_counts()

In [None]:
X_test.shape

In [None]:
ratings_matrix = transform_df(X_train, 'user_rating')
ratings_matrix

In [None]:
ratings_pred_svd_inv = svd_inverse(ratings_matrix, 50)
ratings_pred_svd_inv

In [None]:
ratings_pred_svd_inv_scaled = scale_pred_matrix(ratings_pred_svd_inv)
ratings_pred_svd_inv_scaled.shape

In [None]:
ratings_predictions = make_y_pred(X_test, ratings_pred_svd_inv_scaled)
ratings_predictions

In [None]:
mean_absolute_error(ratings_predictions['user_rating'], ratings_predictions['y_pred'])