# Library import

In [61]:
import pandas as pd
import numpy as np

from sklearn.decomposition import TruncatedSVD, NMF

# Data import

In [15]:
def load_data():
    user_game_df = pd.read_csv("../raw_data/rawg_user_games.csv")
    user_game_df['purchase'] = 1
    return user_game_df[['user_id','game_id', 'game_name', 'purchase']]

In [137]:
rawg_user_game_df = load_data()
rawg_user_game_df 

Unnamed: 0,user_id,game_id,game_name,purchase
0,1,446900,High Rise - A Puzzle Cityscape,1
1,1,264830,Twinfold,1
2,1,58443,Pocket-Run Pool,1
3,1,51288,Cinco Paus,1
4,1,21889,Desert Fox: The Battle of El Alamein,1
...,...,...,...,...
255361,999,4062,BioShock Infinite,1
255362,999,802,Borderlands 2,1
255363,999,5679,The Elder Scrolls V: Skyrim,1
255364,999,3328,The Witcher 3: Wild Hunt,1


# Prepare Data

In [17]:
def transform_df(df):
    game_matrix_df = df.pivot(index = 'game_name', columns ='user_id', values = 'purchase').fillna(0)
    return game_matrix_df


In [19]:
matrix = transform_df(rawg_user_game_df)
matrix

user_id,1,2,3,4,5,6,7,8,9,11,...,986,988,989,991,993,994,995,997,998,999
game_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
! That Bastard Is Trying To Steal Our Gold !,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
!AnyWay!,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
!Dead Pixels Adventure!,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""BUTTS: The VR Experience""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Glow Ball"" - The billiard puzzle game",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
龙魂时刻,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
신기록: A Compendium of Ghosts,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ｄｒｅａｍ Ｄｅａｌｅｒ △,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
🐰RabbiruN🐰,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Models


## SVD

In [104]:
def svd_model(df, nb_components):
    """
    df is a dataframe with games in rows and users in columns
    """
    svd = TruncatedSVD(n_components=nb_components)
    games_factors = svd.fit_transform(df) 
    games_factors = pd.DataFrame(games_factors, index=df.index)
    users_factors = pd.DataFrame(svd.components_, columns=df.columns)
    return games_factors, users_factors

In [105]:
games_factors, users_factors = svd_model(matrix, 200)

In [106]:
# games_factors

In [107]:
#users_factors

In [108]:
def compute_prediction(games_factor, users_factors):
    return pd.DataFrame(
        np.dot(games_factors, users_factors), 
        index=games_factors.index, 
        columns=users_factors.columns
    )

In [109]:
pred = compute_prediction(games_factors, users_factors)
#pred = svd_inverse(matrix, 200) 
pred

user_id,1,2,3,4,5,6,7,8,9,11,...,986,988,989,991,993,994,995,997,998,999
game_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
! That Bastard Is Trying To Steal Our Gold !,-0.004675,-0.005379,-0.004536,-0.000401,0.000365,-1.710643e-04,-0.000363,-0.005531,-0.000109,0.001573,...,0.001495,-0.001879,-0.002780,0.001405,-0.002836,-0.002515,-0.001110,-0.014775,-0.008839,0.002071
!AnyWay!,0.004729,-0.001296,-0.001201,0.000282,-0.000188,5.066790e-04,-0.000128,-0.004295,0.000352,0.002554,...,0.000908,0.000237,-0.002962,-0.001635,-0.002620,-0.002674,-0.002984,-0.005161,0.002041,0.006637
!Dead Pixels Adventure!,-0.002478,0.005418,-0.003972,0.000531,0.000728,-5.732443e-04,0.000165,-0.006699,-0.001793,0.001580,...,0.000053,-0.001706,0.011108,-0.003724,0.004155,-0.014511,-0.002572,0.016447,-0.006457,0.004849
"""BUTTS: The VR Experience""",0.002529,-0.002986,0.000378,-0.000653,0.000292,1.109948e-07,0.000188,-0.001884,0.001073,-0.003243,...,-0.000038,-0.001513,0.002247,0.001683,-0.003421,0.002162,-0.000260,-0.000328,-0.001719,-0.001362
"""Glow Ball"" - The billiard puzzle game",-0.004838,-0.017052,-0.008407,-0.001794,-0.000260,-1.097040e-03,0.000240,0.003660,-0.005029,0.000893,...,0.005137,-0.000028,-0.005283,-0.001495,-0.003679,0.008574,-0.004971,-0.031050,-0.035615,-0.019344
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
龙魂时刻,-0.000253,-0.002904,0.000327,-0.000248,-0.000070,-2.571061e-04,0.000101,-0.000591,-0.000064,-0.000712,...,-0.000061,0.000243,0.001159,0.000806,-0.001261,0.000186,-0.001378,-0.000183,0.000711,0.001879
신기록: A Compendium of Ghosts,-0.000253,-0.002904,0.000327,-0.000248,-0.000070,-2.571061e-04,0.000101,-0.000591,-0.000064,-0.000712,...,-0.000061,0.000243,0.001159,0.000806,-0.001261,0.000186,-0.001378,-0.000183,0.000711,0.001879
Ｄｒｅａｍ Ｄｅａｌｅｒ △,-0.003471,0.002159,-0.005479,-0.000429,0.000833,-1.496192e-04,-0.000098,-0.014996,-0.001851,0.003323,...,0.001375,-0.006212,0.007957,-0.004835,0.000943,-0.016640,-0.002826,0.000897,-0.007288,0.011371
🐰RabbiruN🐰,0.001672,-0.000551,-0.000335,0.000243,-0.000297,4.980657e-05,0.000084,-0.001104,-0.000331,0.000890,...,0.000431,0.003149,-0.000792,-0.000062,0.000146,-0.000397,-0.000731,-0.000985,-0.002762,0.005668


In [110]:
def predict_the_next_game(gamer_id, prediction_df, game_per_user_df):
    user_game_df = pd.DataFrame({
    "predicts": prediction_df[gamer_id],
    "portfolio":game_per_user_df[gamer_id]
    })
    user_game_df['score'] = 0.0
    for game in user_game_df.index :
        if user_game_df.portfolio[game] < 1.0 :
            user_game_df['score'][game] = user_game_df['predicts'][game]
    return user_game_df.sort_values('score', ascending=False)

In [111]:
predict_the_next_game(42, pred, matrix).head(15)

Unnamed: 0_level_0,predicts,portfolio,score
game_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
The Playroom,0.243809,0.0,0.243809
Tom Clancy's Splinter Cell: Conviction,0.228162,0.0,0.228162
Dead Star,0.213998,0.0,0.213998
Kingdoms of Amalur: Reckoning,0.203838,0.0,0.203838
Risen 3 - Titan Lords,0.202833,0.0,0.202833
ENSLAVED: Odyssey to the West Premium Edition,0.200975,0.0,0.200975
PlayerUnknown’s Battlegrounds,0.194564,0.0,0.194564
SMITE,0.193367,0.0,0.193367
NBA 2K16,0.184786,0.0,0.184786
Red Dead Redemption 2,0.183634,0.0,0.183634


In [116]:
# pred

In [117]:
# svd = TruncatedSVD(n_components=200)
# games_factors = svd.fit_transform(matrix) 

In [118]:
# games_factors.shape

In [119]:
# svd.inverse_transform(games_factors)

In [120]:
# np.dot(games_factors, users_factors)

In [121]:
# matrix

## NMF

In [None]:
def nmf_model(df, nb_components):
    """
    df is a dataframe with games in rows and users in columns
    """
    svd = NMF(n_components=nb_components)
    games_factors = svd.fit_transform(df) 
    games_factors = pd.DataFrame(games_factors, index=df.index)
    users_factors = pd.DataFrame(svd.components_, columns=df.columns)
    return games_factors, users_factors

In [113]:
games_factors_nmf, users_factors_nmf = svd_model(matrix, 200)

In [114]:
pred_nmf = compute_prediction(games_factors, users_factors)
pred_nmf

user_id,1,2,3,4,5,6,7,8,9,11,...,986,988,989,991,993,994,995,997,998,999
game_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
! That Bastard Is Trying To Steal Our Gold !,-0.004675,-0.005379,-0.004536,-0.000401,0.000365,-1.710643e-04,-0.000363,-0.005531,-0.000109,0.001573,...,0.001495,-0.001879,-0.002780,0.001405,-0.002836,-0.002515,-0.001110,-0.014775,-0.008839,0.002071
!AnyWay!,0.004729,-0.001296,-0.001201,0.000282,-0.000188,5.066790e-04,-0.000128,-0.004295,0.000352,0.002554,...,0.000908,0.000237,-0.002962,-0.001635,-0.002620,-0.002674,-0.002984,-0.005161,0.002041,0.006637
!Dead Pixels Adventure!,-0.002478,0.005418,-0.003972,0.000531,0.000728,-5.732443e-04,0.000165,-0.006699,-0.001793,0.001580,...,0.000053,-0.001706,0.011108,-0.003724,0.004155,-0.014511,-0.002572,0.016447,-0.006457,0.004849
"""BUTTS: The VR Experience""",0.002529,-0.002986,0.000378,-0.000653,0.000292,1.109948e-07,0.000188,-0.001884,0.001073,-0.003243,...,-0.000038,-0.001513,0.002247,0.001683,-0.003421,0.002162,-0.000260,-0.000328,-0.001719,-0.001362
"""Glow Ball"" - The billiard puzzle game",-0.004838,-0.017052,-0.008407,-0.001794,-0.000260,-1.097040e-03,0.000240,0.003660,-0.005029,0.000893,...,0.005137,-0.000028,-0.005283,-0.001495,-0.003679,0.008574,-0.004971,-0.031050,-0.035615,-0.019344
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
龙魂时刻,-0.000253,-0.002904,0.000327,-0.000248,-0.000070,-2.571061e-04,0.000101,-0.000591,-0.000064,-0.000712,...,-0.000061,0.000243,0.001159,0.000806,-0.001261,0.000186,-0.001378,-0.000183,0.000711,0.001879
신기록: A Compendium of Ghosts,-0.000253,-0.002904,0.000327,-0.000248,-0.000070,-2.571061e-04,0.000101,-0.000591,-0.000064,-0.000712,...,-0.000061,0.000243,0.001159,0.000806,-0.001261,0.000186,-0.001378,-0.000183,0.000711,0.001879
Ｄｒｅａｍ Ｄｅａｌｅｒ △,-0.003471,0.002159,-0.005479,-0.000429,0.000833,-1.496192e-04,-0.000098,-0.014996,-0.001851,0.003323,...,0.001375,-0.006212,0.007957,-0.004835,0.000943,-0.016640,-0.002826,0.000897,-0.007288,0.011371
🐰RabbiruN🐰,0.001672,-0.000551,-0.000335,0.000243,-0.000297,4.980657e-05,0.000084,-0.001104,-0.000331,0.000890,...,0.000431,0.003149,-0.000792,-0.000062,0.000146,-0.000397,-0.000731,-0.000985,-0.002762,0.005668


In [115]:
predict_the_next_game(42, pred_nmf, matrix).head(15)

Unnamed: 0_level_0,predicts,portfolio,score
game_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
The Playroom,0.243809,0.0,0.243809
Tom Clancy's Splinter Cell: Conviction,0.228162,0.0,0.228162
Dead Star,0.213998,0.0,0.213998
Kingdoms of Amalur: Reckoning,0.203838,0.0,0.203838
Risen 3 - Titan Lords,0.202833,0.0,0.202833
ENSLAVED: Odyssey to the West Premium Edition,0.200975,0.0,0.200975
PlayerUnknown’s Battlegrounds,0.194564,0.0,0.194564
SMITE,0.193367,0.0,0.193367
NBA 2K16,0.184786,0.0,0.184786
Red Dead Redemption 2,0.183634,0.0,0.183634


## SVD_Inverse

In [None]:
def svd_inverse(df, nb_components):
    svd = TruncatedSVD(n_components=nb_components)
    games_factors = svd.fit_transform(df) 
    r = svd.inverse_transform(games_factors)
    return pd.DataFrame(r, index=df.index, columns=df.columns)

In [122]:
pred_svd_inv = svd_inverse(matrix, 200)

In [123]:
predict_the_next_game(42, pred_svd_inv, matrix).head(15)

Unnamed: 0_level_0,predicts,portfolio,score
game_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Tom Clancy's Splinter Cell: Conviction,0.33714,0.0,0.33714
Mass Effect: Andromeda,0.321518,0.0,0.321518
The Talos Principle,0.28567,0.0,0.28567
Heavy Rain,0.281564,0.0,0.281564
Tom Clancy's Rainbow Six Siege,0.275695,0.0,0.275695
Castlevania: Lords of Shadow 2,0.269826,0.0,0.269826
STAR WARS Battlefront,0.265757,0.0,0.265757
Devil May Cry 4,0.252216,0.0,0.252216
Batman: Arkham City,0.251443,0.0,0.251443
DC Universe Online,0.251436,0.0,0.251436


## NMF inverse

In [127]:
def nmf_inverse(df, nb_components):
    svd = NMF(n_components=nb_components)
    games_factors = svd.fit_transform(df) 
    r = svd.inverse_transform(games_factors)
    return pd.DataFrame(r, index=df.index, columns=df.columns)

In [134]:
pred_nmf_inv = nmf_inverse(matrix, 200)



In [135]:
predict_the_next_game(42, pred_nmf_inv, matrix).head(15)

Unnamed: 0_level_0,predicts,portfolio,score
game_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Transistor,0.054837,0.0,0.054837
The Guest,0.046218,0.0,0.046218
Castlevania: Lords of Shadow,0.041122,0.0,0.041122
Gemini Rue,0.039079,0.0,0.039079
Tropico 3,0.039069,0.0,0.039069
Kult: Heretic Kingdoms,0.039043,0.0,0.039043
Primordia,0.037304,0.0,0.037304
Ace Combat 6: Fires of Liberation,0.034392,0.0,0.034392
Algotica - Iteration 1,0.034339,0.0,0.034339
Gears of War 2,0.034315,0.0,0.034315


# Split set