In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import pickle
from sklearn import preprocessing
from scipy.sparse.linalg import svds
import seaborn as sns
import matplotlib.pyplot as plt
from progressbar import progressbar
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score


In [4]:
df = pd.read_feather('C:/Users/AdamS/Desktop/SteamRevs/Data/updated/highRevs.feather')
positive = df[df.voted_up == True].head(1_000_000).copy()
negative = df[df.voted_up == False].copy()

In [19]:
negative = negative[negative['steamid'].isin(positive['steamid'])]

In [20]:
negative

Unnamed: 0,steamid,appid,voted_up,votes_up,votes_funny,weighted_vote_score,playtime_forever,playtime_at_review,num_games_owned,num_reviews,review,unix_timestamp_created,unix_timestamp_updated,reviewcount
14,76561198130679813,40,False,0,0,0.000000,8,8,61,25,rather play quake,1618942673,1618942673,21
35,76561198254602420,730,False,4,1,0.524173,17046,13778,34,18,After 230 hours of playing i am feeling free ...,1502118887,1502118887,17
38,76561198254602420,578080,False,9,0,0.518260,63652,1943,34,18,I'm not competent to comment on this right now...,1502035285,1502035285,17
39,76561198254602420,271590,False,3,0,0.550409,35626,27522,34,18,.FOR HACKERS ........./´¯/)...............\n.....,1501324796,1514742001,17
44,76561198254602420,433850,False,5,0,0.548828,698,24,34,18,ONLY TO SAY : \nGIVE ME BACK 20 $ YOU STUPID S...,1479675053,1502229504,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1132884,76561198113100571,532210,False,4,0,0.407515,1480,1480,203,74,"im not saying no to the game, im saying no to ...",1579872378,1580969764,35
1132886,76561198113100571,219640,False,6,0,0.510419,389,389,203,74,did you know they are making chivalry 2 exclus...,1566190505,1566190505,35
1132891,76561198113100571,252490,False,0,1,0.492051,97817,85305,203,74,"the worst game ever, dont ask me to play it",1464912854,1543707924,35
1132895,76561198113100571,629730,False,0,0,0.476190,7049,1343,203,74,very poorly optimized,1577494697,1610029256,35


In [21]:
def setInteraction(df_highest_revs):
    df_highest_revs['interaction'] = 0
    conditions = [
        (df_highest_revs['playtime_forever'] < 5) & df_highest_revs['voted_up'],
        (df_highest_revs['playtime_forever'] > 5) & (df_highest_revs['playtime_forever'] < 20) & df_highest_revs['voted_up'],
        (df_highest_revs['playtime_forever'] > 20) & (df_highest_revs['playtime_forever'] < 100) & df_highest_revs['voted_up'],
        (df_highest_revs['playtime_forever'] > 100) & (df_highest_revs['playtime_forever'] < 500) & df_highest_revs['voted_up'],
        (df_highest_revs['playtime_forever'] > 500) & (df_highest_revs['playtime_forever'] < 100) & df_highest_revs['voted_up'],
        (df_highest_revs['playtime_forever'] > 1000) & df_highest_revs['voted_up'],
    ]
    values = [0, 1, 10, 100, 1000, 10000]
    df_highest_revs['interaction'] = np.select(conditions, values)
    df_highest_revs['interaction']

In [22]:
setInteraction(positive)

In [23]:
negative['interaction'] = 5

In [24]:
%%time
user_item_p = positive.copy().groupby(['steamid','appid'])['interaction'].first().unstack(fill_value = 0.0)

Wall time: 12.1 s


In [25]:
%%time
user_item_n = negative.groupby(['steamid','appid'])['interaction'].first().unstack(fill_value = 0.0)

Wall time: 1.5 s


In [31]:
def getMatrix(user_item):
    U, sigma, Vt = svds(user_item.to_numpy().astype('float'), k = 64)
    U.shape, sigma.shape, Vt.shape
    sigma_diag_matrix=np.diag(sigma)
    train_full_matrix = np.dot(np.dot(U, sigma_diag_matrix), Vt)
    prediction_train_full = pd.DataFrame(train_full_matrix, columns = user_item.columns, index=user_item.index)
    prediction_train_full.reset_index(inplace = True)
    prediction_train_full.set_index('steamid')
    return(prediction_train_full)

In [32]:
%%time
full_p = getMatrix(user_item_p)

Wall time: 3min 54s


In [33]:
%%time
full_n = getMatrix(user_item_n)

Wall time: 1min 22s


In [34]:
def getRecs(userID, prediction_train_full, df_highest_revs):
    topRecs = prediction_train_full[prediction_train_full.steamid == userID].iloc[0].sort_values(ascending = False)[1:6].to_frame()
    topInteraction = df_highest_revs[df_highest_revs.steamid == userID].appid.sort_values(ascending = False)[1:6].to_frame()
    topInteraction
    topRecs.columns = ['pred']
    return topRecs

In [35]:
userID = 76561198065609312
bestGames = getRecs(userID, full_p, positive)
worstGames = getRecs(userID, full_n, negative)

In [41]:
print(bestGames)
print(worstGames)

               pred
appid              
359550  9998.075588
578080   246.072264
221100   212.468793
581320   158.078246
10       135.248107
            pred
appid           
578080  4.993179
489940  0.045548
513710  0.038615
582660  0.036151
728540  0.028029
