In [1]:
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import tempfile
import os
from scipy.sparse import coo_matrix

steam game reviews:
95 - 100 | 500+ reviews | positive | overwhelming\
85 - 100 |  50+ reviews | positive | very\
80 - 100 |   1+ reviews | positive\
70 -  79 |   1+ reviews | positive | mostly\
40 -  69 |   1+ reviews | mixed\
20 -  39 |   1+ reviews | negative | mostly\
 0 -  19 |   1+ reviews | negative\
 0 -  19 |  50+ reviews | negative | very\
 0 -  19 | 500+ reviews | negative | overwhelming

In [2]:
users = pd.read_csv('../users.csv')
games = pd.read_csv('../games.csv')
recommendations = pd.read_csv('../recommendations.csv')

In [4]:
unique_userid = users['user_id'].unique()
unique_appid = games['app_id'].unique()

user_index = {user_id: idx for idx, user_id in enumerate(unique_userid)}
app_index = {app_id: idx for idx, app_id in enumerate(unique_appid)}

row_indices = []
col_indices = []
data = []

for _,row in recommendations.iterrows():
    user_id = row['user_id']
    app_id = row['app_id']
    is_recommended = row['score']

    row_indices.append(user_index[user_id])
    col_indices.append(app_index[app_id])
    data.append(1 if is_recommended else 0)

KeyboardInterrupt: 

In [4]:
num_users = len(unique_userid)
num_games = len(unique_appid)
rating_matrix_sparse = coo_matrix((data, (row_indices, col_indices)), shape=(num_users, num_games))

In [5]:
from scipy.sparse import save_npz
save_npz('../rating_matrix_sparse_scores.npz', rating_matrix_sparse)

In [7]:
rating_matrix_csr = rating_matrix_sparse.tocsr()

In [11]:
print(rating_matrix_csr[7360263])

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 1 stored elements and shape (1, 50872)>
  Coords	Values
  (0, 47701)	1


In [None]:
from scipy.sparse import load_npz
rating_matrix_sparse = load_npz('rating_matrix_sparse.npz')

# Parameters
num_users, num_games = rating_matrix_sparse.shape
num_latent_features = 10  # Number of latent features
learning_rate = 0.01      # Learning rate for gradient descent
regularization = 0.1      # Regularization term to prevent overfitting
num_epochs = 50           # Number of epochs for training

# Initialize U and V matrices with small random values
U = np.random.normal(scale=1.0 / num_latent_features, size=(num_users, num_latent_features)) #user
V = np.random.normal(scale=1.0 / num_latent_features, size=(num_games, num_latent_features)) #game

# Training loop
for epoch in range(num_epochs):
    total_error = 0
    
    # Iterate over all non-zero entries in the sparse matrix
    for user_idx, game_idx, rating in zip(rating_matrix_sparse.row, rating_matrix_sparse.col, rating_matrix_sparse.data):
        # Predict the rating using the dot product of U[user_idx] and V[game_idx]
        prediction = np.dot(U[user_idx], V[game_idx])
        error = rating - prediction
        
        # Update U and V using gradient descent
        U[user_idx] += learning_rate * (error * V[game_idx] - regularization * U[user_idx])
        V[game_idx] += learning_rate * (error * U[user_idx] - regularization * V[game_idx])
        
        total_error += error ** 2
    
    print(f"Epoch {epoch + 1}/{num_epochs}, Total Error: {total_error:.4f}")

print("Training completed.")

Epoch 1/50, Total Error: 35309645.6908
Epoch 2/50, Total Error: 35295227.8336
Epoch 3/50, Total Error: 35271044.8506
Epoch 4/50, Total Error: 35106353.2280
Epoch 5/50, Total Error: 34314316.5735
Epoch 6/50, Total Error: 32537312.8388
Epoch 7/50, Total Error: 30125527.5908
Epoch 8/50, Total Error: 27592696.2391
Epoch 9/50, Total Error: 25238583.8596
Epoch 10/50, Total Error: 23162314.5223
Epoch 11/50, Total Error: 21365541.7268
Epoch 12/50, Total Error: 19816614.4507
Epoch 13/50, Total Error: 18477593.4206
Epoch 14/50, Total Error: 17313541.7312
Epoch 15/50, Total Error: 16294902.2610
Epoch 16/50, Total Error: 15397463.3396
Epoch 17/50, Total Error: 14601613.8487
Epoch 18/50, Total Error: 13891494.3495
Epoch 19/50, Total Error: 13254239.2678
Epoch 20/50, Total Error: 12679354.7025
Epoch 21/50, Total Error: 12158225.0347
Epoch 22/50, Total Error: 11683727.2222
Epoch 23/50, Total Error: 11249930.5869
Epoch 24/50, Total Error: 10851862.9552
Epoch 25/50, Total Error: 10485327.8209
Epoch 26/