1. Games - dataset describing the characteristics of each game, including its game type, difficulty, and duration.
2. User — dataset containing information regarding user amount of games and amount of reviews 
3. Recommendation — dataset containing all recommendation data with informations about whether recommendation was funny (number of upvotes), helpful (number of likes), date, is game recommended (yes/no), hours played, user_id and review_id

In [6]:
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import tempfile

In [3]:
users = pd.read_csv('users.csv')
recommendations = pd.read_csv('recommendations.csv')
games = pd.read_csv('games.csv')

data = recommendations.merge(users, on="user_id").merge(games, on="app_id")

In [4]:
user_ids = data['user_id'].unique()
app_ids = data['app_id'].unique()
user_idx = {id: idx for idx, id in enumerate(user_ids)}
app_idx = {id: idx for idx, id in enumerate(app_ids)}

In [None]:
%%skip
interaction_matrix = np.zeros((len(user_ids), len(app_ids)))
for _, row in data.iterrows():
    interaction_matrix[user_idx[row['user_id']], app_idx[row['app_id']]] = row['is_recommended']

In [7]:
interaction_file = tempfile.NamedTemporaryFile(delete=False, mode='w+')
u_matrix_file = tempfile.NamedTemporaryFile(delete=False, mode='w+')
v_matrix_file = tempfile.NamedTemporaryFile(delete=False, mode='w+')

# Create and store the interaction matrix
for _, row in data.iterrows():
    interaction_file.write(f"{user_idx[row['user_id']]},{app_idx[row['app_id']]},{row['is_recommended']}\n")
interaction_file.seek(0)  # Reset file pointer


0

In [9]:
interactions = []
interaction_file.seek(0)
for line in interaction_file:
    interactions.append(line.strip())

# Perform train-test split
train_interactions, test_interactions = train_test_split(interactions, test_size=0.2, random_state=42)
train_file = tempfile.NamedTemporaryFile(delete=False, mode='w+')
test_file = tempfile.NamedTemporaryFile(delete=False, mode='w+')
for line in train_interactions:
    train_file.write(line + "\n")
for line in test_interactions:
    test_file.write(line + "\n")
train_file.seek(0)
test_file.seek(0)

0

In [None]:
import os
interaction_tempfile_path = interaction_file.name
train_tempfile_path = train_file.name
test_tempfile_path = test_file.name

# Close the temp files
interaction_file.close()
train_file.close()
test_file.close()

# Desired permanent file paths
interaction_file_path = 'interactions.csv'
train_file_path = 'train.csv'
test_file_path = 'test.csv'

# Rename temp files to permanent files
os.rename(interaction_tempfile_path, interaction_file_path)
os.rename(train_tempfile_path, train_file_path)
os.rename(test_tempfile_path, test_file_path)

In [30]:
v_matrix_tempfile_path = v_matrix_file.name 
u_matrix_tempfile_path = u_matrix_file.name 

v_matrix_file.close()
u_matrix_file.close()

v_matrix_file_path = 'v_matrix.csv'
u_matrix_file_path = 'u_matrix.csv'

os.rename(v_matrix_tempfile_path, v_matrix_file_path)
os.rename(u_matrix_tempfile_path, u_matrix_file_path)
v_matrix_file = open('v_matrix.csv', mode='r')
u_matrix_file = open('u_matrix.csv', mode='r')

In [24]:
def funk_svd_file(interaction_file, num_users, num_items, num_features=10, learning_rate=0.01, reg_param=0.02, epochs=10):
    U = np.random.normal(scale=1./num_features, size=(num_users, num_features))
    V = np.random.normal(scale=1./num_features, size=(num_items, num_features))
    
    for epoch in range(epochs):
        interaction_file.seek(0)
        for line in interaction_file:
            user_id, app_id, interaction = line.strip().split(",")
            user_id, app_id = int(user_id), int(app_id)
            interaction = int(interaction == True)
            prediction = np.dot(U[user_id], V[app_id])
            error = interaction - prediction
            U[user_id] += learning_rate * (error * V[app_id] - reg_param * U[user_id])
            V[app_id] += learning_rate * (error * U[user_id] - reg_param * V[app_id])
        print(f"Epoch {epoch + 1}/{epochs} completed.")
    return U, V

In [18]:
interaction_file = open(interaction_file_path, mode='r') 
train_file = open(train_file_path, mode='r') 
test_file = open(test_file_path, mode='r')

In [19]:
num_users = len(user_ids)
num_items = len(app_ids)
U, V = funk_svd_file(interaction_file, num_users, num_items, num_features=20, epochs=5)


Epoch 1/5 completed.
Epoch 2/5 completed.
Epoch 3/5 completed.
Epoch 4/5 completed.
Epoch 5/5 completed.


In [26]:
def evaluate_on_test(test_file, U, V):
    test_file.seek(0)
    total_error = 0
    count = 0
    for line in test_file:
        user_id, app_id, interaction = line.strip().split(",")
        user_id, app_id = int(user_id), int(app_id)
        interaction = int(interaction == True)
        prediction = np.dot(U[user_id], V[app_id])
        error = (interaction - prediction) ** 2
        total_error += error
        count += 1
    rmse = np.sqrt(total_error / count)
    print(f"RMSE on Test Data: {rmse:.4f}")
    return rmse

In [27]:
evaluate_on_test(test_file, U, V)

RMSE on Test Data: 0.0022


np.float64(0.0022089374009210574)

In [42]:
def recommend_games_from_files(user_id, U, V, top_n=5):
    # U = np.loadtxt(u_matrix_file.name, delimiter=",")
    # V = np.loadtxt(v_matrix_file.name, delimiter=",")
    # print(V)
    user_index = user_idx[user_id]
    predictions = np.dot(U[user_index], V.T)
    top_games_idx = np.argsort(-predictions)[:top_n]
    recommended_game_ids = [app_ids[idx] for idx in top_games_idx]
    return games[games['app_id'].isin(recommended_game_ids)][['app_id', 'title', 'rating', 'price_final']]

In [43]:
user_to_recommend = user_ids[0]
recommendations = recommend_games_from_files(user_to_recommend, U, V)
print(recommendations)

        app_id                          title         rating  price_final
11965  1662910                     GridlessDB       Positive        19.99
24469  1892680           Vitality Girl Ⅱ:Fire       Positive         3.99
30913   761000        Floresia I : Intemporel          Mixed         0.00
33569   790450                         ANYKEY  Very Positive         0.00
37120   452000  Military Life: Tank Simulator          Mixed         3.99
