In [1]:
import contextlib
import io

# In-memory text buffer that receives everything written to stdout while the
# imports below execute. It is not read anywhere in the visible cells, so the
# captured text is effectively discarded.
f = io.StringIO()
with contextlib.redirect_stdout(f):
    import numpy as np
    import pandas as pd
    # NOTE(review): presumably import_ipynb is what allows the model notebooks
    # below to be imported as modules, and importing them prints output that
    # this block is meant to hide -- confirm.
    import import_ipynb
    import baseline_model
    import als
    import spectral_regularization_model
    import nuclear_norm_model
    import warnings
    # Silence every warning of category UserWarning from here on.
    warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
def calculateRMSE(pred, df):
    """Return the RMSE between ``pred`` and the positive scores recorded in ``df``.

    ``df`` is pivoted into a dense matrix with one row per ``u_id`` and one
    column per ``a_id`` holding ``score``; (u_id, a_id) pairs that are absent
    from ``df`` are filled with 0. Only cells whose score is strictly greater
    than 0 take part in the error, so missing pairs -- and any recorded score
    that is 0 or negative -- are ignored.

    Parameters
    ----------
    pred : array-like
        Predicted score matrix with the same shape as the pivoted matrix.
        Any input accepted by ``np.asarray`` works (ndarray, nested list, ...).
        NOTE(review): rows/columns are assumed to follow the pivot's
        u_id / a_id ordering -- confirm against the model code.
    df : pandas.DataFrame
        Frame with the columns ``u_id``, ``a_id`` and ``score``.

    Returns
    -------
    numpy.float64
        Root-mean-square error over the compared cells. When no cell has a
        positive score the mean of an empty selection is taken, which NumPy
        reports as ``nan``.
    """
    original = df.pivot(index='u_id', columns='a_id', values='score').fillna(0).values
    mask = original > 0
    # Convert to an ndarray *before* masking: boolean-mask indexing is a NumPy
    # feature, so list inputs would otherwise fail and other containers would
    # apply their own indexing rules instead of selecting the masked cells.
    predicted = np.asarray(pred)
    errors = predicted[mask] - original[mask]
    return np.sqrt(np.mean(errors ** 2))

In [3]:
# Load the two rating datasets used by every model cell below. Paths are
# relative to the notebook's working directory.
# calculateRMSE reads the columns 'u_id', 'a_id' and 'score' from these frames.
# NOTE(review): the file names suggest 100 x 100 matrices -- confirm.
df_1 = pd.read_csv('data/100x100.csv')
df_2 = pd.read_csv('data/100x100_2.csv')

In [4]:
# Baseline model on dataset 1, with its first argument ("x") set to 10000.
# NOTE(review): the meaning of x is defined inside baseline_model -- confirm.
# NOTE(review): the RMSE is measured against the same frame that is passed to
# the model, so this is presumably a fit (training) error -- confirm.
baseline_matrix_1 = baseline_model.baseline_model(10000, df_1)
# Last expression of the cell: displays the RMSE over the positive-score entries.
calculateRMSE(baseline_matrix_1, df_1)

status: optimal


2.1080952228275684

In [5]:
# Baseline model on dataset 2, with its first argument ("x") set to 10000.
# NOTE(review): the meaning of x is defined inside baseline_model -- confirm.
# NOTE(review): the RMSE is measured against the same frame that is passed to
# the model, so this is presumably a fit (training) error -- confirm.
baseline_matrix_2 = baseline_model.baseline_model(10000, df_2)
# Last expression of the cell: displays the RMSE over the positive-score entries.
calculateRMSE(baseline_matrix_2, df_2)

status: optimal


2.241315137364796

In [6]:
# ALS on dataset 1 at rank 20 with 1000 iterations.
# als.als returns two objects; the prediction matrix is rebuilt as X @ Y.T.
# X and Y are notebook-level names that the dataset-2 ALS cell assigns as well.
# NOTE(review): no random seed is set in the visible cells; if als initialises
# randomly the score below will vary between runs -- confirm.
X, Y = als.als(df_1, 20, 1000)
als_matrix_1 = X @ Y.T
# Last expression of the cell: displays the RMSE over the positive-score entries.
calculateRMSE(als_matrix_1, df_1)

0.01773258575026133

In [7]:
# ALS on dataset 2 at rank 20 with 1000 iterations.
# als.als returns two objects; the prediction matrix is rebuilt as X @ Y.T.
# X and Y are notebook-level names that the dataset-1 ALS cell assigns as well,
# so after this cell they hold the dataset-2 results.
# NOTE(review): no random seed is set in the visible cells; if als initialises
# randomly the score below will vary between runs -- confirm.
X, Y = als.als(df_2, 20, 1000)
als_matrix_2 = X @ Y.T
# Last expression of the cell: displays the RMSE over the positive-score entries.
calculateRMSE(als_matrix_2, df_2)

0.021637404731245695

In [None]:
# Spectral regularization model on dataset 1 with lambda = 10 (first argument).
# NOTE(review): the RMSE is measured against the same frame that is passed to
# the model, so this is presumably a fit (training) error -- confirm.
spectral_matrix_1 = spectral_regularization_model.spectral_regularization_model(10, df_1)
# Last expression of the cell: displays the RMSE over the positive-score entries.
calculateRMSE(spectral_matrix_1, df_1)

status: optimal


1.295096573902575

In [13]:
# Spectral regularization model on dataset 2 with lambda = 10 (first argument).
# NOTE(review): the RMSE is measured against the same frame that is passed to
# the model, so this is presumably a fit (training) error -- confirm.
spectral_matrix_2 = spectral_regularization_model.spectral_regularization_model(10, df_2)
# Last expression of the cell: displays the RMSE over the positive-score entries.
calculateRMSE(spectral_matrix_2, df_2)

status: optimal


1.6328061203922248

In [10]:
# Nuclear norm model on dataset 1; the frame is the only argument passed.
# NOTE(review): the RMSE is measured against the same frame that is passed to
# the model, so a near-zero value presumably reflects how closely the observed
# entries are reproduced rather than held-out accuracy -- confirm.
nuclear_matrix_1 = nuclear_norm_model.nuclear_norm_model_df(df_1)
# Last expression of the cell: displays the RMSE over the positive-score entries.
calculateRMSE(nuclear_matrix_1, df_1)

Optimization succeeded.


6.675280732099155e-09

In [11]:
# Nuclear norm model on dataset 2; the frame is the only argument passed.
# NOTE(review): the RMSE is measured against the same frame that is passed to
# the model, so a near-zero value presumably reflects how closely the observed
# entries are reproduced rather than held-out accuracy -- confirm.
nuclear_matrix_2 = nuclear_norm_model.nuclear_norm_model_df(df_2)
# Last expression of the cell: displays the RMSE over the positive-score entries.
calculateRMSE(nuclear_matrix_2, df_2)

Optimization succeeded.


4.374271801530336e-09