# Recommender systems

## 1.1 Naive Approaches

In [11]:
# -*- coding: utf-8 -*-

import sklearn
from sklearn.model_selection import KFold
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import multiprocessing

path = "./ml-1m/ratings.dat" # Read the dataset
table = pd.read_table(path, sep="::", names=["UserID", "MovieID", "Rating", "Timestamp"], 
                      engine="python")
table

Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291
...,...,...,...,...
1000204,6040,1091,1,956716541
1000205,6040,1094,5,956704887
1000206,6040,562,5,956704746
1000207,6040,1096,4,956715648


In [12]:
def row_col_mean(row_col, index,dataset): # Average of the row and col in the dataset
    if row_col == "row":
        return(dataset.loc[(dataset["UserID"]==index), "Rating"].mean())
    else:
        return(dataset.loc[(dataset["MovieID"]==index), "Rating"].mean())

In [13]:
def DataRouter(dataset): 
    """
    data preprocess for linear regression
    dataset: train or test, pd dataframe
    return: X, Y for linear regression
    """
    user_mean = dict()
    movie_mean = dict()
    for i in dataset['UserID'].unique():
        user_mean[i] = row_col_mean("row", i, dataset)
    for i in dataset['MovieID'].unique():
        movie_mean[i] = row_col_mean("col", i, dataset)
    
    dataset['X1'] = '' 
    dataset['X2'] = ''

    for index, row in dataset.iterrows():
        dataset.loc[index, 'X1'] = user_mean[row['UserID']]
        dataset.loc[index, 'X2'] = movie_mean[row['MovieID']]
    X = dataset.loc[:,['X1','X2']].values
    Y = dataset.loc[:,['Rating']].values  
    return(X, Y)

In [14]:
def LinearReg(X, Y):
    reg = LinearRegression().fit(X, Y)
    coef = reg.coef_
    intercept = reg.intercept_
    return(coef, intercept)

In [15]:
def LinearRegNoIntercept(X, Y):
    reg = LinearRegression(fit_intercept=False).fit(X, Y)
    coef = reg.coef_
    intercept = reg.intercept_
    return(coef, intercept)

In [16]:
def evaluation(X, Y, coef, intercept): 
    """
    evaluation for linear regression
    """
    reg = LinearRegression().fit(X, Y)
    reg.coef_ = coef
    reg.intercept_ = intercept
    
    y_pred = np.array(reg.predict(X))
    y_pred = np.where(y_pred < 1, 1, y_pred)
    y_pred = np.where(y_pred > 5, 5, y_pred)

    rmse = np.sqrt(np.mean((Y - y_pred) ** 2))
    mae = np.mean(np.abs(Y - y_pred))
    return(rmse,mae)

In [17]:
def report(mean_rmse, std_rmse, mean_mae, std_mae, info="Train"):
    """
    print the result
    """
    print(f"{info} RMSE Mean: {mean_rmse}")
    print(f"{info} RMSE std: {std_rmse}")
    print(f"\n{info} MAE Mean: {mean_mae}")
    print(f"{info} MAE std: {std_mae}")

In [18]:
# Kfold, test and train are set
kf = KFold(n_splits=5, shuffle=True, random_state=1) 

In [19]:
%%time
# Liniear regression of two averages with intercept
test_rmse_list = []
test_mae_list = []
train_rmse_list = []
train_mae_list = []

for train_index, test_index in kf.split(table):
    train = table.loc[train_index.tolist(), ["UserID", "MovieID", "Rating"]]
    test = table.loc[test_index.tolist(), ["UserID", "MovieID", "Rating"]]
    
    X_train, Y_train = DataRouter(train)
    X_test, Y_test = DataRouter(test)
    coef, interc = LinearReg(X_train, Y_train)
    test_rmse, test_mae = evaluation(X_test, Y_test, coef, interc)
    train_rmse, train_mae = evaluation(X_train, Y_train, coef, interc)
    test_rmse_list.append(test_rmse)
    test_mae_list.append(test_mae)
    train_rmse_list.append(train_rmse)
    train_mae_list.append(train_mae)

report(np.mean(train_rmse_list), np.std(train_rmse_list), np.mean(train_mae_list), 
       np.std(train_mae_list), info="Train")
report(np.mean(test_rmse_list), np.std(test_rmse_list), np.mean(test_mae_list), 
       np.std(test_mae_list), info="Test")

Train RMSE Mean: 0.9145443262957137
Train RMSE std: 0.00042429662756855474

Train MAE Mean: 0.7247995102650913
Train MAE std: 0.00045659741953063055
Test RMSE Mean: 0.9001653681660222
Test RMSE std: 0.0016310273475170262

Test MAE Mean: 0.7122092602307395
Test MAE std: 0.001920795140638446
CPU times: user 14min, sys: 4.76 s, total: 14min 5s
Wall time: 13min 57s


In [20]:
%%time
# Liniear regression of two averages without intercept
test_rmse_list = []
test_mae_list = []
train_rmse_list = []
train_mae_list = []

for train_index, test_index in kf.split(table):
    train = table.loc[train_index.tolist(), ["UserID", "MovieID", "Rating"]]
    test = table.loc[test_index.tolist(), ["UserID", "MovieID", "Rating"]]
    
    X_train, Y_train = DataRouter(train)
    X_test, Y_test = DataRouter(test)
    coef, interc = LinearRegNoIntercept(X_train, Y_train)
    test_rmse, test_mae = evaluation(X_test, Y_test, coef, interc)
    train_rmse, train_mae = evaluation(X_train, Y_train, coef, interc)
    test_rmse_list.append(test_rmse)
    test_mae_list.append(test_mae)
    train_rmse_list.append(train_rmse)
    train_mae_list.append(train_mae)
    
report(np.mean(train_rmse_list), np.std(train_rmse_list), np.mean(train_mae_list), 
       np.std(train_mae_list), info="Train")
report(np.mean(test_rmse_list), np.std(test_rmse_list), np.mean(test_mae_list), 
       np.std(test_mae_list), info="Test")

Train RMSE Mean: 0.9465499537839577
Train RMSE std: 0.0003342578808442147

Train MAE Mean: 0.7585512269178818
Train MAE std: 0.0003182251782626315
Test RMSE Mean: 0.9344570734200918
Test RMSE std: 0.00127624501554858

Test MAE Mean: 0.748706056925681
Test MAE std: 0.0012824171256624554
CPU times: user 13min 51s, sys: 4.05 s, total: 13min 55s
Wall time: 13min 45s


In [21]:
def global_avg(dataset): # Calculate Global average rating
    ga = dataset['Rating'].mean()
    return(ga)

In [22]:
%%time
test_rmse_list = []
test_mae_list = []
train_rmse_list = []
train_mae_list = []

for train_index, test_index in kf.split(table):
    train = table.loc[train_index.tolist(), ["UserID", "MovieID", "Rating"]]
    test = table.loc[test_index.tolist(), ["UserID", "MovieID", "Rating"]]
    
    test_rmse = np.sqrt(np.mean((test['Rating'] - global_avg(test))**2))
    test_mae = np.mean(np.abs(test['Rating']- global_avg(test)))
    train_rmse = np.sqrt(np.mean((train['Rating'] - global_avg(train))**2))
    train_mae = np.mean(np.abs(train['Rating'] - global_avg(train)))

    test_rmse_list.append(test_rmse)
    test_mae_list.append(test_mae)
    train_rmse_list.append(train_rmse)
    train_mae_list.append(train_mae)
    
report(np.mean(train_rmse_list), np.std(train_rmse_list), np.mean(train_mae_list), 
       np.std(train_mae_list), info="Train")
report(np.mean(test_rmse_list), np.std(test_rmse_list), np.mean(test_mae_list), 
       np.std(test_mae_list), info="Test")

Train RMSE Mean: 1.1171011110023854
Train RMSE std: 0.0004203500090281157

Train MAE Mean: 0.9338607806758029
Train MAE std: 0.00038334720904158897
Test RMSE Mean: 1.1170984720424795
Test RMSE std: 0.0016813423548895845

Test MAE Mean: 0.9338595477225654
Test MAE std: 0.001534945750123129
CPU times: user 792 ms, sys: 143 ms, total: 934 ms
Wall time: 933 ms


In [23]:
%%time
# Average rating per user
test_rmse_list = []
test_mae_list = []
train_rmse_list = []
train_mae_list = []

for train_index, test_index in kf.split(table):
    train = table.loc[train_index.tolist(), ["UserID", "MovieID", "Rating"]]
    test = table.loc[test_index.tolist(), ["UserID", "MovieID", "Rating"]]
    
    user_avg = train[['UserID','Rating']].groupby('UserID').mean().rename(columns={'Rating':'Average'})
    train_merge = pd.merge(train, user_avg, on='UserID')
    test_merge = pd.merge(test, user_avg, on='UserID')

    test_rmse = np.sqrt(np.mean((test_merge['Rating'] - test_merge['Average'])**2))
    test_mae = np.mean(np.abs(test_merge['Rating']- test_merge['Average']))
    train_rmse = np.sqrt(np.mean((train_merge['Rating'] - train_merge['Average'])**2))
    train_mae = np.mean(np.abs(train_merge['Rating'] - train_merge['Average']))

    test_rmse_list.append(test_rmse)
    test_mae_list.append(test_mae)
    train_rmse_list.append(train_rmse)
    train_mae_list.append(train_mae)
    
report(np.mean(train_rmse_list), np.std(train_rmse_list), np.mean(train_mae_list), 
       np.std(train_mae_list), info="Train")
report(np.mean(test_rmse_list), np.std(test_rmse_list), np.mean(test_mae_list), 
       np.std(test_mae_list), info="Test")

Train RMSE Mean: 1.0276718866687955
Train RMSE std: 0.0005528163518390866

Train MAE Mean: 0.8227317798294085
Train MAE std: 0.0004731890438345407
Test RMSE Mean: 1.0354887413559504
Test RMSE std: 0.002196430566260904

Test MAE Mean: 0.8290076950378905
Test MAE std: 0.0017401821127500715
CPU times: user 1.11 s, sys: 267 ms, total: 1.38 s
Wall time: 1.38 s


In [24]:
%%time
# Average rating per item
test_rmse_list = []
test_mae_list = []
train_rmse_list = []
train_mae_list = []

for train_index, test_index in kf.split(table):
    train = table.loc[train_index.tolist(), ["UserID", "MovieID", "Rating"]]
    test = table.loc[test_index.tolist(), ["UserID", "MovieID", "Rating"]]
    
    movie_avg = train[['MovieID','Rating']].groupby('MovieID').mean().rename(columns={'Rating':'Average'})
    train_merge = pd.merge(train, movie_avg, on='MovieID')
    test_merge = pd.merge(test, movie_avg, on='MovieID')

    test_rmse = np.sqrt(np.mean((test_merge['Rating'] - test_merge['Average'])**2))
    test_mae = np.mean(np.abs(test_merge['Rating']- test_merge['Average']))
    train_rmse = np.sqrt(np.mean((train_merge['Rating'] - train_merge['Average'])**2))
    train_mae = np.mean(np.abs(train_merge['Rating'] - train_merge['Average']))

    test_rmse_list.append(test_rmse)
    test_mae_list.append(test_mae)
    train_rmse_list.append(train_rmse)
    train_mae_list.append(train_mae)
    
report(np.mean(train_rmse_list), np.std(train_rmse_list), np.mean(train_mae_list), 
       np.std(train_mae_list), info="Train")
report(np.mean(test_rmse_list), np.std(test_rmse_list), np.mean(test_mae_list), 
       np.std(test_mae_list), info="Test")

Train RMSE Mean: 0.9742112263767705
Train RMSE std: 0.00018290851118296756

Train MAE Mean: 0.7783430056529332
Train MAE std: 0.00028529865704437574
Test RMSE Mean: 0.9794200889294983
Test RMSE std: 0.0007691010494227671

Test MAE Mean: 0.782308674322367
Test MAE std: 0.0006285312602745488
CPU times: user 1.18 s, sys: 247 ms, total: 1.43 s
Wall time: 1.43 s


### A Parent Class for Task 1.2 and 1.3

In [1]:
import sklearn
from sklearn.model_selection import KFold
import pandas as pd
import numpy as np
import multiprocessing
import random

path = "./ml-1m/ratings.dat"
seeds = 1

class MatrixModels():
    """
    The template for task 1.2 and 1.3.
    Args:
        path: File path.
        num_factors: The number of features.
        num_iter: Max iterations.
        seeds: Set the random state.
        save_UM: Save the feature matrices.
        save_info: Path to save feature matrices.
        random_normal: The matrix initialization method.
    """
    def __init__(self, path, num_factors, num_iter, seeds, save_UM, save_info="", random_normal=True):
        self.path = path
        self.num_factors = num_factors
        self.num_iter = num_iter
        self.seeds = seeds
        self.save_UM = save_UM
        self.save_info = save_info
        self.random_normal = random_normal
        self.I = 6040
        self.J = 3952
        self.table = pd.read_table(self.path, sep="::", names=["UserID", "MovieID", "Rating", "Timestamp"], 
                                   engine="python")
    
    def _evaluation(self, U, M, table):
        """
        Evaluate model performance.
        """
        y_true = table[:, 2]
        y_pred = []
        
        for i in range(table.shape[0]):
            y_pred.append(np.matmul(U[table[i, 0] - 1, :], M[:, table[i, 1] - 1]))
        
        # If y_pred < 1, y_pred = 1. If y_pred > 5, y_pred = 5.
        y_pred = np.array(y_pred)
        y_pred = np.where(y_pred < 1, 1, y_pred)
        y_pred = np.where(y_pred > 5, 5, y_pred)
        
        # Calculate RMSE and MAE
        rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
        mae = np.mean(np.abs(y_true - y_pred))
        return rmse, mae
    
    def _train_iteration(U, M, table_train):
        print("Implement this method.")
        return U, M
        
    def run_model(self, train_index, test_index, num_work=0):
        """
        Run the experiment on one fold.
        """
        table_train, table_test = self.table.iloc[train_index], self.table.iloc[test_index]
        table_train, table_test = table_train.to_numpy(), table_test.to_numpy()

        # Initialization
        if self.random_normal:
            np.random.seed(self.seeds)
            U = np.random.normal(0, 0.1, (self.I, self.num_factors))
            np.random.seed(self.seeds)
            M = np.random.normal(0, 0.1, (self.num_factors, self.J))
        else:
            U = np.ones((self.I, self.num_factors))
            M = np.ones((self.num_factors, self.J))

        # Training model
        for epoch in range(num_iter):
            info = f"\rWorker {num_work}: {epoch} epoch.\r"
            print(info, end="")
            U, M = self._train_iteration(U, M, table_train)
            print(" " * len(info), end="")
            
        # Save results
        if self.save_UM:
            np.save(f"./UM/{self.save_info}_U_{num_work}.npy", U) 
            np.save(f"./UM/{self.save_info}_M_{num_work}.npy", M)
        
        # Evaluating model
        train_rmse, train_mae = self._evaluation(U, M, table_train)
        self.train_rmse_lst.append(train_rmse)
        self.train_mae_lst.append(train_mae)
        
        test_rmse, test_mae = self._evaluation(U, M, table_test)
        self.test_rmse_lst.append(test_rmse)
        self.test_mae_lst.append(test_mae)
        
        print(f"\rWorker {num_work}: Done.\nTrain RMSE: {train_rmse}, MAE: {train_mae}.\nTest RMSE: {test_rmse}, MAE: {test_mae}.")
        
    def main(self):
        kf = KFold(n_splits=5, shuffle=True, random_state=self.seeds) # five folds
        arg_lst = []
        num_work = 1
        for train_index, test_index in kf.split(self.table):
            arg_lst.append((train_index, test_index, num_work))
            num_work += 1
        
        # Multiprocessing
        manager = multiprocessing.Manager()
        self.train_rmse_lst = manager.list()
        self.train_mae_lst = manager.list()
        self.test_rmse_lst = manager.list()
        self.test_mae_lst = manager.list()
        workers = []
        for i in range(5):
            p = multiprocessing.Process(target=self.run_model, args=arg_lst[i])
            workers.append(p)
            p.start()

        for p in workers:
            p.join()

        self.train_rmse_lst = np.array(self.train_rmse_lst)
        self.train_mae_lst = np.array(self.train_mae_lst)
        self.test_rmse_lst = np.array(self.test_rmse_lst)
        self.test_mae_lst = np.array(self.test_mae_lst)
        
        print(f"\nTrain RMSE Mean: {np.mean(self.train_rmse_lst)}")
        print(f"Train RMSE std: {np.std(self.train_rmse_lst)}")
        print(f"\nTrain MAE Mean: {np.mean(self.train_mae_lst)}")
        print(f"Train MAE std: {np.std(self.train_mae_lst)}")
        
        print(f"\nTest RMSE Mean: {np.mean(self.test_rmse_lst)}")
        print(f"Test RMSE std: {np.std(self.test_rmse_lst)}")
        print(f"\nTest MAE Mean: {np.mean(self.test_mae_lst)}")
        print(f"Test MAE std: {np.std(self.test_mae_lst)}\n")

## 1.2 UV Matrix Decomposition

In [2]:
class UVDecomposition(MatrixModels):
    """
    Implement 1.2 UV Matrix Decomposition.
    Args:
        path: File path.
        num_factors: The number of features.
        num_iter: Max iterations.
        seeds: Set the random state.
        save_UM: Save the feature matrices.
        save_info: Path to save feature matrices. 
        random_normal: The matrix initialization method.
    """
    def __init__(self, path, num_factors, num_iter, seeds, save_UM, save_info="", random_normal=False):
        super().__init__(path, num_factors, num_iter, seeds, save_UM, save_info, random_normal)
        
    def _update_U(self, index, table_train, U, V):
        r, s = index
        sum_1 = 0
        sum_2 = 0
        M = table_train[table_train[:, 0] == (r + 1), :]
        
        for row in range(M.shape[0]):
            j = M[row, 1] - 1
            prod = np.matmul(U[r, :], V[:, j]) - U[r, s] * V[s, j]
            sum_1 += V[s, j] * (M[row, 2] - prod)
            sum_2 += V[s, j] ** 2
            
        if sum_2 == 0:
            sum_2 = 0.001
        U[r, s] = sum_1 / sum_2
        return U

    def _update_V(self, index, table_train, U, V):
        r, s = index
        sum_1 = 0
        sum_2 = 0
        M = table_train[table_train[:, 1] == (s + 1), :]

        for row in range(M.shape[0]):
            i = M[row, 0] - 1
            prod = np.matmul(U[i, :], V[:, s]) - U[i, r] * V[r, s]
            sum_1 += U[i, r] * (M[row, 2] - prod)
            sum_2 += U[i, r] ** 2
        
        if sum_2 == 0:
            sum_2 = 0.001
        V[r, s] = sum_1 / sum_2
        return V
        
    def _train_iteration(self, U, V, table_train):
        """
        One iteration of training model.
        """
        u_index = list(np.ndindex(U.shape))
        v_index = list(np.ndindex(V.shape))
        random.shuffle(u_index)
        random.shuffle(v_index)
        while (len(u_index) > 0) | (len(v_index) > 0):
            if (len(u_index) > 0):
                u = u_index.pop()
                U = self._update_U(u, table_train, U, V)
            if (len(v_index) > 0):
                v = v_index.pop()
                V = self._update_V(v, table_train, U, V)
        return U, V

In [3]:
%%time
num_factors = 5
num_iter = 30

model = UVDecomposition(path, num_factors, num_iter, seeds, False)
model.main()

Worker 5: Done.      
Train RMSE: 0.8218164942587157, MAE: 0.6441966475729487.
Test RMSE: 0.8905092218953938, MAE: 0.6946348681360691.
Worker 1: Done.      
Train RMSE: 0.825131966598441, MAE: 0.6471236014951063.
Test RMSE: 0.898121455066345, MAE: 0.7009553454137548.
Worker 2: Done.      
Train RMSE: 0.8289865784749201, MAE: 0.6500763148802429.
Test RMSE: 0.9020688067410898, MAE: 0.7035226916009186.
Worker 4: Done.                           
Train RMSE: 0.8246744326173886, MAE: 0.6467785020199717.
Test RMSE: 0.8929408048664373, MAE: 0.6956072108084338.
Worker 3: Done.
Train RMSE: 0.8344056390081162, MAE: 0.6549232355688468.
Test RMSE: 0.9059621882360736, MAE: 0.7078084372729082.

Train RMSE Mean: 0.8270030221915163
Train RMSE std: 0.004348712032261338

Train MAE Mean: 0.6486196603074232
Train MAE std: 0.003661912098334261

Test RMSE Mean: 0.897920495361068
Test RMSE std: 0.005685806683488269

Test MAE Mean: 0.7005057106464169
Test MAE std: 0.004921305804948875

CPU times: user 2.92 s, 

## 1.3 Matrix Factorization
- Update the weights based on the rows or columns of matrices,
$$
\begin{align*}
    U^{(t+1)}[i, :] &= U^{(t)}[i, :] + \eta\left( 2e_{ij}M^{(t)}[:, j] - \lambda U^{(t)}[i, :] \right) \\
    M^{(t+1)}[:, j] &= M^{(t)}[:, j] + \eta\left( 2e_{ij}U^{(t)}[i, :] - \lambda M^{(t)}[:, j] \right) 
\end{align*}
$$
- Set the random seed to 1 both in weights initialization and five-fold division.
- Multiprocessing programming.

In [4]:
class GravityTikk(MatrixModels):
    """
    Implement 1.3 Matrix Factorization.
    Args:
        path: File path.
        num_factors: The number of features.
        num_iter: Max iterations.
        seeds: Set the random state.
        save_UM: Save the feature matrices.
        save_info: Path to save feature matrices.
        regularization: factor lambda of regularization term.
        learn_rate: Learning rate.
    """
    def __init__(self, path, num_factors, num_iter, seeds, save_UM, save_info, regularization, learn_rate):
        super().__init__(path, num_factors, num_iter, seeds, save_UM, save_info)
        self.regularization = regularization
        self.learn_rate = learn_rate
    
    def _train_iteration(self, U, M, table_train):
        """
        One iteration of training model.
        """
        for i in range(table_train.shape[0]):
            # Calculate the error
            error = table_train[i, 2] - np.matmul(U[table_train[i, 0] - 1, :], M[:, table_train[i, 1] - 1])
        
            # Calculate the gradient
            gradient_U = 2 * error * M[:, table_train[i, 1] - 1] - self.regularization * U[table_train[i, 0] - 1, :]
            gradient_M = 2 * error * U[table_train[i, 0] - 1, :] - self.regularization * M[:, table_train[i, 1] - 1]
            
            # Update U, M
            U[table_train[i, 0] - 1, :] += self.learn_rate * gradient_U
            M[:, table_train[i, 1] - 1] += self.learn_rate * gradient_M
        
        return U, M

In [5]:
%%time
# Hyperparameters: Setting 1, Suggested Setting
num_factors = 10
num_iter = 75
regularization = 0.05
learn_rate = 0.005

model = GravityTikk(path, num_factors, num_iter, seeds, True, "Setting_1", regularization, learn_rate)
model.main()

Worker 2: Done.      
Train RMSE: 0.7688675734040371, MAE: 0.6030635926962993.
Test RMSE: 0.869961123237032, MAE: 0.6795618867494324.
Worker 5: Done.                           
Train RMSE: 0.7683601075592711, MAE: 0.6031151686238267.
Test RMSE: 0.869252118445245, MAE: 0.6788092372142813.
Worker 3: Done.
Train RMSE: 0.7692080879100817, MAE: 0.6039271897899572.
Test RMSE: 0.8667277030457634, MAE: 0.6771413764266705.
Worker 4: Done.      
Train RMSE: 0.7695233923797611, MAE: 0.604229214831441.
Test RMSE: 0.8681647402923015, MAE: 0.6778429775492133.
Worker 1: Done.      
Train RMSE: 0.7687806027625633, MAE: 0.6036467031032469.
Test RMSE: 0.8688135792354811, MAE: 0.6793417110229322.

Train RMSE Mean: 0.7689479528031429
Train RMSE std: 0.00039467198908102393

Train MAE Mean: 0.6035963738089543
Train MAE std: 0.0004534036071952779

Test RMSE Mean: 0.8685838528511646
Test RMSE std: 0.0010970117837797114

Test MAE Mean: 0.6785394377925058
Test MAE std: 0.0009166753391357066

CPU times: user 3.2

In [6]:
%%time
# Hyperparameters: Setting 2
num_factors = 20
num_iter = 75
regularization = 0.05
learn_rate = 0.005

model = GravityTikk(path, num_factors, num_iter, seeds, True, "Setting_2", regularization, learn_rate)
model.main()

Worker 5: Done.      
Train RMSE: 0.7000436331962429, MAE: 0.5472587163336917.
Test RMSE: 0.8853501529849257, MAE: 0.6877561033227745.
Worker 3: Done.och.                       
Train RMSE: 0.7003560334230837, MAE: 0.5478898065930472.
Test RMSE: 0.8826690432936825, MAE: 0.6864840763568284.
Worker 2: Done.                           
Train RMSE: 0.7001989296843371, MAE: 0.5468004775412718.
Test RMSE: 0.8858082288824141, MAE: 0.6883424110941695.
Worker 1: Done.
Train RMSE: 0.7003354623549547, MAE: 0.5474956277467825.
Test RMSE: 0.8850883813443313, MAE: 0.6885226869761961.
Worker 4: Done.      
Train RMSE: 0.7005654171342083, MAE: 0.5478999179616113.
Test RMSE: 0.8849430954431121, MAE: 0.6878591775524062.

Train RMSE Mean: 0.7002998951585653
Train RMSE std: 0.00017365636761776292

Train MAE Mean: 0.5474689092352809
Train MAE std: 0.0004134293100975816

Test RMSE Mean: 0.8847717803896933
Test RMSE std: 0.001091771830001555

Test MAE Mean: 0.6877928910604749
Test MAE std: 0.00071460723634371

In [7]:
%%time
# Hyperparameters: Setting 3
num_factors = 10
num_iter = 100
regularization = 0.05
learn_rate = 0.005

model = GravityTikk(path, num_factors, num_iter, seeds, True, "Setting_3", regularization, learn_rate)
model.main()

Worker 3: Done.                           
Train RMSE: 0.7676115130465193, MAE: 0.6023186435014537.
Test RMSE: 0.8680923980044838, MAE: 0.6778206777685686.
Worker 5: Done.
Train RMSE: 0.7667639200427913, MAE: 0.6015428217625625.
Test RMSE: 0.8707837737807261, MAE: 0.6796100573091163.
Worker 2: Done.                           
Train RMSE: 0.7672440591992136, MAE: 0.6014728735722354.
Test RMSE: 0.8711468042809881, MAE: 0.6802037689134355.
Worker 4: Done.
Train RMSE: 0.7678718338092348, MAE: 0.602625982773912.
Test RMSE: 0.8697171843897353, MAE: 0.6787139198311353.
Worker 1: Done.      
Train RMSE: 0.7671447086099045, MAE: 0.602093143030455.
Test RMSE: 0.8701534264618374, MAE: 0.6800962248980237.

Train RMSE Mean: 0.7673272069415328
Train RMSE std: 0.0003833952802915864

Train MAE Mean: 0.6020106929281237
Train MAE std: 0.0004446046010671937

Test RMSE Mean: 0.8699787173835543
Test RMSE std: 0.0010648630307538373

Test MAE Mean: 0.6792889297440559
Test MAE std: 0.0009031256111939246

CPU 

In [8]:
%%time
# Hyperparameters: Setting 4
num_factors = 10
num_iter = 75
regularization = 0.01
learn_rate = 0.005

model = GravityTikk(path, num_factors, num_iter, seeds, True, "Setting_4", regularization, learn_rate)
model.main()

Worker 2: Done.                         
Train RMSE: 0.7627406897827123, MAE: 0.5944101358043267.
Test RMSE: 0.8818924101549159, MAE: 0.6839446153786101.
Worker 5: Done.                                                                     
Train RMSE: 0.7618778535004692, MAE: 0.5942762130463269.
Test RMSE: 0.881042540260366, MAE: 0.6829680176699284.
Worker 3: Done.
Train RMSE: 0.7629294300940438, MAE: 0.595132831987448.
Test RMSE: 0.8792546811915221, MAE: 0.6814628644315769.
Worker 1: Done.
Train RMSE: 0.7625131134655544, MAE: 0.5949206366619278.
Test RMSE: 0.8813513623505448, MAE: 0.6839251525032416.
Worker 4: Done.
Train RMSE: 0.7632412432495219, MAE: 0.5955417399619365.
Test RMSE: 0.8800734003309089, MAE: 0.6821658987625727.

Train RMSE Mean: 0.7626604660184603
Train RMSE std: 0.0004583175420140644

Train MAE Mean: 0.5948563114923932
Train MAE std: 0.00046604730451567367

Test RMSE Mean: 0.8807228788576517
Test RMSE std: 0.0009425927128970909

Test MAE Mean: 0.6828933097491859
Test M

In [9]:
%%time
# Hyperparameters: Setting 5
num_factors = 10
num_iter = 75
regularization = 0.05
learn_rate = 0.001

model = GravityTikk(path, num_factors, num_iter, seeds, True, "Setting_5", regularization, learn_rate)
model.main()

Worker 2: Done.och.                       
Train RMSE: 0.7969787147044373, MAE: 0.6281863982603141.
Test RMSE: 0.8635206324338419, MAE: 0.6780072350774219.
Worker 1: Done.      
Train RMSE: 0.7980031717521593, MAE: 0.6290379954199994.
Test RMSE: 0.8606193243204672, MAE: 0.6763413993038675.
Worker 3: Done.                           
Train RMSE: 0.797677201124004, MAE: 0.6290281186698652.
Test RMSE: 0.8583405702971703, MAE: 0.6746248330543247.
Worker 4: Done.
Train RMSE: 0.7995131609109801, MAE: 0.6306805228799153.
Test RMSE: 0.861120907669563, MAE: 0.6759132640553595.
Worker 5: Done.      
Train RMSE: 0.7976918426982756, MAE: 0.6289112804059047.
Test RMSE: 0.8616604275949277, MAE: 0.6764743933286965.

Train RMSE Mean: 0.7979728182379713
Train RMSE std: 0.0008400185061625757

Train MAE Mean: 0.6291688631271998
Train MAE std: 0.000818979442966997

Test RMSE Mean: 0.8610523724631941
Test RMSE std: 0.0016738062660901203

Test MAE Mean: 0.676272224963934
Test MAE std: 0.0010861556305292468

