In [2]:
import numpy as np
import pandas as pd
import load_data as ld
from utils import RMSE 
import copy
from timeit import default_timer as timer
from sklearn.model_selection import train_test_split

In [3]:
class SVD():
    """Biased matrix factorisation fitted with full-batch gradient descent.

    A prediction for every (row, column) pair is computed as
    ``mu + b_u[row] + b_i[column] + P[row] . Q[column]``. Entries equal to
    zero in ``R`` are treated as unobserved: they are masked out of the
    training error and excluded from the global mean ``mu``.
    """

    def __init__(self, R, val_R, latent_size=50, ld=1e-3, learning_rate=0.001, epochs=200, seed=None):
        """Store the data, hyper-parameters and randomly initialised factors.

        Parameters
        ----------
        R : numpy.ndarray
            Training matrix of shape (N, M); zeros mark unobserved entries.
        val_R : numpy.ndarray
            Validation matrix; passed to ``RMSE`` together with the
            predictions during training.
        latent_size : int
            Number of latent factors (columns of ``P`` and ``Q``).
        ld : float
            L2 regularisation strength applied to factors and biases.
        learning_rate : float
            Gradient-descent step size.
        epochs : int
            Maximum number of full-batch updates performed by ``train``.
        seed : int or None
            When given, ``P`` and ``Q`` are drawn from a private
            ``numpy.random.RandomState(seed)`` so that runs are repeatable.
            When ``None`` the global NumPy random state is used.
        """
        self._R = R             # explicit matrix
        self._val_R = val_R
        # Implicit (0/1) indicator matrices: 1 wherever an entry is non-zero.
        self._I = self._R.copy()
        self._I[self._I != 0] = 1
        self._val_I = self._val_R.copy()
        self._val_I[self._val_I != 0] = 1
        self._N, self._M = R.shape
        self._latent = latent_size
        self._lr = learning_rate
        self._epochs = epochs
        self._lambda = ld

        # A dedicated generator keeps seeded runs independent of whatever else
        # has consumed the global random state.
        rng = np.random if seed is None else np.random.RandomState(seed)
        self._P = rng.normal(0, 0.1, size=(self._N, latent_size))
        self._Q = rng.normal(0, 0.1, size=(self._M, latent_size))
        self.b_u = np.zeros(self._N)
        self.b_i = np.zeros(self._M)

        # Mean of the observed entries; fall back to 0.0 when there are none so
        # the predictions do not become NaN.
        observed = self._R[self._R != 0]
        self.mu = np.mean(observed) if observed.size else 0.0

    def update_SVD(self):
        """Apply one full-batch gradient step to ``P``, ``Q``, ``b_u`` and ``b_i``."""
        # Residuals on observed entries only (unobserved ones are zeroed out).
        err = self._I * (self._R - self.get_pred())

        # Gradients for the factor matrices, each with an L2 penalty term.
        grads_p = np.dot(err, -self._Q) + self._lambda * self._P
        grads_q = np.dot(err.T, -self._P) + self._lambda * self._Q

        # NOTE(review): the bias gradients average the masked residuals over
        # the whole row/column (unobserved zeros included), whereas the factor
        # gradients above are sums. Left unchanged to preserve the existing
        # training dynamics - confirm this scaling is intended.
        self.b_u = self.b_u - self._lr * (-np.mean(err, axis=1) + self._lambda * self.b_u)
        self.b_i = self.b_i - self._lr * (-np.mean(err, axis=0) + self._lambda * self.b_i)
        self._P = self._P - self._lr * grads_p
        self._Q = self._Q - self._lr * grads_q

    def get_pred(self):
        """Return the dense (N, M) matrix of predictions for every pair."""
        return self.mu + self.b_u[:, np.newaxis] + self.b_i[np.newaxis, :] + np.dot(self._P, self._Q.T)

    def train(self):
        """Run up to ``epochs`` updates, stopping early if validation worsens.

        After every update the train and validation scores are computed with
        ``RMSE`` (imported from ``utils``; presumably it ignores zero entries -
        verify against that helper) and printed. Training stops as soon as the
        validation score is at least 1e-4 higher than in the previous epoch.

        Returns
        -------
        tuple
            ``(P, Q, train_rmse_list, vali_rmse_list)``; the lists hold one
            score per completed epoch.
        """
        train_rmse_list = []
        vali_rmse_list = []
        last_vali_rmse = None

        start_time = timer()
        for epoch in range(self._epochs):
            self.update_SVD()
            pred = self.get_pred()
            train_rmse = RMSE(self._R, pred)
            val_rmse = RMSE(self._val_R, pred)

            train_rmse_list.append(train_rmse)
            vali_rmse_list.append(val_rmse)

            print('traning iteration:{: d} ,train_RMSE:{: f}, val_RMSE:{: f}'.format(epoch, train_rmse, val_rmse))

            # Compare against None explicitly: a previous score of exactly 0.0
            # is a valid value and must not disable the check.
            if last_vali_rmse is not None and last_vali_rmse - val_rmse <= -0.0001:
                print('convergence at iterations:{: d}'.format(epoch))
                break
            else:
                last_vali_rmse = val_rmse

        print("Total time for training : %.4f" % (timer()-start_time))
        return self._P, self._Q, train_rmse_list, vali_rmse_list

In [4]:
# Load the ratings and size the rating matrices by the number of distinct ids.
# NOTE(review): the matrix-building cell indexes with `id - 1`, which assumes
# user/item ids are contiguous and 1-based - TODO confirm for this dataset.
df = ld.load_rating_data()
N, M = len(df.user_id.unique()), len(df.item_id.unique())
ratio = 0.8
SPLIT_SEED = 42    # fixed so Restart & Run All reproduces the same split
print("N, M : ", N, M)

# `ratio` of the rows go to training; the rest is halved into validation/test.
train, holdout = train_test_split(df, test_size=1-ratio, random_state=SPLIT_SEED)
val, test = train_test_split(holdout, test_size=0.5, random_state=SPLIT_SEED)

# Reassign instead of mutating in place so each split gets a fresh 0..n-1 index.
train = train.reset_index(drop=True)
val = val.reset_index(drop=True)
test = test.reset_index(drop=True)

N, M :  943 1682


In [5]:
def build_rating_matrix(ratings, n_users, n_items):
    """Scatter rating rows into a dense ``n_users x n_items`` float array.

    Parameters
    ----------
    ratings : pandas.DataFrame
        The first three columns are read positionally as user id, item id and
        rating. Ids are shifted by -1 to obtain row/column indices.
    n_users, n_items : int
        Shape of the returned matrix.

    Returns
    -------
    numpy.ndarray
        Matrix holding each rating at ``[user_id - 1, item_id - 1]`` and zero
        everywhere else.
    """
    matrix = np.zeros([n_users, n_items])
    row_idx = ratings.iloc[:, 0].to_numpy().astype(int) - 1
    col_idx = ratings.iloc[:, 1].to_numpy().astype(int) - 1
    # One vectorised assignment replaces the per-row iloc loop.
    # Assumes each (user, item) pair occurs at most once per split - NumPy does
    # not define which value is kept when an index pair is repeated.
    matrix[row_idx, col_idx] = ratings.iloc[:, 2].to_numpy().astype(float)
    return matrix


R = build_rating_matrix(train, N, M)
val_R = build_rating_matrix(val, N, M)

In [15]:
# Fit the factorisation model on the training matrix; the validation matrix is
# handed to the model, whose train() reports a validation score per epoch.
svd = SVD(
    R,
    val_R,
    latent_size=50,
    ld=0.01,
    learning_rate=0.002,
    epochs=100,
)
# train() returns the learned factor matrices and the per-epoch RMSE histories.
P1, Q1, train_rmse_list, vali_rmse_list = svd.train()

traning iteration: 0 ,train_RMSE: 1.125899, val_RMSE: 1.129653
traning iteration: 1 ,train_RMSE: 1.123542, val_RMSE: 1.129468
traning iteration: 2 ,train_RMSE: 1.121189, val_RMSE: 1.129277
traning iteration: 3 ,train_RMSE: 1.118810, val_RMSE: 1.129066
traning iteration: 4 ,train_RMSE: 1.116375, val_RMSE: 1.128821
traning iteration: 5 ,train_RMSE: 1.113853, val_RMSE: 1.128525
traning iteration: 6 ,train_RMSE: 1.111209, val_RMSE: 1.128161
traning iteration: 7 ,train_RMSE: 1.108404, val_RMSE: 1.127706
traning iteration: 8 ,train_RMSE: 1.105393, val_RMSE: 1.127133
traning iteration: 9 ,train_RMSE: 1.102125, val_RMSE: 1.126409
traning iteration: 10 ,train_RMSE: 1.098540, val_RMSE: 1.125495
traning iteration: 11 ,train_RMSE: 1.094570, val_RMSE: 1.124345
traning iteration: 12 ,train_RMSE: 1.090137, val_RMSE: 1.122904
traning iteration: 13 ,train_RMSE: 1.085155, val_RMSE: 1.121112
traning iteration: 14 ,train_RMSE: 1.079530, val_RMSE: 1.118901
traning iteration: 15 ,train_RMSE: 1.073165, val_R