In [1]:
import numpy as np
import pandas as pd
import os
np.random.seed(0)

In [2]:
DATA_PATH = "../data/ml-100k/"

names = ["user_id", "item_id", "rating", "timestamp"]
df = pd.read_csv(os.path.join(DATA_PATH, "u.data"), sep='\t', names=names)

n_users = df.user_id.unique().shape[0]
n_items = df.item_id.unique().shape[0]

ratings = np.zeros((n_users, n_items))
for _, row in df.iterrows():
    ratings[row.user_id - 1, row.item_id - 1] = row.rating

In [3]:
def train_test_split(ratings: np.ndarray):
    test = np.zeros(ratings.shape)
    train = ratings.copy()
    for user in range(ratings.shape[0]):
        test_ratings = np.random.choice(ratings[user, :].nonzero()[0], size=10, replace=False)
        train[user, test_ratings] = 0
        test[user, test_ratings] = ratings[user, test_ratings]
        
    assert np.all((train * test) == 0)
    return train, test

train, test = train_test_split(ratings)

In [97]:
def mse(predictions, ground_truth):
    predictions = predictions[ground_truth.nonzero()].flatten()
    ground_truth = ground_truth[ground_truth.nonzero()].flatten()
    return np.mean((predictions - ground_truth) ** 2)

## ALS

The loss function for ALS is
$$
    L=\sum_{(i,j)\in R} (r_{ij}-x_i^Ty_j)^2 + \lambda_x \sum_{i} \lVert x_i \rVert^2 + \lambda_y \sum_{j} \lVert y_j \lVert^2.
$$

Let's give random values to $X$ and $Y$ matrices and fix $X$ matrix first. Then, we can optimize all vectors $y_j$ independently from each other:  

$$
    \arg \min_{y_j} \sum_{(i,j)\in R} (r_{ij}-x_i^Ty_j)^2 + \lambda_x \sum_{i} \lVert x_i \rVert^2 + \lambda_y \lVert y_j \lVert^2 = \arg\min_{y_{j}}\sum _{( i,j) \in R} r_{ij}^{2} -2\sum _{( i,j) \in R} r_{ij} x_{i}^{T} y_{j} +\sum _{( i,j) \in R}\left( x_{i}^{T} y_{j}\right)^{2} +\lambda _{y}\Vert y_{j}\Vert ^{2} =\\
    \arg\min_{y_{j}} -2\left(\sum _{( i,j) \in R} r_{ij} x_{i}^{T}\right) y_{j} +\sum _{( i,j) \in R} y_{j}^{T} x_{i} x_{i}^{T} y_{j} +\lambda _{y} y_{j}^{T} y_{j} = \arg\min_{y_{j}} -2\left(\sum _{( i,j) \in R} r_{ij} x_{i}^{T}\right) y_{j} +y_{j}^{T}\left(\sum _{( i,j) \in R} x_{i} x_{i}^{T} +\lambda _{y}\right) y_{j}\\ 
    \Rightarrow y_{j} =\left(\sum _{( i,j) \in R} x_{i} x_{i}^{T} +\lambda _{y}\right)^{-1}\left(\sum _{( i,j) \in R} r_{ij} x_{i}\right)
$$  
So, the final step solution for $Y$ is:
$$
    Y = R^TX(A + \lambda_y)^{-1},
$$
where $A = \{a_{ij}\}_{i,j=1}^k,\ a_{ij}=\sum_{k=1}^{N}x_{ki}x_{kj}$

When we fix $Y$ matrix, formula for $X$ will be:  
$$
     \arg \min_{x_i} \sum_{(i,j)\in R} (r_{ij}-x_i^Ty_j)^2 + \lambda_x \lVert x_i \rVert^2 + \lambda_y \sum_{j} \lVert y_j \lVert^2 = \arg\min_{x_{i}}\sum _{( i,j) \in R} r_{ij}^{2} -2\sum _{( i,j) \in R} r_{ij} x_{i}^{T} y_{j} +\sum _{( i,j) \in R}\left( x_{i}^{T} y_{j}\right)^{2} +\lambda _{x}\Vert x_{i}\Vert ^{2} =\\
    \arg\min_{x_{i}} -2 x_{i}^{T}\left(\sum _{( i,j) \in R} r_{ij} y_{j}\right) +\sum _{( i,j) \in R} x_{i}^{T} y_{j} y_{j}^{T} x_{i} +\lambda _{x} x_{i}^{T} x_{i} = \arg\min_{y_{j}} -2x_{i}^{T}\left(\sum _{( i,j) \in R} r_{ij}y_{j}\right) +x_{i}^{T}\left(\sum _{( i,j) \in R} y_{j} y_{j}^{T} +\lambda _{x}\right) x_{i}\\ 
    \Rightarrow x_{i} =\left(\sum _{( i,j) \in R} y_{j} y_{j}^{T} +\lambda _{x}\right)^{-1}\left(\sum _{( i,j) \in R} r_{ij} y_{j}\right)
$$
So, the final step solution for $Y$ is:
$$
    X = RY(A + \lambda_x)^{-1},
$$
where $A = \{a_{ij}\}_{i,j=1}^k,\ a_{ij}=\sum_{k=1}^{M}y_{ki}y_{kj}$

In [138]:
class ALS:
    def __init__(self,
                 hidden_size: int = 40,
                 lambda_x: float = 10,
                 lambda_y: float = 10):
        
        self.hidden_size = hidden_size
        
        self.lambda_x = lambda_x
        self.lambda_y = lambda_y
        
    def _compute_loss(self, ratings: np.ndarray):
        nonzero_items = ratings.nonzero()
        
        main_term = np.sum((ratings[nonzero_items] - np.dot(self.X, self.Y.T)[nonzero_items]) ** 2)
        user_regularization = self.lambda_x * np.sum(np.linalg.norm(self.X, axis=-1))
        item_regularization = self.lambda_y * np.sum(np.linalg.norm(self.Y, axis=-1))
        
        return main_term + user_regularization + item_regularization
        
    def _step(self, ratings: np.ndarray, stage: str):
        if stage == "item":
            inverse_matrix = np.linalg.inv(np.einsum("ij,ik->jk", self.X, self.X) + self.lambda_y)
            RTX = ratings.T.dot(self.X)
            
            self.Y = RTX.dot(inverse_matrix)
        elif stage == "user":
            inverse_matrix = np.linalg.inv(np.einsum("ij,ik->jk", self.Y, self.Y) + self.lambda_x)
            RY = ratings.dot(self.Y)
            
            self.X = RY.dot(inverse_matrix)
        else:
            assert False, "Invalid stage name!"
        
            
    def _train_step(self, train_ratings: np.ndarray):
        self._step(train_ratings, "item")
        self._step(train_ratings, "user")

        loss = self._compute_loss(train_ratings)
        
        return loss
        
    
    def _validation_step(self, val_ratings: np.ndarray):
        loss = self._compute_loss(val_ratings)
        
        return loss
    
        
    def fit(self, 
            train_ratings: np.ndarray, 
            val_ratings: np.ndarray, 
            n_epochs: int = 20,
            metric_function = None,
            verbose=True):
        
        self.X = np.random.randn(ratings.shape[0], self.hidden_size)
        self.Y = np.random.randn(ratings.shape[1], self.hidden_size)
        
        
        for epoch in range(n_epochs):
            train_loss = self._train_step(train_ratings)
            val_loss = self._validation_step(val_ratings)
            
            print(f"+--------\t----------\t-------+")
            print(f"|--------\t EPOCH: {epoch + 1}\t-------|")
            print(f"+--------\t----------\t-------+")
            if metric_function is not None:
                all_predictions = self.predict_all()
                
                train_metric = metric_function(all_predictions, train_ratings)
                val_metric = metric_function(all_predictions, val_ratings)
                
                print(f"Train metric: {train_metric}\t\tValidation metric: {val_metric}")
            
            print(f"Train loss: {train_loss}\t\tValidation loss: {val_loss}")
            
    def predict(self, user: int, item: int):
        return self.X[user].dot(self.Y[item])
    
    def predict_all(self):
        return self.X.dot(self.Y.T)

In [140]:
als = ALS()
als.fit(train, test, n_epochs=40, metric_function=mse)

+--------	----------	-------+
|--------	 EPOCH: 1	-------|
+--------	----------	-------+
Train metric: 5.52133941727585		Validation metric: 9.725381374143891
Train loss: 618865.972851474		Validation loss: 210508.60818697713
+--------	----------	-------+
|--------	 EPOCH: 2	-------|
+--------	----------	-------+
Train metric: 4.241355928593501		Validation metric: 8.640423970366813
Train loss: 508433.4294351986		Validation loss: 205773.02102304422
+--------	----------	-------+
|--------	 EPOCH: 3	-------|
+--------	----------	-------+
Train metric: 4.081615556758799		Validation metric: 8.538820734781014
Train loss: 495423.53256258206		Validation loss: 206272.6911159226
+--------	----------	-------+
|--------	 EPOCH: 4	-------|
+--------	----------	-------+
Train metric: 4.026706951309441		Validation metric: 8.5033806005291
Train loss: 491044.99270804966		Validation loss: 206533.02319094303
+--------	----------	-------+
|--------	 EPOCH: 5	-------|
+--------	----------	-------+
Train metr

Train metric: 3.9601284133162276		Validation metric: 8.44498233320636
Train loss: 485623.9386083224		Validation loss: 206591.29161640766
+--------	----------	-------+
|--------	 EPOCH: 38	-------|
+--------	----------	-------+
Train metric: 3.9600511846680932		Validation metric: 8.444724691582083
Train loss: 485617.88708706107		Validation loss: 206589.80513329094
+--------	----------	-------+
|--------	 EPOCH: 39	-------|
+--------	----------	-------+
Train metric: 3.9599807495878174		Validation metric: 8.444483071898441
Train loss: 485612.46196343534		Validation loss: 206588.48084126896
+--------	----------	-------+
|--------	 EPOCH: 40	-------|
+--------	----------	-------+
Train metric: 3.959916605960102		Validation metric: 8.444257319308113
Train loss: 485607.6117444389		Validation loss: 206587.31126370796


## Funk SVD

Funk SVD has the same loss function as ALS but it is optimized by gradient descent

In [162]:
class FunkSVD:
    def __init__(self,
                 hidden_size: int = 40,
                 lambda_x: float = 10,
                 lambda_y: float = 10,
                 learning_rate: float = 3e-4):
        
        self.hidden_size = hidden_size
        
        self.lambda_x = lambda_x
        self.lambda_y = lambda_y
        self.learning_rate = learning_rate
        
    def _compute_loss(self, ratings: np.ndarray):
        nonzero_items = ratings.nonzero()
        
        main_term = np.sum((ratings[nonzero_items] - np.dot(self.X, self.Y.T)[nonzero_items]) ** 2)
        user_regularization = self.lambda_x * np.sum(np.linalg.norm(self.X, axis=-1))
        item_regularization = self.lambda_y * np.sum(np.linalg.norm(self.Y, axis=-1))
        
        return main_term + user_regularization + item_regularization
        
    def _step(self, ratings: np.ndarray):
        X_lambda = np.linalg.inv(np.einsum("ij,ik->jk", self.Y, self.Y) + self.lambda_x)
        X_lambda = self.X.dot(X_lambda.T)
        RY = ratings.dot(self.Y)
        
        Y_lambda = np.linalg.inv(np.einsum("ij,ik->jk", self.X, self.X) + self.lambda_y)
        Y_lambda = self.Y.dot(Y_lambda.T)
        RTX = ratings.T.dot(self.X)

        X_gradient = -RY + X_lambda
        Y_gradient = -RTX + Y_lambda
        
        self.X -= self.learning_rate * X_gradient
        self.Y -= self.learning_rate * Y_gradient
        
            
    def _train_step(self, train_ratings: np.ndarray):
        self._step(train_ratings)
        loss = self._compute_loss(train_ratings)
        
        return loss
        
    
    def _validation_step(self, val_ratings: np.ndarray):
        loss = self._compute_loss(val_ratings)
        
        return loss
    
        
    def fit(self, 
            train_ratings: np.ndarray, 
            val_ratings: np.ndarray, 
            max_epochs: int = 20,
            metric_function = None,
            verbose=True,
            eps=1e-6):
        
        self.X = np.random.randn(ratings.shape[0], self.hidden_size)
        self.Y = np.random.randn(ratings.shape[1], self.hidden_size)
        
        previous_predictions = None
        current_predictions = None
        for epoch in range(max_epochs):
            train_loss = self._train_step(train_ratings)
            val_loss = self._validation_step(val_ratings)
            
            print(f"+--------\t----------\t-------+")
            print(f"|--------\t EPOCH: {epoch + 1}\t-------|")
            print(f"+--------\t----------\t-------+")
            previous_predictions = current_predictions
            current_predictions = self.predict_all()
            if metric_function is not None:
                
                
                train_metric = metric_function(current_predictions, train_ratings)
                val_metric = metric_function(current_predictions, val_ratings)
                
                print(f"Train metric: {train_metric}\t\tValidation metric: {val_metric}")
            
            print(f"Train loss: {train_loss}\t\tValidation loss: {val_loss}")
            
            if previous_predictions is not None and np.abs(current_predictions - previous_predictions < eps).all():
                print("Criterion is reached. Leaving the training")
            
    def predict(self, user: int, item: int):
        return self.X[user].dot(self.Y[item])
    
    def predict_all(self):
        return self.X.dot(self.Y.T)

In [164]:
funk_svd = FunkSVD(learning_rate=1e-6)
funk_svd.fit(train, test, max_epochs=1000, metric_function=mse)

+--------	----------	-------+
|--------	 EPOCH: 1	-------|
+--------	----------	-------+
Train metric: 54.40117072444807		Validation metric: 55.55683717495832
Train loss: 5092145.307297968		Validation loss: 688932.2493445638
+--------	----------	-------+
|--------	 EPOCH: 2	-------|
+--------	----------	-------+
Train metric: 54.39902920622601		Validation metric: 55.55678228107275
Train loss: 5091951.359906542		Validation loss: 688931.7416091682
+--------	----------	-------+
|--------	 EPOCH: 3	-------|
+--------	----------	-------+
Train metric: 54.396888661634684		Validation metric: 55.55672800742423
Train loss: 5091757.5010127425		Validation loss: 688931.2400384998
+--------	----------	-------+
|--------	 EPOCH: 4	-------|
+--------	----------	-------+
Train metric: 54.394749089614415		Validation metric: 55.556674353395735
Train loss: 5091563.730520598		Validation loss: 688930.7446267421
+--------	----------	-------+
|--------	 EPOCH: 5	-------|
+--------	----------	-------+
Train m

+--------	----------	-------+
|--------	 EPOCH: 38	-------|
+--------	----------	-------+
Train metric: 54.322574811599125		Validation metric: 55.5552144074634
Train loss: 5085027.452694837		Validation loss: 688917.523870684
+--------	----------	-------+
|--------	 EPOCH: 39	-------|
+--------	----------	-------+
Train metric: 54.320468618643645		Validation metric: 55.55518205407053
Train loss: 5084836.7164031435		Validation loss: 688917.2403824736
+--------	----------	-------+
|--------	 EPOCH: 40	-------|
+--------	----------	-------+
Train metric: 54.31836336067921		Validation metric: 55.55515029831146
Train loss: 5084646.065109646		Validation loss: 688916.9628460067
+--------	----------	-------+
|--------	 EPOCH: 41	-------|
+--------	----------	-------+
Train metric: 54.3162590366773		Validation metric: 55.55511913958158
Train loss: 5084455.498721198		Validation loss: 688916.6912555895
+--------	----------	-------+
|--------	 EPOCH: 42	-------|
+--------	----------	-------+
Train 

Train metric: 54.24733361378024		Validation metric: 55.554421896318566
Train loss: 5078213.834339618		Validation loss: 688911.027421826
+--------	----------	-------+
|--------	 EPOCH: 75	-------|
+--------	----------	-------+
Train metric: 54.2452604383073		Validation metric: 55.55441067908336
Train loss: 5078026.099825055		Validation loss: 688910.9546313193
+--------	----------	-------+
|--------	 EPOCH: 76	-------|
+--------	----------	-------+
Train metric: 54.243188161304566		Validation metric: 55.554400037912444
Train loss: 5077838.447001354		Validation loss: 688910.887589514
+--------	----------	-------+
|--------	 EPOCH: 77	-------|
+--------	----------	-------+
Train metric: 54.24111678177208		Validation metric: 55.55438997221225
Train loss: 5077650.875777962		Validation loss: 688910.826290826
+--------	----------	-------+
|--------	 EPOCH: 78	-------|
+--------	----------	-------+
Train metric: 54.23904629871063		Validation metric: 55.5543804813895
Train loss: 5077463.38606439

Train metric: 54.171216782914996		Validation metric: 55.55438592435889
Train loss: 5071321.36436668		Validation loss: 688912.1196047731
+--------	----------	-------+
|--------	 EPOCH: 112	-------|
+--------	----------	-------+
Train metric: 54.16917619074887		Validation metric: 55.554395628403846
Train loss: 5071136.592639254		Validation loss: 688912.2558189775
+--------	----------	-------+
|--------	 EPOCH: 113	-------|
+--------	----------	-------+
Train metric: 54.16713646055592		Validation metric: 55.55440588673952
Train loss: 5070951.899297727		Validation loss: 688912.3975771316
+--------	----------	-------+
|--------	 EPOCH: 114	-------|
+--------	----------	-------+
Train metric: 54.165097591363775		Validation metric: 55.55441669878263
Train loss: 5070767.284254051		Validation loss: 688912.5448737533
+--------	----------	-------+
|--------	 EPOCH: 115	-------|
+--------	----------	-------+
Train metric: 54.163059582200745		Validation metric: 55.55442806395016
Train loss: 5070582

+--------	----------	-------+
|--------	 EPOCH: 148	-------|
+--------	----------	-------+
Train metric: 54.09628143850817		Validation metric: 55.55510961662932
Train loss: 5064536.335577792		Validation loss: 688920.8093769213
+--------	----------	-------+
|--------	 EPOCH: 149	-------|
+--------	----------	-------+
Train metric: 54.094272097259484		Validation metric: 55.555139442964105
Train loss: 5064354.4059879305		Validation loss: 688921.1470862903
+--------	----------	-------+
|--------	 EPOCH: 150	-------|
+--------	----------	-------+
Train metric: 54.09226358247641		Validation metric: 55.55516980217791
Train loss: 5064172.5515688285		Validation loss: 688921.4901384774
+--------	----------	-------+
|--------	 EPOCH: 151	-------|
+--------	----------	-------+
Train metric: 54.090255893212486		Validation metric: 55.55520069369671
Train loss: 5063990.772234789		Validation loss: 688921.8385280938
+--------	----------	-------+
|--------	 EPOCH: 152	-------|
+--------	----------	-----

KeyboardInterrupt: 