## Probabilistic Matrix Factorization

In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

In [13]:
class PMF():
    """Probabilistic Matrix Factorization trained with per-rating SGD.

    A rating is modelled as the dot product of a user factor and an item
    factor: ``rating ~ U[user] . V[item]``.

    Rating matrices given to :meth:`train` and :meth:`eval_rmse` are indexed
    ``[item, user]`` (rows are items, columns are users). Only strictly
    positive entries are treated as observed ratings; everything else
    (zeros, NaN) is ignored.
    """

    def __init__(self, learning_rate, latent_dim, regu_u, regu_i, num_user, num_item):
        """Store hyper-parameters and randomly initialise the factors.

        Args:
            learning_rate: SGD step size.
            latent_dim: Number of latent dimensions per user/item.
            regu_u: L2 regularisation weight for user factors.
            regu_i: L2 regularisation weight for item factors.
            num_user: Number of rows allocated in ``U``.
            num_item: Number of rows allocated in ``V``.
        """
        self.learning_rate = learning_rate
        self.latent_dim = latent_dim

        self.regu_u = regu_u
        self.regu_i = regu_i

        # Small Gaussian noise so the initial predictions are close to zero.
        self.U = 0.1 * np.random.randn(num_user, latent_dim)
        self.V = 0.1 * np.random.randn(num_item, latent_dim)

    @staticmethod
    def _observed_entries(ratings):
        """Return ``(users, items, values)`` of the strictly positive entries.

        Entries are ordered user by user and, within a user, by increasing
        item index, i.e. the order of a ``for user: for item:`` scan.
        """
        matrix = np.asarray(ratings, dtype=float)
        # nonzero() scans in row-major order; transposing makes users the
        # outer axis and items the inner one.
        users, items = np.nonzero(matrix.T > 0)
        return users, items, matrix[items, users]

    def predict(self, user, item):
        """Return the predicted rating ``U[user] . V[item]``."""
        return np.dot(self.U[user], self.V[item].T)

    def train(self, train, test, maxiter):
        """Run up to ``maxiter`` SGD epochs, evaluating on ``test`` after each.

        Training stops early once the test MAE improves by less than 0.01
        compared with the previous epoch.

        Args:
            train: Item-by-user rating matrix used for the SGD updates.
            test: Item-by-user rating matrix used for evaluation.
            maxiter: Maximum number of epochs.

        Returns:
            Tuple ``(i, losses, rmse_errors, mae_errors)`` where ``i`` is the
            index of the last epoch that ran (0 if ``maxiter`` is 0) and the
            lists hold one value per completed epoch.
        """
        rmse_errors = []
        mae_errors = []
        losses = []
        last_iter = 0
        pre_mae = float('inf')

        # Extract the observed ratings once instead of rescanning the whole
        # matrix with scalar lookups on every epoch.
        users, items, ratings = self._observed_entries(train)
        num_ratings = len(ratings)

        for last_iter in range(maxiter):

            loss = 0.0
            for user, item, rating in zip(users, items, ratings):
                error = rating - self.predict(user, item)
                loss += error ** 2

                # Compute both gradients from the factors that produced
                # `error` before applying either update; otherwise the item
                # step would be taken against an already-moved user vector.
                grad_u = error * self.V[item] - self.regu_u * self.U[user]
                grad_v = error * self.U[user] - self.regu_i * self.V[item]
                self.U[user] += self.learning_rate * grad_u
                self.V[item] += self.learning_rate * grad_v

                loss += (self.regu_u * np.square(self.U[user]).mean()
                         + self.regu_i * np.square(self.V[item]).mean())

            # The sum above has one term per observed rating, so average over
            # the rating count (not the number of matrix rows). max() guards
            # against a training matrix without any rating.
            loss = 0.5 * loss / max(num_ratings, 1)
            losses.append(loss)
            rmse, mae = self.eval_rmse(test)
            rmse_errors.append(rmse)
            mae_errors.append(mae)
            print(f'iter: {last_iter} loss: {loss} rmse: {rmse} mae: {mae}')

            if pre_mae - mae < 0.01:
                break
            pre_mae = mae

        return last_iter, losses, rmse_errors, mae_errors

    def eval_rmse(self, test):
        """Return ``(rmse, mae)`` of the predictions on the observed entries.

        Args:
            test: Item-by-user rating matrix; entries > 0 are evaluated.

        Returns:
            Tuple ``(rmse, mae)``. Both are NaN when ``test`` contains no
            observed rating, instead of raising a division-by-zero error.
        """
        users, items, ratings = self._observed_entries(test)
        if len(ratings) == 0:
            return float('nan'), float('nan')

        # Row-wise dot products U[user] . V[item] for every observed pair.
        predictions = np.sum(self.U[users] * self.V[items], axis=1)
        residuals = ratings - predictions
        return np.sqrt(np.mean(np.square(residuals))), np.mean(np.abs(residuals))

In [14]:
def plot(epochs, losses, rmse_errors, mae_errors):
    """Plot the training loss and the RMSE/MAE curves against the epoch index.

    Args:
        epochs: Number of completed epochs; the x-axis is ``0 .. epochs - 1``.
        losses: Loss value per epoch.
        rmse_errors: RMSE value per epoch.
        mae_errors: MAE value per epoch.
    """
    epoch_axis = np.arange(epochs)
    curves = (('loss', losses), ('rmse', rmse_errors), ('mae', mae_errors))

    plt.figure()
    # Draw each series with its own call. A single
    # plot(x, losses, rmse_errors, mae_errors) is parsed by matplotlib as
    # (x, y) groups, which would use rmse_errors as the x-values of
    # mae_errors and draw only two lines.
    for label, series in curves:
        plt.plot(epoch_axis, series, label=label)
    plt.legend()
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.title('Losses')
    plt.show()

In [None]:
# Load the rating matrices; the first CSV column becomes the row index.
train = pd.read_csv('filmtrust/train.csv', header=0, index_col=[0])
test = pd.read_csv('filmtrust/test.csv', header=0, index_col=[0])

# PMF.train / PMF.eval_rmse treat columns as users and rows as items.
num_user = train.shape[1]
# NOTE(review): PMF indexes V by row position within each matrix, so only
# max(train.shape[0], test.shape[0]) rows appear to be touched; confirm
# whether allocating the sum of both row counts is intentional.
num_item = train.shape[0] + test.shape[0]
print(num_user, num_item)

pmf = PMF(
    learning_rate=0.01,
    latent_dim=5,
    regu_u=0.01,
    regu_i=0.01,
    num_user=num_user,
    num_item=num_item,
)

# Time the training run (at most 20 epochs).
start = time.time()
i, losses, rmse_error, mae_error = pmf.train(train, test, maxiter=20)

print(f'Time elapsed: {time.time() - start}')

# `i` is the index of the last epoch that ran, so i + 1 epochs were recorded.
plot(epochs=i + 1, losses=losses, rmse_errors=rmse_error, mae_errors=mae_error)