In [602]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.utils import shuffle

In [603]:
class Loss(nn.Module):
    def __init__(self, lambda_u, lambda_L):
        super(Loss, self).__init__()
        self.lambda_u = lambda_u
        self.lambda_L = lambda_L

    def forward(self, rating_mat, user_features, local_item_features, avg_item_features):
        '''
        rating_mat: (num_users, num_items)
        user_features: (num_users_per_client, num_latent_factors)
        local_item_features: (num_items, num_latent_factors)
        avg_item_features: (num_items, num_latent_factors)
        '''
        non_zero_mask = (rating_mat != -1).type(torch.FloatTensor)
        ratings_predicted = torch.sigmoid(torch.mm(user_features, local_item_features.t()))
        
        diff = (ratings_predicted - rating_mat) ** 2
        prediction_error = torch.sum(diff * non_zero_mask)

        user_regularization = torch.sum(user_features ** 2)  ## regularization term for user features
        item_loss = torch.sum((local_item_features - avg_item_features) ** 2) ## loss term for item features

        loss = prediction_error + self.lambda_u * user_regularization + self.lambda_L * item_loss

        return loss, prediction_error

In [604]:
rating_df = pd.read_csv('ml-100k.inter', sep='\t')
rating_df.columns = ['user_id', 'item_id', 'rating', 'timestamp']
rating_df = shuffle(rating_df)
rating_df.head()

Unnamed: 0,user_id,item_id,rating,timestamp
76045,696,520,5,886404617
10114,18,215,3,880130930
6746,160,175,4,876860808
97313,846,197,4,883948417
64595,617,174,1,883788820


In [605]:
# Split the data into training and testing sets
ratio = 0.8
train_size = int(len(rating_df) * ratio)

aggregate_rating_matrix = rating_df.pivot_table(index='user_id', columns='item_id', values='rating', aggfunc='mean')  # transform the dataframe into a matrix
num_users, num_items = aggregate_rating_matrix.shape
rating_matrix = aggregate_rating_matrix.copy()
test_rating_matrix = aggregate_rating_matrix.copy()
for i in range(len(rating_df)):
    user_id = rating_df.iloc[i,0]
    item_id = rating_df.iloc[i,1]
    if i < train_size:
        test_rating_matrix.loc[user_id,item_id] = None
    else:
        rating_matrix.loc[user_id,item_id] = None

In [606]:
# normalize the ratings using min-max normalization
min_rating, max_rating = rating_df['rating'].min(), rating_df['rating'].max()
rating_matrix = rating_matrix.apply(lambda x: (x - min_rating) / (max_rating - min_rating))
rating_matrix[rating_matrix.isnull()] = -1
rating_matrix = torch.FloatTensor(rating_matrix.values)

In [607]:
num_users, num_items

(943, 1682)

In [608]:
lr = 0.05
num_epochs = 100
latent_factors = 20
num_clients = 50
m = num_users // num_clients

In [609]:
# initializaiton

user_features = []
item_features = []
std = 0.01

for i in range(num_clients): # initialize user features and local item features
    user_features.append(torch.randn(m, latent_factors, requires_grad=True))  # multipyling std here will make the Tensor non-leaf, which will cause error
    item_features.append(torch.randn(num_items, latent_factors, requires_grad=True))
with torch.no_grad():
    for i in range(num_clients):
        user_features[i].data.mul_(std) # mul_ does not change requires_grad to False
        item_features[i].data.mul_(std)

avg_item_features = torch.randn(num_items, latent_factors).data.mul(std) # mul will change requires_grad to False
for i in range(num_clients):
    avg_item_features += item_features[i]
avg_item_features /= num_clients

RFRec_loss = Loss(lambda_u=0.1, lambda_L=10)

client_optimizers = []
for i in range(num_clients):
    optimizer = optim.Adam([user_features[i], item_features[i]], lr=lr)
    client_optimizers.append(optimizer)

In [610]:
def train(epoch):
    avg_loss = avg_prediction_error = 0
    tmp = torch.zeros(item_features[0].shape)
    global avg_item_features
    
    for i in range(num_clients):
        client_optimizers[i].zero_grad()
        loss, prediction_error = RFRec_loss(rating_matrix[i*m: (i+1)*m], user_features[i], item_features[i], avg_item_features)

        avg_loss += loss.item() / num_clients
        avg_prediction_error += prediction_error / num_clients

        loss.backward(retain_graph=True)
        client_optimizers[i].step()

        tmp += item_features[i]
    
    avg_item_features = tmp / num_clients  # update the global item features

    if epoch % 10 == 0:
        print('Epoch: {}, Loss: {:.4f}, Prediction Error: {:.4f}'.format(epoch, avg_loss, avg_prediction_error))

In [611]:
for epoch in range(num_epochs):
    train(epoch)

Epoch: 0, Loss: 182.3888, Prediction Error: 149.3723
Epoch: 10, Loss: 265.8792, Prediction Error: 148.8695
Epoch: 20, Loss: 174.7490, Prediction Error: 129.6026
Epoch: 30, Loss: 111.4726, Prediction Error: 86.8935
Epoch: 40, Loss: 94.1525, Prediction Error: 78.4755
Epoch: 50, Loss: 83.0906, Prediction Error: 68.8726
Epoch: 60, Loss: 75.0250, Prediction Error: 60.5348
Epoch: 70, Loss: 68.5111, Prediction Error: 53.9169
Epoch: 80, Loss: 63.2727, Prediction Error: 48.6967
Epoch: 90, Loss: 58.9758, Prediction Error: 44.7972


In [612]:
test_rating_matrix[test_rating_matrix.isnull()] = -1
test_rating_matrix = torch.FloatTensor(test_rating_matrix.values)
print(test_rating_matrix.shape)

nonzero_mask = (test_rating_matrix != -1).type(torch.FloatTensor)

torch.Size([943, 1682])


In [613]:
def evaluate(matrix, user_features, item_features, client_id):
    predicted_ratings = torch.sigmoid(torch.mm(user_features, item_features.t()))
    pred = (predicted_ratings * (max_rating - min_rating) + min_rating) * nonzero_mask[client_id*m: (client_id+1)*m]
    true_value = matrix * nonzero_mask[client_id*m: (client_id+1)*m]
    
    abs_error = torch.sum(torch.abs(pred - true_value))
    square_error = torch.sum((pred - true_value)**2)
    n_nonzero = torch.sum(nonzero_mask[client_id*m: (client_id+1)*m])
    return abs_error, square_error, n_nonzero

In [614]:
MAE = MSE = num_nonzero = 0

for i in range(num_clients):
    abs_error, square_error, n_nonzero = evaluate(test_rating_matrix[i*m: (i+1)*m], user_features[i], item_features[i], i)
    MAE += abs_error
    MSE += square_error
    num_nonzero += n_nonzero

MAE /= num_nonzero
RMSE = torch.sqrt(MSE / num_nonzero)
print("MAE: ", MAE.data.numpy())
print("RMSE: ", RMSE.data.numpy())

MAE:  0.74927455
RMSE:  0.95442826
