In [205]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pickle
from sklearn.utils import shuffle

In [635]:
class Loss(nn.Module):
    """Loss terms for a client's matrix-factorization model with a shared item average.

    Ratings are predicted as ``sigmoid(user_features @ local_item_features.T)``.
    Entries of the rating matrix equal to ``-1`` are treated as unobserved and
    are excluded from the squared-error term.

    Args:
        lambda_u: weight of the squared-norm penalty on the user features.
        lambda_L: weight of the squared distance between the local item
            features and the averaged item features.
        prob: value used to rescale the two partial losses;
            ``compute_f_loss`` divides by ``1 - prob`` and
            ``compute_psi_loss`` divides by ``prob``. ``forward`` does not
            use it.
    """

    def __init__(self, lambda_u, lambda_L, prob=0.5):
        super(Loss, self).__init__()
        self.lambda_u = lambda_u
        self.lambda_L = lambda_L
        self.prob = prob

    @staticmethod
    def _masked_squared_error(rating_mat, user_features, local_item_features):
        """Sum of squared prediction errors over the observed (non ``-1``) ratings."""
        ratings_predicted = torch.sigmoid(torch.mm(user_features, local_item_features.t()))
        # Build the mask with the prediction dtype on the ratings' own device
        # instead of forcing a CPU FloatTensor, so the loss also works when
        # the inputs live on another device.
        observed_mask = (rating_mat != -1).to(ratings_predicted.dtype)
        squared_diff = (ratings_predicted - rating_mat) ** 2
        return torch.sum(squared_diff * observed_mask)

    def compute_f_loss(self, rating_mat, user_features, local_item_features):
        """Return (prediction error + lambda_u * user penalty) / (1 - prob)."""
        prediction_error = self._masked_squared_error(rating_mat, user_features, local_item_features)
        user_regularization = torch.sum(user_features ** 2)  ## regularization term for user features

        return (prediction_error + self.lambda_u * user_regularization) / (1 - self.prob)

    def compute_psi_loss(self, local_item_features, avg_item_features):
        """Return lambda_L * ||local_item_features - avg_item_features||^2 / prob."""
        item_loss = torch.sum((local_item_features - avg_item_features) ** 2)  ## loss term for item features

        return self.lambda_L * item_loss / self.prob

    def forward(self, rating_mat, user_features, local_item_features, avg_item_features):
        '''
        Full (unscaled) objective and its prediction-error component.

        rating_mat: (num_users_per_client, num_items), -1 marks unobserved entries
        user_features: (num_users_per_client, num_latent_factors)
        local_item_features: (num_items, num_latent_factors)
        avg_item_features: (num_items, num_latent_factors)

        Returns a tuple ``(loss, prediction_error)`` where ``loss`` is
        ``prediction_error + lambda_u * user penalty + lambda_L * item distance``.
        '''
        prediction_error = self._masked_squared_error(rating_mat, user_features, local_item_features)

        user_regularization = torch.sum(user_features ** 2)  ## regularization term for user features
        item_loss = torch.sum((local_item_features - avg_item_features) ** 2)  ## loss term for item features

        loss = prediction_error + self.lambda_u * user_regularization + self.lambda_L * item_loss

        return loss, prediction_error

In [192]:
# Read the tab-separated interaction file (presumably MovieLens-1M, going by
# the file name) and give the four columns plain names.
rating_df = pd.read_csv('ml-1m.inter', sep='\t')
rating_df.columns = ['user_id', 'item_id', 'rating', 'timestamp']
# Shuffle with a fixed random_state: the row order decides the train/test
# split below, and this cell runs before the notebook seeds its RNGs, so an
# unseeded shuffle would give a different split on every run.
rating_df = shuffle(rating_df, random_state=42)
rating_df.head()

Unnamed: 0,user_id,item_id,rating,timestamp
916931,5539,107,3,960662169
46608,312,3617,3,976477183
504652,3104,1617,5,969556952
443640,2736,1244,2,973396870
410451,2462,1196,4,974168782


In [193]:
# Split the data into training and testing sets.
# The first `train_size` rows of the (already shuffled) rating_df are the
# training interactions; the remaining rows are held out for testing.
ratio = 0.8
train_size = int(len(rating_df) * ratio)

# Transform the dataframe into a (user x item) matrix; repeated
# (user, item) pairs are averaged and unrated pairs are NaN.
aggregate_rating_matrix = rating_df.pivot_table(index='user_id', columns='item_id', values='rating', aggfunc='mean')
num_users, num_items = aggregate_rating_matrix.shape

# Locate every interaction inside the pivoted matrix once, instead of doing a
# label lookup and scalar write per row in a Python loop.
row_pos = aggregate_rating_matrix.index.get_indexer(rating_df['user_id'])
col_pos = aggregate_rating_matrix.columns.get_indexer(rating_df['item_id'])
# get_indexer returns -1 for labels it cannot find; that would silently hit
# the last row/column in the positional writes below.
assert (row_pos >= 0).all() and (col_pos >= 0).all(), 'interaction missing from the pivoted matrix'

train_values = aggregate_rating_matrix.to_numpy(dtype=float, copy=True)
test_values = train_values.copy()
# Training interactions are blanked out of the test matrix ...
test_values[row_pos[:train_size], col_pos[:train_size]] = np.nan
# ... and held-out interactions are blanked out of the training matrix.
train_values[row_pos[train_size:], col_pos[train_size:]] = np.nan

rating_matrix = pd.DataFrame(train_values, index=aggregate_rating_matrix.index, columns=aggregate_rating_matrix.columns)
test_rating_matrix = pd.DataFrame(test_values, index=aggregate_rating_matrix.index, columns=aggregate_rating_matrix.columns)

In [194]:
# Min-max scale the training ratings into [0, 1] using the extreme values of
# the whole rating column, then mark unrated cells with the sentinel -1 and
# convert the frame to a float tensor.
min_rating = rating_df['rating'].min()
max_rating = rating_df['rating'].max()
rating_matrix = ((rating_matrix - min_rating) / (max_rating - min_rating)).fillna(-1)
rating_matrix = torch.FloatTensor(rating_matrix.values)

In [None]:
# Held-out ratings are not rescaled in this cell; unrated cells get the
# sentinel -1 before the frame becomes a float tensor.
test_rating_matrix = torch.FloatTensor(test_rating_matrix.fillna(-1).values)
print(test_rating_matrix.shape)

# 1.0 where a held-out rating exists, 0.0 for the -1 sentinel.
nonzero_mask = torch.ne(test_rating_matrix, -1).type(torch.FloatTensor)

In [660]:
# Seed both random number generators with the same value: torch draws the
# initial feature matrices and numpy draws the per-epoch random number.
seed_value = 42
torch.manual_seed(seed_value)
np.random.seed(seed_value)

In [813]:
# Reload the train/test rating tensors from their pickled copies.
# NOTE: pickle.load can execute arbitrary code stored in the file, so only
# load pickles that were produced by a trusted source.
with open('rating_matrix.pkl', 'rb') as f:
    rating_matrix = pickle.load(f)
with open('test_rating_matrix.pkl', 'rb') as f:
    test_rating_matrix = pickle.load(f)

# 1.0 where a held-out rating exists, 0.0 for the -1 sentinel.
nonzero_mask = torch.ne(test_rating_matrix, -1).type(torch.FloatTensor)

In [814]:
# Display the matrix dimensions. Both names are assigned from
# aggregate_rating_matrix.shape in the train/test split cell, so that cell
# must have been run in this kernel session.
num_users, num_items

(6040, 3706)

In [815]:
# hyperparameters

lr = 0.05  # learning rate passed to each client's Adam optimizer
lambda_u = 0.1  # weight of the user-feature regularization term in the loss
lambda_L = 10  # weight of the distance between local and averaged item features
num_epochs = 200  # number of iterations of the training loop
latent_factors = 20  # number of columns in every user/item feature matrix
num_clients = 200  # number of clients; each owns one block of users
prob_threshold = 0.5  # compared with the per-epoch random draw; also passed to Loss as `prob`
# Users per client. Floor division drops the remainder: with the displayed
# 6040 users and 200 clients this is 30, so the client slices cover rows
# 0..5999 and the last 40 rows are never assigned to a client.
m = num_users // num_clients

In [816]:
# initialization

user_features = []
item_features = []
std = 0.01

# Every client gets its own user factors (m users) and its own local copy of
# the item factors. The random draw is scaled first and only then marked as
# requiring grad, so each tensor is still a leaf the optimizer can update.
# The draw order (user, then item, per client) is kept.
for i in range(num_clients):
    user_features.append((torch.randn(m, latent_factors) * std).requires_grad_())
    item_features.append((torch.randn(num_items, latent_factors) * std).requires_grad_())

# Global item features start as the plain mean of the local copies, the same
# formula the aggregation step in train() uses. Computing it under no_grad
# keeps it out of the autograd graph; accumulating it in place from tensors
# that require grad would tie it to every client's item features.
with torch.no_grad():
    avg_item_features = sum(item_features) / num_clients

# define the model
RFRecF_loss = Loss(lambda_u=lambda_u, lambda_L=lambda_L, prob=prob_threshold)

# One Adam optimizer per client over that client's user and item factors.
client_optimizers = [
    optim.Adam([user_features[i], item_features[i]], lr=lr)
    for i in range(num_clients)
]

In [817]:
def train(epoch, rand_num, last_num):
    """Run one round: either a local step on every client or an aggregation.

    Args:
        epoch: index of the current round; round 0 always takes a local step
            on the rating loss.
        rand_num: random draw for this round. Above ``prob_threshold`` every
            client takes one optimizer step; otherwise the global item
            features are replaced by the mean of the clients' item features.
        last_num: the previous round's draw. On a local-step round it selects
            the loss: above ``prob_threshold`` the rating loss
            (``compute_f_loss``), otherwise the item-proximity loss
            (``compute_psi_loss``).

    Side effects:
        Updates the module-level feature tensors through ``client_optimizers``
        or rebinds the module-level ``avg_item_features``, and prints the mean
        rating loss over clients. The printed value stays 0 on rounds where
        no rating loss is computed.
    """
    global avg_item_features
    avg_loss = 0
    first_epoch = epoch == 0

    if rand_num > prob_threshold or first_epoch:
        use_rating_loss = last_num > prob_threshold or first_epoch
        # The averaged item features are a fixed target for the local step.
        # Detaching guarantees that backward() only reaches the current
        # client's tensors, so no graph has to be retained between clients.
        avg_target = avg_item_features.detach()

        for i in range(num_clients):
            client_optimizers[i].zero_grad()
            if use_rating_loss:
                loss = RFRecF_loss.compute_f_loss(rating_matrix[i*m: (i+1)*m], user_features[i], item_features[i])
                avg_loss += loss.item() / num_clients
            else:
                loss = RFRecF_loss.compute_psi_loss(item_features[i], avg_target)

            loss.backward()
            client_optimizers[i].step()
    else:
        with torch.no_grad():
            avg_item_features = sum(item_features) / num_clients  # update the global item features

    print('Epoch: {}, Loss: {:.4f}, '.format(epoch, avg_loss))

In [818]:
# Previous round's draw. The initial value is only a placeholder: train()
# forces a local rating-loss step whenever epoch == 0.
last_num = 1

for epoch in range(num_epochs):
    # One uniform draw per round decides between a local step and an
    # aggregation step inside train().
    rand_num = np.random.rand()
    train(epoch, rand_num, last_num)
    # train() also looks at the previous draw to choose which loss to use.
    last_num = rand_num

Epoch: 0, Loss: 787.9926, 
Epoch: 1, Loss: 0.0000, 
Epoch: 2, Loss: 0.0000, 
Epoch: 3, Loss: 0.0000, 
Epoch: 4, Loss: 0.0000, 
Epoch: 5, Loss: 789.2048, 
Epoch: 6, Loss: 0.0000, 
Epoch: 7, Loss: 0.0000, 
Epoch: 8, Loss: 0.0000, 
Epoch: 9, Loss: 0.0000, 
Epoch: 10, Loss: 0.0000, 
Epoch: 11, Loss: 0.0000, 
Epoch: 12, Loss: 0.0000, 
Epoch: 13, Loss: 0.0000, 
Epoch: 14, Loss: 792.6638, 
Epoch: 15, Loss: 802.2932, 
Epoch: 16, Loss: 801.1841, 
Epoch: 17, Loss: 785.5033, 
Epoch: 18, Loss: 756.6974, 
Epoch: 19, Loss: 0.0000, 
Epoch: 20, Loss: 0.0000, 
Epoch: 21, Loss: 0.0000, 
Epoch: 22, Loss: 0.0000, 
Epoch: 23, Loss: 0.0000, 
Epoch: 24, Loss: 0.0000, 
Epoch: 25, Loss: 0.0000, 
Epoch: 26, Loss: 0.0000, 
Epoch: 27, Loss: 0.0000, 
Epoch: 28, Loss: 0.0000, 
Epoch: 29, Loss: 0.0000, 
Epoch: 30, Loss: 758.7466, 
Epoch: 31, Loss: 773.2893, 
Epoch: 32, Loss: 803.2160, 
Epoch: 33, Loss: 0.0000, 
Epoch: 34, Loss: 0.0000, 
Epoch: 35, Loss: 0.0000, 
Epoch: 36, Loss: 0.0000, 
Epoch: 37, Loss: 0.0000, 
Ep

In [819]:
def evaluate(matrix, user_features, item_features, mask):
    """Accumulate absolute and squared rating errors over the masked entries.

    Predictions are ``sigmoid(user_features @ item_features.T)`` mapped back
    to the rating scale with the module-level ``min_rating`` / ``max_rating``.

    Args:
        matrix: true ratings; entries where ``mask`` is 0 are ignored.
        user_features: user factor matrix, one row per row of ``matrix``.
        item_features: item factor matrix, one row per column of ``matrix``.
        mask: tensor with 1 where a rating should be scored and 0 elsewhere.

    Returns:
        Tuple ``(abs_error, square_error, n_nonzero)`` of scalar tensors: the
        summed absolute error, the summed squared error and the sum of
        ``mask``.
    """
    # Evaluation needs no gradients; skipping autograd avoids keeping a graph
    # alive for every accumulated metric.
    with torch.no_grad():
        predicted_ratings = torch.sigmoid(torch.mm(user_features, item_features.t()))
        pred = (predicted_ratings * (max_rating - min_rating) + min_rating) * mask
        true_value = matrix * mask

        residual = pred - true_value
        abs_error = torch.sum(torch.abs(residual))
        square_error = torch.sum(residual ** 2)
        n_nonzero = torch.sum(mask)
    return abs_error, square_error, n_nonzero

In [820]:
# Sum the error statistics over every client's block of users, then turn the
# totals into MAE and RMSE over all scored test ratings.
MAE = MSE = num_nonzero = 0

for i in range(num_clients):
    client_rows = slice(i*m, (i+1)*m)
    abs_error, square_error, n_nonzero = evaluate(
        test_rating_matrix[client_rows],
        user_features[i],
        item_features[i],
        nonzero_mask[client_rows],
    )
    MAE = MAE + abs_error
    MSE = MSE + square_error
    num_nonzero = num_nonzero + n_nonzero

MAE = MAE / num_nonzero
RMSE = torch.sqrt(MSE / num_nonzero)
print("MAE: ", MAE.detach().numpy())
print("RMSE: ", RMSE.detach().numpy())

MAE:  0.7839408
RMSE:  0.98345464
