In [833]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import pickle
from sklearn.utils import shuffle

In [341]:
class Loss(nn.Module):
    """Per-client objective for the federated matrix-factorization model.

    The loss is the sum of three terms:
      * the squared error between ``sigmoid(U @ V^T)`` and the observed ratings,
      * ``lambda_u`` times the squared L2 norm of the user factors,
      * ``lambda_L`` times the squared distance between the client's local item
        factors and the shared (averaged) item factors.
    """

    def __init__(self, lambda_u, lambda_L):
        super().__init__()
        self.lambda_u = lambda_u
        self.lambda_L = lambda_L

    def forward(self, rating_mat, user_features, local_item_features, avg_item_features):
        '''
        rating_mat: (num_users_per_client, num_items); -1 marks an unobserved rating
        user_features: (num_users_per_client, num_latent_factors)
        local_item_features: (num_items, num_latent_factors)
        avg_item_features: (num_items, num_latent_factors)

        Returns a tuple ``(loss, prediction_error)`` of scalar tensors, where
        ``prediction_error`` is the masked squared-error term on its own.
        '''
        ratings_predicted = torch.sigmoid(torch.mm(user_features, local_item_features.t()))

        # Build the mask on the same device and dtype as the predictions, so the loss
        # also works when the tensors do not live on the CPU.
        observed_mask = (rating_mat != -1).to(dtype=ratings_predicted.dtype)

        diff = (ratings_predicted - rating_mat) ** 2
        prediction_error = torch.sum(diff * observed_mask)  # unobserved entries contribute 0

        user_regularization = torch.sum(user_features ** 2)  # regularization term for user features
        item_loss = torch.sum((local_item_features - avg_item_features) ** 2)  # pull towards shared item features

        loss = prediction_error + self.lambda_u * user_regularization + self.lambda_L * item_loss

        return loss, prediction_error

In [491]:
# Load the tab-separated interaction file (presumably MovieLens-1M, judging by the
# file name) and give the four columns plain names.
rating_df = pd.read_csv('ml-1m.inter', sep='\t')
rating_df.columns = ['user_id', 'item_id', 'rating', 'timestamp']
# Shuffle with a fixed seed: the train/test split is taken by row position, so an
# unseeded shuffle would produce a different split (and different metrics) on every run.
rating_df = shuffle(rating_df, random_state=42)
rating_df.head()

Unnamed: 0,user_id,item_id,rating,timestamp
19453,148,3386,5,977352730
673072,4042,2,1,965581609
302717,1797,2925,5,974700734
442192,2721,1721,4,973278307
774028,4613,3257,4,964145344


In [492]:
# Split the data into training and testing sets
ratio = 0.8
train_size = int(len(rating_df) * ratio)

# Dense (user x item) matrix of ratings; pairs without a rating are NaN.
aggregate_rating_matrix = rating_df.pivot_table(index='user_id', columns='item_id', values='rating', aggfunc='mean')  # transform the dataframe into a matrix
num_users, num_items = aggregate_rating_matrix.shape

# Translate every interaction into its (row, column) position in the matrix once,
# instead of doing one slow label-based `.loc` write per interaction.
row_pos = aggregate_rating_matrix.index.get_indexer(rating_df['user_id'])
col_pos = aggregate_rating_matrix.columns.get_indexer(rating_df['item_id'])
# get_indexer returns -1 for unknown labels, which would silently index the last row/column.
assert (row_pos >= 0).all() and (col_pos >= 0).all(), 'interaction missing from the pivoted matrix'

train_values = aggregate_rating_matrix.to_numpy(dtype=float, copy=True)
test_values = train_values.copy()
nan = float('nan')
# The first `train_size` shuffled interactions are training data: blank them in the test matrix.
test_values[row_pos[:train_size], col_pos[:train_size]] = nan
# The remaining interactions are held out: blank them in the training matrix.
train_values[row_pos[train_size:], col_pos[train_size:]] = nan

rating_matrix = pd.DataFrame(train_values, index=aggregate_rating_matrix.index, columns=aggregate_rating_matrix.columns)
test_rating_matrix = pd.DataFrame(test_values, index=aggregate_rating_matrix.index, columns=aggregate_rating_matrix.columns)

In [493]:
# Min-max scale the training ratings with the global rating range; NaN entries stay NaN
# through the arithmetic and are then replaced by the -1 "no rating" sentinel.
min_rating = rating_df['rating'].min()
max_rating = rating_df['rating'].max()
rating_matrix = rating_matrix.sub(min_rating).div(max_rating - min_rating)
rating_matrix = rating_matrix.fillna(-1)
# Convert the dense frame to a float32 tensor for training.
rating_matrix = torch.FloatTensor(rating_matrix.values)

In [494]:
# Held-out ratings keep their original scale; missing entries get the -1 sentinel.
test_rating_matrix = test_rating_matrix.fillna(-1)
test_rating_matrix = torch.FloatTensor(test_rating_matrix.values)

# 1.0 where a held-out rating exists, 0.0 everywhere else.
nonzero_mask = test_rating_matrix.ne(-1).float()

In [495]:
## Store the train and test rating tensors with pickle
for pkl_path, tensor in (
    ('rating_matrix.pkl', rating_matrix),
    ('test_rating_matrix.pkl', test_rating_matrix),
):
    with open(pkl_path, 'wb') as out_file:
        pickle.dump(tensor, out_file)

In [662]:
# Seed PyTorch's global RNG so the random draws made later in the notebook are repeatable.
torch.manual_seed(seed=42)

<torch._C.Generator at 0x124b3c6d0>

In [824]:
# Reload the cached train/test tensors. Only unpickle files written by this notebook:
# pickle.load executes arbitrary code from the file it reads.
with open('rating_matrix.pkl', 'rb') as train_file, open('test_rating_matrix.pkl', 'rb') as test_file:
    rating_matrix = pickle.load(train_file)
    test_rating_matrix = pickle.load(test_file)

  return torch.load(io.BytesIO(b))


In [825]:
# Sanity check: (users, items) dimensions taken from the pivoted rating matrix.
num_users, num_items

(6040, 3706)

In [826]:
# Hyperparameters for the federated matrix-factorization run.
lr = 0.0125  # learning rate of each client's Adam optimizer
lambda_u = 0.1  # weight of the L2 penalty on user factors
lambda_L = 20  # weight of the penalty tying local item factors to the shared average
num_epochs = 150  # number of training rounds (one call to train() per round)
latent_factors = 20  # dimensionality of the user / item factor vectors
num_clients = 200  # number of simulated clients; each one owns a block of users
# Users per client. NOTE(review): floor division means the last
# num_users % num_clients users are never covered by the i*m:(i+1)*m slices used
# for training and evaluation (6040 users / 200 clients leaves 40 users out).
m = num_users // num_clients

In [827]:
# Initialization

user_features = []
item_features = []
std = 0.01

# Per client: user factors for its m users and a local copy of all item factors,
# drawn from N(0, std^2). The scaling is applied before requires_grad_() so every
# tensor is a leaf that the optimizer is allowed to update.
for i in range(num_clients):
    user_features.append((torch.randn(m, latent_factors) * std).requires_grad_())
    item_features.append((torch.randn(num_items, latent_factors) * std).requires_grad_())

# Shared item factors = plain mean of the clients' local item factors, computed the
# same way train() recomputes it after every round. no_grad keeps the average out of
# the autograd graph: it is a constant target for the clients, not a trainable
# quantity, and must not link the clients' parameters together.
with torch.no_grad():
    avg_item_features = sum(item_features) / num_clients

RFRec_loss = Loss(lambda_u=lambda_u, lambda_L=lambda_L)

# One Adam optimizer per client, owning only that client's user and item factors.
client_optimizers = []
for i in range(num_clients):
    optimizer = optim.Adam([user_features[i], item_features[i]], lr=lr)
    client_optimizers.append(optimizer)

In [None]:
def train(epoch):
    """Run one federated round: a local update per client, then re-average item factors.

    Args:
        epoch: Index of the current round. Progress is printed when it is a
            multiple of 10.

    Reads the notebook-level globals ``num_clients``, ``m``, ``rating_matrix``,
    ``user_features``, ``item_features``, ``client_optimizers`` and ``RFRec_loss``,
    and rebinds the global ``avg_item_features`` to the new mean of the clients'
    item factors. Returns nothing.
    """
    global avg_item_features
    avg_loss = avg_prediction_error = 0.0

    # The shared item factors are a fixed target during the round. Detaching them
    # guarantees that one client's loss never back-propagates into another client's
    # parameters, and makes every client's graph independent (no retain_graph needed).
    shared_item_features = avg_item_features.detach()

    for i in range(num_clients):
        client_optimizers[i].zero_grad()
        loss, prediction_error = RFRec_loss(rating_matrix[i*m: (i+1)*m], user_features[i], item_features[i], shared_item_features)

        # .item() stores plain floats so the running averages do not keep every
        # client's autograd graph alive.
        avg_loss += loss.item() / num_clients
        avg_prediction_error += prediction_error.item() / num_clients

        loss.backward()
        client_optimizers[i].step()

    with torch.no_grad():
        avg_item_features = sum(item_features) / num_clients  # update the global item features

    if epoch % 10 == 0:
        print('Epoch: {}, Loss: {:.4f}, Prediction Error: {:.4f}'.format(epoch, avg_loss, avg_prediction_error))

In [829]:
# Run num_epochs training rounds; train() prints progress every 10th round.
for epoch in range(num_epochs):
    train(epoch)

Epoch: 0, Loss: 541.4395, Prediction Error: 393.9899
Epoch: 10, Loss: 420.4923, Prediction Error: 393.9110
Epoch: 20, Loss: 401.3866, Prediction Error: 392.3230
Epoch: 30, Loss: 377.0950, Prediction Error: 371.8815
Epoch: 40, Loss: 306.3105, Prediction Error: 297.8666
Epoch: 50, Loss: 254.5391, Prediction Error: 241.1747
Epoch: 60, Loss: 238.3780, Prediction Error: 224.1911
Epoch: 70, Loss: 228.3599, Prediction Error: 215.1773
Epoch: 80, Loss: 220.3378, Prediction Error: 207.0414
Epoch: 90, Loss: 211.8927, Prediction Error: 197.5705
Epoch: 100, Loss: 204.3699, Prediction Error: 189.0035
Epoch: 110, Loss: 198.1189, Prediction Error: 182.0005
Epoch: 120, Loss: 192.8855, Prediction Error: 176.2057
Epoch: 130, Loss: 188.5144, Prediction Error: 171.4221
Epoch: 140, Loss: 184.6480, Prediction Error: 167.2883


In [830]:
def evaluate(matrix, user_features, item_features, client_id):
    """Compute one client's summed prediction errors on the entries observed in ``matrix``.

    Args:
        matrix: (users_per_client, num_items) ratings on the original rating scale,
            with -1 marking entries that have no rating.
        user_features: (users_per_client, num_latent_factors) user factors.
        item_features: (num_items, num_latent_factors) item factors.
        client_id: Index of the client. Kept for backward compatibility; the mask is
            now derived from ``matrix`` itself, so this value is no longer used.

    Returns:
        Tuple ``(abs_error, square_error, n_nonzero)`` of scalar tensors: the summed
        absolute error, the summed squared error, and the number of observed entries.

    Reads the notebook-level globals ``min_rating`` and ``max_rating`` to map the
    sigmoid output back onto the original rating scale.
    """
    # Evaluation needs no gradients, even though the factors are trainable tensors.
    with torch.no_grad():
        # Derive the mask from the matrix being scored instead of a global mask, so the
        # function works after a kernel restart and for any matrix using the -1 sentinel.
        observed = (matrix != -1).float()

        predicted_ratings = torch.sigmoid(torch.mm(user_features, item_features.t()))
        # Undo the min-max normalization, then zero out the unobserved positions.
        pred = (predicted_ratings * (max_rating - min_rating) + min_rating) * observed
        true_value = matrix * observed

        abs_error = torch.sum(torch.abs(pred - true_value))
        square_error = torch.sum((pred - true_value) ** 2)
        n_nonzero = torch.sum(observed)
    return abs_error, square_error, n_nonzero

In [831]:
# Score every client on its slice of the held-out ratings.
per_client_stats = [
    evaluate(test_rating_matrix[i*m: (i+1)*m], user_features[i], item_features[i], i)
    for i in range(num_clients)
]

# Sum absolute errors, squared errors and observed-rating counts across clients.
MAE = sum(stats[0] for stats in per_client_stats)
MSE = sum(stats[1] for stats in per_client_stats)
num_nonzero = sum(stats[2] for stats in per_client_stats)

# Turn the sums into mean absolute error and root-mean-square error.
MAE = MAE / num_nonzero
RMSE = torch.sqrt(MSE / num_nonzero)
print("MAE: ", MAE.data.numpy())
print("RMSE: ", RMSE.data.numpy())

MAE:  0.6922777
RMSE:  0.87316585
