In [26]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import KFold
import time

In [27]:
class RateDataset(Dataset):
    def __init__(self, user_tensor, item_tensor, target_tensor):
        self.user_tensor = user_tensor
        self.item_tensor = item_tensor
        self.target_tensor = target_tensor

    def __getitem__(self, index):
        return self.user_tensor[index], self.item_tensor[index], self.target_tensor[index]
    
    def __len__(self):
        return self.user_tensor.size(0)

In [141]:
class BiasMF(torch.nn.Module):
    def __init__(self, params):
        super(BiasMF, self).__init__()
        self.num_users = params['num_users']
        self.num_items = params['num_items']
        self.latent_dim = params['latent_dim']
        self.mu = params['global_mean'] # mean of all the ratings 不需要学习
        self.mu = torch.tensor(self.mu).to(device='cuda')

        self.user_embedding = torch.nn.Embedding(self.num_users, self.latent_dim)
        self.item_embedding = torch.nn.Embedding(self.num_items, self.latent_dim)
        
        # 这里的bias是为了消除user的打分bias 需要学习（需要update）
        self.user_bias = torch.nn.Embedding(self.num_users, 1)
        self.user_bias.weight.data = torch.zeros(self.num_users, 1).float() #updaet
        self.item_bias = torch.nn.Embedding(self.num_items, 1)
        self.item_bias.weight.data = torch.zeros(self.num_items, 1).float()

    def forward(self, user_indices, item_indices):
        user_vec = self.user_embedding(user_indices)
        item_vec = self.item_embedding(item_indices)
        mu = self.mu.view(-1)
        
        dot = torch.mul(user_vec, item_vec).sum(dim=1)
        rating = dot + self.user_bias(user_indices).view(-1) + self.item_bias(item_indices).view(-1) + mu

        return rating
    # 最好可以直接把bias写在forward中的return里
    def get_bias(self, user_indices, item_indices):
        return self.user_bias(user_indices).view(-1), self.item_bias(item_indices).view(-1)

In [142]:
def train_and_predict(df, k, learning_rate):
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    num_users = df.userId.unique().shape[0]
    num_items = df.movieId.unique().shape[0]
    
    # user 不需要lookup table
    item_unique = set(df.movieId)
    item_lookup_table = {item:i for i,item in enumerate(item_unique)}
    
    for i in range(0,k):
        
        # k-folds validation
        kf = KFold(n_splits=5, shuffle=True, random_state = i)
        #loss_sum = 0
        for train_index , test_index in kf.split(df.loc[:,['userId','movieId','timestamp']]):
            X_train = df.iloc[train_index,[0,1]]
            y_train = df.iloc[train_index,[2]]
            X_test = df.iloc[test_index,[0,1]]
            y_test = df.iloc[test_index,[2]]

            params = {'num_users':num_users,'num_items':num_items,'latent_dim':30, 'global_mean':y_train.mean()}

            # tensors of training dataset
            train_user_tensor = torch.LongTensor(X_train.iloc[:,[0]].values - 1).squeeze()
            train_item_tensor = torch.LongTensor([item_lookup_table[X_train.iloc[i,1]] for i in range(len(X_train))])
            train_rating_tensor = torch.FloatTensor(y_train.values).squeeze()
            
            # tensors of test dataset
            test_user_tensor = torch.LongTensor(X_test.iloc[:,[0]].values - 1).squeeze()
            test_item_tensor = torch.LongTensor([item_lookup_table[X_test.iloc[i,1]] for i in range(len(X_test))])
            test_rating_tensor = torch.FloatTensor(y_test.values).squeeze()
            
            # transform to dataloader
            # batch_size need to be big(e.g. batch_size = 10 is not appropriate)
            train_dataset = RateDataset(train_user_tensor, train_item_tensor, train_rating_tensor)
            train_iter = DataLoader(train_dataset, batch_size = 64, shuffle = True) # train iter has to be shuffled
            
            test_dataset = RateDataset(test_user_tensor, test_item_tensor, test_rating_tensor)
            test_iter = DataLoader(test_dataset, batch_size = 64, shuffle = False)
            
            # resetting the BiasMF model
            model = BiasMF(params).to(device)
            criterion = nn.MSELoss()
            optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay = 1e-5)
            #optimizer = optim.SGD(model.parameters(), lr = learning_rate, weight_decay = 1e-5, momentum = 0.9)
            
            # train the model-----------------------------------------
            num_epoch = 30
            loss_epoch = [0 for j in range(num_epoch)]
            for epoch in range(num_epoch):
                
                for bid, batch in enumerate(train_iter):
                    u, i, r = batch[0], batch[1], batch[2]
                    u = u.to(device)
                    i = i.to(device)
                    r = r.to(device)
                    r = r. view(-1)
                    
                    # forward pass
                    preds = model(u,i)
                    user_bias, item_bias = model.get_bias(u,i)
                    # 这里的错误是：
                    # 1. u 作为input没有经过embedding就放入正则项，没有意义
                    # 2. 还是用optimizer里通过weight decay来自动加正则项比价好
                    '''loss = torch.mean(torch.sum(torch.pow(r-preds,2) + 
                                                l*(torch.pow(u,2)+torch.pow(i,2)+
                                                   torch.pow(user_bias,2)+torch.pow(item_bias,2))))'''
                    loss = criterion(r, preds)
                    # backward and optimize
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                loss_epoch[epoch] = torch.sqrt(loss).item()
            
                print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1,num_epoch, loss.item()))
            #print('train rmse: ', torch.sqrt(criterion(r,preds)))
            
            # test the model---------------------------------------------

            pred_array = np.array([])
            r_array = np.array([])
            with torch.no_grad():
                for bid, batch in enumerate(test_iter):
                    u, i, r = batch[0], batch[1], batch[2] 
                    u = u.to(device)
                    i = i.to(device)
                    r = r.to(device)
                    r = r.view(-1,1)

                    preds = model(u,i).view(-1,1)
                    
                    pred_array = np.append(pred_array,np.array(preds.cpu()))
                    r_array = np.append(r_array,np.array(r.cpu()))
                    
                pred_tensor = torch.FloatTensor(pred_array).to(device)
                r_tensor = torch.FloatTensor(r_array).to(device)
                
                test_rmse = torch.sqrt(criterion(r_tensor, pred_tensor))
                print(test_rmse.item())
                    
    print('finish')

        

In [143]:
# lr = 1e-5, lambda= 1e-8, k folds = 3, random states = i, batch_size = 64, epoch = 70 with weight decay = 1e-5
torch.cuda.synchronize()
start = time.time()

df = pd.read_csv('/home/dm/Downloads/movielens100k/ratings.csv')
train_and_predict(df, 2, 0.002)

torch.cuda.synchronize()
end = time.time()
print('cost time:', end-start)

Epoch [1/30], Loss: 12.1688
Epoch [2/30], Loss: 7.8966
Epoch [3/30], Loss: 5.6886
Epoch [4/30], Loss: 2.6248
Epoch [5/30], Loss: 1.9083
Epoch [6/30], Loss: 1.5713
Epoch [7/30], Loss: 0.6572
Epoch [8/30], Loss: 0.3590
Epoch [9/30], Loss: 0.5182
Epoch [10/30], Loss: 0.2703
Epoch [11/30], Loss: 0.3564
Epoch [12/30], Loss: 0.2577
Epoch [13/30], Loss: 0.2307
Epoch [14/30], Loss: 0.2564
Epoch [15/30], Loss: 0.5213
Epoch [16/30], Loss: 0.1961
Epoch [17/30], Loss: 0.3075
Epoch [18/30], Loss: 0.1296
Epoch [19/30], Loss: 0.3125
Epoch [20/30], Loss: 0.2909
Epoch [21/30], Loss: 0.1467
Epoch [22/30], Loss: 0.2819
Epoch [23/30], Loss: 0.2608
Epoch [24/30], Loss: 0.1290
Epoch [25/30], Loss: 0.1394
Epoch [26/30], Loss: 0.0806
Epoch [27/30], Loss: 0.1853
Epoch [28/30], Loss: 0.1888
Epoch [29/30], Loss: 0.1201
Epoch [30/30], Loss: 0.1189
1.1208369731903076
Epoch [1/30], Loss: 13.0494
Epoch [2/30], Loss: 5.9902
Epoch [3/30], Loss: 4.7373
Epoch [4/30], Loss: 2.0696
Epoch [5/30], Loss: 1.0100
Epoch [6/30],

Epoch [21/30], Loss: 0.2844
Epoch [22/30], Loss: 0.1487
Epoch [23/30], Loss: 0.1600
Epoch [24/30], Loss: 0.2020
Epoch [25/30], Loss: 0.1035
Epoch [26/30], Loss: 0.1153
Epoch [27/30], Loss: 0.2608
Epoch [28/30], Loss: 0.1663
Epoch [29/30], Loss: 0.0722
Epoch [30/30], Loss: 0.0990
1.125557780265808
finish
cost time: 614.9383878707886
