In [1]:
from urllib import request
import pandas as pd
import zipfile
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm_notebook
import tqdm
from torch.optim import SGD,Adam
from torch.nn import MSELoss
import torch.nn as nn
import matplotlib.pyplot as plt
from sklearn.model_selection import ParameterGrid, KFold

In [2]:
DATASET_URL = 'http://files.grouplens.org/datasets/movielens/ml-100k.zip'
DATASET_ARCHIVE = 'ml-100k.zip'

request.urlretrieve(DATASET_URL, DATASET_ARCHIVE)
with zipfile.ZipFile(DATASET_ARCHIVE) as archive:
    archive.extractall()

### Task 1

1. Go over the NCF paper (https://arxiv.org/abs/1708.05031) to understand the architecture of the model
2. Implement NCF class. In particular:  
    a. implement __init__ to create the model achtecture  
    b. initialisation is already implemented for you  
    c. implement forward. Forward should take u_id and i_id and return the propability of consumption given u_id and i_id (what is the output layer?)
    d. Test your forward by providing some random u_id and i_id

In [3]:
class NeuMF(nn.Module):
    def __init__(self, nb_users, nb_items,
                 mf_dim, mlp_layer_sizes, dropout=0):
        
        if mlp_layer_sizes[0] % 2 != 0:
            raise RuntimeError('u dummy, mlp_layer_sizes[0] % 2 != 0')
        super(NeuMF, self).__init__()
        nb_mlp_layers = len(mlp_layer_sizes)
        #YOUR TASK: ADD EMBEEDING LAYERS TO THE MODEL - be careful with sizes ~5 lines of code
        self.mf_user_embed = nn.Embedding(nb_users, mf_dim)
        self.mf_item_embed = nn.Embedding(nb_items, mf_dim)
        self.mlp_user_embed = nn.Embedding(nb_users, mlp_layer_sizes[0] // 2)
        self.mlp_item_embed = nn.Embedding(nb_items, mlp_layer_sizes[0] // 2)
        self.dropout = dropout
        #YOUR TASK: ADD MLPs and the final layer ~4-6 lines of code
        self.mlp = nn.ModuleList()
        for i in range(1, nb_mlp_layers):
            self.mlp.extend([nn.Linear(mlp_layer_sizes[i - 1], mlp_layer_sizes[i])])  
        self.final = nn.Linear(mlp_layer_sizes[-1] + mf_dim, 1)
        #YOUR TASK: Initialze weights ~4 lines of code - use glorot
        self.mf_user_embed.weight.data.normal_(0., 0.01)
        self.mf_item_embed.weight.data.normal_(0., 0.01)
        self.mlp_user_embed.weight.data.normal_(0., 0.01)
        self.mlp_item_embed.weight.data.normal_(0., 0.01)
        def glorot_uniform(layer):
            fan_in, fan_out = layer.in_features, layer.out_features
            limit = np.sqrt(6. / (fan_in + fan_out))
            layer.weight.data.uniform_(-limit, limit)

        def lecunn_uniform(layer):
            fan_in, fan_out = layer.in_features, layer.out_features  # noqa: F841, E501
            limit = np.sqrt(3. / fan_in)
            layer.weight.data.uniform_(-limit, limit)
        for layer in self.mlp:
            if type(layer) != nn.Linear:
                continue
            glorot_uniform(layer)
        lecunn_uniform(self.final)

    def forward(self, user, item, sigmoid=False):
        #YOUR TASK: Implement model forward. ~15 lines of code
        xmfu = self.mf_user_embed(user)
        xmfi = self.mf_item_embed(item)
        xmf = xmfu * xmfi

        xmlpu = self.mlp_user_embed(user)
        xmlpi = self.mlp_item_embed(item)
        xmlp = torch.cat((xmlpu, xmlpi), dim=1)
        for i, layer in enumerate(self.mlp):
            xmlp = layer(xmlp)
            xmlp = nn.functional.relu(xmlp)
            if self.dropout != 0:
                xmlp = nn.functional.dropout(xmlp, p=self.dropout, training=self.training)

        x = torch.cat((xmf, xmlp), dim=1)
        x = self.final(x)
        if sigmoid:
            x = torch.sigmoid(x)
        return x

### Task 2

1. Implement ML100kDataset. In particular:  
    a. implement all needed functions that overload PyTorch Dataset
    b. Implement negaive sampling. This can be approximated negative sampling negative_samples=n means that if user A has m training points (positive) it will have n*m negive points

In [4]:
class ML100kDataset(Dataset):
    def __init__(self,file_name,negative_samples=1):
        ratings_df=pd.read_csv(file_name, header=None, names=['user_id', 'item_id', 'rating', 'timestamp'],delim_whitespace=True)
        max_item = np.max(ratings_df['item_id'])
        ratings_df.drop(['rating','timestamp'],axis=1,inplace=True)
        ratings = torch.from_numpy(ratings_df.values)         
        self.negative_samples = negative_samples
        self.raw_dataset_length = len(ratings_df.index) 
        self.length_after_augmentation = self.raw_dataset_length * (self.negative_samples + 1)
        #YOUR TASK: Implement negative sampling. ~7 lines of code. NOTE: the dataset does not have to be shuffled, but you can do this!
        #Sample neg users
        USER,ITEM=0,1
        neg_users = ratings[:,USER].repeat(self.negative_samples)
        self.users = torch.cat((ratings[:,USER], neg_users))
        #sample neg items
        neg_items = torch.empty_like(ratings[:,ITEM]).repeat(self.negative_samples).random_(0,max_item)
        self.items = torch.cat((ratings[:,ITEM], neg_items))
        #labels
        neg_label = torch.zeros_like(ratings[:,ITEM], dtype=torch.float32).repeat(self.negative_samples)
        self.labels = torch.cat((torch.ones_like(ratings[:,ITEM], dtype=torch.float32), neg_label))

    def __len__(self):
        return self.length_after_augmentation
  
    def __getitem__(self,idx):
        return self.users[idx],self.items[idx],self.labels[idx]

### Task 3

1. Implement training loop. In particular:  
    a. Every epoch you make log average train loss and test/eval loss  
    b. use different loss/optimiser/other HP. Use dependency injection to play with them.  
    c. Find "best" HP during cross-validation.  
    d. Check when model overfits with learning curves.  
    e. What are the conclusions.  
    f*. You can user HR@n from previous excersise to look for overfitting.  
    g*. Check the influence of shuffling on your results

In [5]:
def compute_rmse(R, R_hat, index):
    X = np.multiply((R - R_hat), index)
    Y = np.sum(np.multiply(X, X))
    Z = np.sum(index)
    return np.sqrt(Y / Z)

def compute_hr_at_n(R, R_hat, R_exclude, n=10):
    hr_total = 0
    num_users = R.shape[0]
    for u in range(num_users):
        if np.sum(R_exclude[u]) >= n:
            continue
        top_n_indices = np.argsort(-R_hat[u])[:n]
        consumed_indices = np.where(R[u] > 0)[0]
        consumed_in_top_n = np.intersect1d(consumed_indices, top_n_indices)
        hr_total += len(consumed_in_top_n) / n
    hr_total /= num_users
    return hr_total

### Your task
1. Find best HP of the model by crossvalidation
2. for every user provide the recommendation, calculate HR@10 (as in the previous excersise)

In [8]:
users_num = 943 + 1
movies_num = 1682 + 1

ds = ML100kDataset('ml-100k/u.data', 2)
kf = KFold(n_splits=5, shuffle=True)

param_grid = {
    'mf_dim': [8, 16],
    'mlp_layer_sizes': [[64, 32], [128, 64]],
    'dropout': [0.2, 0.5],
    'learning_rate': [0.001, 0.01]
}

best_loss = float('inf')
best_params = {}

for params in ParameterGrid(param_grid):
    fold_losses = []
    for train_indices, test_indices in kf.split(ds):
        train_ds = torch.utils.data.Subset(ds, train_indices)
        test_ds = torch.utils.data.Subset(ds, test_indices)

        train_dl = DataLoader(train_ds, batch_size=16 * 1024, shuffle=True)
        test_dl = DataLoader(test_ds, batch_size=16 * 1024, shuffle=False)

        model = NeuMF(users_num, movies_num, params['mf_dim'], params['mlp_layer_sizes'], params['dropout'])
        criterion = nn.BCELoss()
        optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])

        train_loss = []
        test_loss = []
        for epoch in range(2): # Used fewer epochs to minimize runtime
            model.train()
            tr_loss = []
            print("Training " + str(epoch))
            for (u, i, l) in tqdm.tqdm(train_dl):
                optimizer.zero_grad()
                yhat = model(u, i, sigmoid=True)
                l = l.unsqueeze(1)
                loss = criterion(yhat, l)
                tr_loss.append(loss.detach().numpy())
                loss.backward()
                optimizer.step()
            train_loss.append(np.mean(tr_loss))

            model.eval()
            predictions = []
            with torch.no_grad():
                t_loss = []
                for i, (u, i, l) in enumerate(test_dl):
                    yhat = model(u, i, sigmoid=True)
                    predictions.append(yhat)
                    l = l.unsqueeze(1)
                    tl = criterion(yhat, l)
                    t_loss.append(tl)
                test_loss = np.mean(t_loss)

            fold_losses.append(test_loss)

    avg_loss = np.mean(fold_losses)

    if avg_loss < best_loss:
        best_loss = avg_loss
        best_params = params

# Calculate RMSE and HR@n using the best hyperparameters
model = NeuMF(users_num, movies_num, best_params['mf_dim'], best_params['mlp_layer_sizes'], best_params['dropout'])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=best_params['learning_rate'])

train_dl = DataLoader(train_ds, batch_size=16 * 1024, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=16 * 1024, shuffle=False)

model.eval()
predictions = []
ground_truth = []
with torch.no_grad():
    for (u, i, l) in test_dl:
        yhat = model(u, i, sigmoid=True)
        predictions.append(yhat)
        ground_truth.append(l.unsqueeze(1).numpy())

R_hat = np.vstack(predictions)
R = np.vstack(ground_truth)
R_exclude = np.zeros_like(R)

rmse = compute_rmse(R, R_hat, np.logical_not(R_exclude))
hr_at_n = compute_hr_at_n(R, R_hat, R_exclude, n=10)

print("Best hyperparameters:", best_params)
print("RMSE:", rmse)
print("HR@10:", hr_at_n)

Training 0


100%|██████████| 15/15 [00:05<00:00,  2.71it/s]


Training 1


100%|██████████| 15/15 [00:07<00:00,  2.03it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.77it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.37it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.79it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.40it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.73it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.75it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.38it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.80it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.25it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.57it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.34it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.25it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.55it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.32it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.41it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.60it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.34it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.60it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.37it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.84it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.68it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.45it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.84it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.39it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.74it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.50it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.73it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.51it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.37it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.66it/s]


Training 0


100%|██████████| 15/15 [00:07<00:00,  2.14it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.72it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.34it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.43it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.71it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.30it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.60it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.37it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.77it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.63it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.59it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.83it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.43it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.90it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.39it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.78it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.48it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.75it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.52it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.37it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.58it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.22it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.68it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.45it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.28it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.72it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.30it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.64it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.65it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.66it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.80it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.49it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.79it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.42it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.88it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.44it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.82it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.65it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.36it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.63it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.38it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.63it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.33it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.68it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.59it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.33it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.70it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.31it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.80it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.61it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.75it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.74it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.52it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.91it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.41it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.75it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.40it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.78it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.35it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.32it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.60it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.31it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.66it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.28it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.52it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.59it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.29it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.64it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.51it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.88it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.53it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.58it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.84it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.40it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.80it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.50it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.77it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.42it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.64it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.56it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.28it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.70it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.32it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.66it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.54it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.29it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.61it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.38it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.77it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.41it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.89it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.67it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.51it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.90it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.43it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.83it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.50it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.83it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.32it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.65it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.61it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.31it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.68it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.27it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.59it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.58it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.27it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.57it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.46it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.77it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.42it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.84it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.72it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.38it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.77it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.48it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.89it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.41it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.60it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.57it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.28it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.61it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.37it/s]


Training 1


100%|██████████| 15/15 [00:05<00:00,  2.64it/s]


Training 0


100%|██████████| 15/15 [00:06<00:00,  2.35it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.48it/s]


Training 0


100%|██████████| 15/15 [00:05<00:00,  2.64it/s]


Training 1


100%|██████████| 15/15 [00:06<00:00,  2.33it/s]


Best hyperparameters: {'dropout': 0.2, 'learning_rate': 0.01, 'mf_dim': 8, 'mlp_layer_sizes': [128, 64]}
RMSE: 0.5052408118855717
HR@10: 0.033388333333321224


### Plot results

In [None]:
plt.plot(range(1, 21), train_loss, label='Train Loss')
plt.plot(range(1, 21), test_loss, label='Test Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()