In [None]:
import numpy as np
from time import time

In [None]:
def load_and_split_data(path='./', validation_fraction=0.1, delimiter='::', random_seed=1234, should_transpose=False):
    '''
    Loads a delimited ratings file (e.g. MovieLens 1M ``ratings.dat``), splits the ratings
    at random into a training and a validation set, and returns both as dense matrices.

    The first three columns of every row are read as user id, movie id and rating.
    User and movie ids are remapped to contiguous 0-based indices following their
    sorted order. An entry of 0 in a returned matrix means "no rating in this split".

    Side effect: seeds NumPy's global random generator with ``random_seed``.

    :param path: path to the ratings file
    :param validation_fraction: fraction of ratings to hold out for validation;
        exactly ceil(validation_fraction * number_of_ratings) ratings are held out,
        so 0 yields an empty validation matrix
    :param delimiter: delimiter used in the data file
    :param random_seed: random seed for validation splitting
    :param should_transpose: flag to transpose output matrices (swapping users with movies)
    :return: train ratings (n_u, n_m), valid ratings (n_u, n_m) as float32 arrays;
        both are (n_m, n_u) when should_transpose is set
    '''
    np.random.seed(random_seed)

    start_time = time()
    print('Reading data...')
    data = np.genfromtxt(path, delimiter=delimiter).astype('int32')
    print('Data read in', time() - start_time, 'seconds')

    # return_inverse gives, for every row, the position of its id among the sorted
    # distinct ids -- i.e. the contiguous user/movie number -- in a single pass.
    unique_users, user_index = np.unique(data[:, 0], return_inverse=True)
    unique_movies, movie_index = np.unique(data[:, 1], return_inverse=True)
    num_users = unique_users.shape[0]
    num_movies = unique_movies.shape[0]
    num_ratings = data.shape[0]

    # Shuffle row indices
    indices = np.arange(num_ratings)
    np.random.shuffle(indices)

    # The first few ratings of the shuffled order are validation data, the rest are
    # training data. The comparison is strict so that a fraction of 0 holds out nothing.
    is_validation = np.arange(num_ratings) < validation_fraction * num_ratings
    valid_rows = indices[is_validation]
    train_rows = indices[~is_validation]

    train_ratings = np.zeros((num_users, num_movies), dtype='float32')
    valid_ratings = np.zeros((num_users, num_movies), dtype='float32')

    # Vectorized scatter of the ratings into the dense matrices.
    # Assumes each (user, movie) pair occurs at most once in the file; NumPy does not
    # specify which value survives when the same cell is assigned more than once.
    valid_ratings[user_index[valid_rows], movie_index[valid_rows]] = data[valid_rows, 2]
    train_ratings[user_index[train_rows], movie_index[train_rows]] = data[train_rows, 2]

    # Transpose the matrices if the transpose flag is set
    if should_transpose:
        train_ratings = train_ratings.T
        valid_ratings = valid_ratings.T

    print('Loaded dense data matrix')

    return train_ratings, valid_ratings


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import sys
import os

In [None]:
# Seed NumPy and PyTorch from the wall clock so that every run uses a fresh seed.
# The value stays available as `seed` so it can be logged alongside the results.
seed = int(time())
np.random.seed(seed)
torch.manual_seed(seed);  # trailing ';' keeps the returned Generator repr out of the cell output

<torch._C.Generator at 0x7f3fab1a4110>

In [None]:
# Load the ratings; should_transpose=True makes rows movies and columns users.
tr, vr = load_and_split_data(
    'ratings.dat',
    validation_fraction=0.1,
    delimiter='::',
    random_seed=seed,
    should_transpose=True,
)

# Masks holding 1.0 where a rating is present (non-zero) and 0.0 elsewhere.
tm = (tr > 1e-12).astype('float32')
vm = (vr > 1e-12).astype('float32')

# Rows are movies and columns are users here; the two would be swapped if
# load_and_split_data were called with should_transpose=False.
n_m, n_u = tr.shape


Reading data...
Data read in 3.1598000526428223 seconds
Loaded dense data matrix


In [None]:
# Set hyper-parameters
n_hid = 700        # hidden width passed to KernelNet
lambda_2 = 60.0    # weight of the penalty on each layer's W
lambda_s = 0.013   # weight of the penalty on each layer's kernel matrix
n_layers = 3       # number of hidden kernel layers
output_every = 50  # L-BFGS iterations per optimizer step; validation/train RMSE is reported after each step
n_epoch = n_layers * 10 * output_every
batch_size = 256  # NOTE(review): not read by any code visible in this notebook -- confirm before relying on it

verbose_bfgs = True  # NOTE(review): not read by any code visible in this notebook
use_gpu = True

# Fall back to the CPU when CUDA is not requested or not available.
device = torch.device('cuda' if use_gpu and torch.cuda.is_available() else 'cpu')

# Convert data to PyTorch tensors.
# torch.as_tensor accepts NumPy arrays as well as existing tensors, so this cell can be
# re-run after tr/tm/vr/vm have already been converted (torch.tensor would warn about
# copy-constructing from a tensor). On CPU the result may share memory with the array.
tr = torch.as_tensor(tr, dtype=torch.float32, device=device)
tm = torch.as_tensor(tm, dtype=torch.float32, device=device)
vr = torch.as_tensor(vr, dtype=torch.float32, device=device)
vm = torch.as_tensor(vm, dtype=torch.float32, device=device)

In [None]:
class KernelLayer(nn.Module):
    '''
    Fully connected layer whose weight matrix is multiplied element-wise by a learned kernel.

    Every input unit owns an embedding in ``u`` and every output unit one in ``v``.
    The kernel value for a pair is max(0, 1 - ||u_i - v_j||^2), so the weight of a
    connection is zeroed once the two embeddings are at distance 1 or more.

    :param n_in: number of input units
    :param n_hid: number of output units
    :param n_dim: size of the embeddings in ``u`` and ``v``
    :param activation: module applied to the affine output; the default instance is
        created once, when the class is defined
    :param lambda_s: weight of the kernel penalty; the default is the module-level
        ``lambda_s`` at class-definition time
    :param lambda_2: weight of the penalty on ``W``; the default is the module-level
        ``lambda_2`` at class-definition time
    '''

    def __init__(self, n_in, n_hid=500, n_dim=5, activation=nn.Sigmoid(),
                 lambda_s=lambda_s, lambda_2=lambda_2):
        super().__init__()

        def small_normal(*shape):
            # Trainable tensor drawn from N(0, 1e-3^2).
            return nn.Parameter(torch.empty(*shape).normal_(std=1e-3))

        self.W = small_normal(n_in, n_hid)
        self.u = small_normal(n_in, 1, n_dim)
        self.v = small_normal(1, n_hid, n_dim)
        self.b = nn.Parameter(torch.zeros(n_hid))
        self.activation = activation
        self.lambda_s = lambda_s
        self.lambda_2 = lambda_2

    def kernel(self, u, v):
        '''Return max(0, 1 - ||u - v||^2), with the norm taken over dimension 2 of the broadcast difference.'''
        squared_distance = (u - v).norm(p=2, dim=2).pow(2)
        return (1.0 - squared_distance).clamp(min=0.0)

    def forward(self, x):
        '''Apply the kernel-masked affine map to ``x``, then the activation.'''
        masked_weight = self.W * self.kernel(self.u, self.v)
        return self.activation(torch.matmul(x, masked_weight) + self.b)

    def regularization(self):
        '''Return lambda_s * ||kernel||_2 + lambda_2 * ||W||_2 as a scalar tensor.'''
        kernel_penalty = self.lambda_s * self.kernel(self.u, self.v).norm(p=2)
        weight_penalty = self.lambda_2 * self.W.norm(p=2)
        return kernel_penalty + weight_penalty

In [None]:
class KernelNet(nn.Module):
    '''
    Stack of KernelLayer modules: ``n_layers`` hidden layers of width ``n_hid``
    followed by an output layer with identity activation that maps back to ``n_u`` units.

    :param n_u: size of the network input and output
    :param n_hid: width of every hidden layer
    :param n_layers: number of hidden layers
    :param lambda_s: kernel penalty weight forwarded to every layer
    :param lambda_2: weight penalty weight forwarded to every layer
    '''

    def __init__(self, n_u, n_hid, n_layers, lambda_s, lambda_2):
        super().__init__()

        penalties = dict(lambda_s=lambda_s, lambda_2=lambda_2)

        # Hidden layers: only the first one reads the n_u-wide input.
        stack = []
        width_in = n_u
        for _ in range(n_layers):
            stack.append(KernelLayer(width_in, n_hid, **penalties))
            width_in = n_hid

        # Output layer: no non-linearity.
        stack.append(KernelLayer(n_hid, n_u, activation=nn.Identity(), **penalties))

        self.layers = nn.ModuleList(stack)

    def forward(self, x):
        '''Feed ``x`` through every layer in order and return the final output.'''
        out = x
        for layer in self.layers:
            out = layer(out)
        return out

    def regularization(self):
        '''Return the sum of the regularization terms of all layers.'''
        total = 0
        for layer in self.layers:
            total = total + layer.regularization()
        return total


In [None]:
def train(model, tr, tm, vr, vm, n_epoch, output_every, device):
    '''
    Trains ``model`` on the full training matrix with L-BFGS and reports RMSE after every step.

    The objective is the 2-norm of the masked reconstruction error plus
    ``model.regularization()``. One optimizer step runs up to ``output_every``
    L-BFGS iterations; int(n_epoch / output_every) steps are performed. After each
    step the prediction is clamped to [1, 5] and the validation and training RMSE
    are printed. Finally one line is appended to 'summary_ml1m.txt' holding the
    command-line arguments, the last validation RMSE, the last training RMSE and
    the module-level ``seed`` (which must exist when this function is called).
    If no step was performed, both RMSE values are written as 'nan'.

    :param model: module providing ``forward`` and ``regularization()``
    :param tr: training ratings
    :param tm: training mask (1 where ``tr`` holds a rating)
    :param vr: validation ratings
    :param vm: validation mask (1 where ``vr`` holds a rating)
    :param n_epoch: total number of L-BFGS iterations
    :param output_every: L-BFGS iterations per optimizer step / between two reports
    :param device: torch device on which to run
    :return: None
    '''
    tr = tr.float().to(device)
    tm = tm.float().to(device)
    vr = vr.float().to(device)
    vm = vm.float().to(device)

    model = model.to(device)
    optimizer = optim.LBFGS(model.parameters(), lr=0.01, max_iter=output_every)

    def closure():
        # L-BFGS re-evaluates the objective several times per step.
        optimizer.zero_grad()
        prediction = model(tr)
        diff = tm * (tr - prediction)  # only observed training entries contribute
        sqE = torch.norm(diff, p=2)
        loss = sqE + model.regularization()
        loss.backward()
        return loss

    # Keep both metrics defined even when n_epoch < output_every and the loop below
    # never runs; otherwise writing the summary would fail on unbound names.
    error = error_train = float('nan')

    for i in range(int(n_epoch / output_every)):
        optimizer.step(closure)
        with torch.no_grad():
            # Ratings live in [1, 5], so clip the prediction once and reuse it.
            clipped = torch.clamp(model(tr), 1., 5.)
            error = ((vm * (clipped - vr) ** 2).sum() / vm.sum()).sqrt().item()
            error_train = ((tm * (clipped - tr) ** 2).sum() / tm.sum()).sqrt().item()

        print('.-^-._' * 12)
        print('epoch:', i, 'validation rmse:', error, 'train rmse:', error_train)
        print('.-^-._' * 12)

    # The with-block closes the file; no explicit close() is needed.
    with open('summary_ml1m.txt', 'a') as file:
        for a in sys.argv[1:]:
            file.write(a + ' ')
        file.write(str(error) + ' ' + str(error_train) + ' ' + str(seed) + '\n')

In [None]:
# Build the network from the configured hyper-parameters and train it on the loaded ratings.
model = KernelNet(n_u=n_u, n_hid=n_hid, n_layers=n_layers, lambda_s=lambda_s, lambda_2=lambda_2)
train(model, tr, tm, vr, vm, n_epoch=n_epoch, output_every=output_every, device=device)

.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
epoch: 0 validation rmse: 1.8169916868209839 train rmse: 1.811744213104248
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
epoch: 1 validation rmse: 1.3099744319915771 train rmse: 1.2994760274887085
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
epoch: 2 validation rmse: 1.1581028699874878 train rmse: 1.1482983827590942
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
epoch: 3 validation rmse: 1.2342067956924438 train rmse: 1.2272725105285645
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
epoch: 4 validation rmse: 1.127373695373