In [None]:
import pandas as pd
import xlrd
from sklearn.model_selection import train_test_split
import numpy as np
from time import time

In [None]:
def load_jester_data(file_path, delimiter='\t', valfrac=0.1, random_state=1234):
    """Read a headerless Excel sheet of ratings and split its rows into two arrays.

    Args:
        file_path: Path of the spreadsheet handed to ``pd.read_excel``.
        delimiter: Accepted for compatibility; the function never reads it.
        valfrac: Share of rows placed in the validation split
            (forwarded as ``test_size``).
        random_state: Seed forwarded to ``train_test_split`` so the split is
            repeatable.

    Returns:
        ``(train_data, validation_data)`` as NumPy arrays (``DataFrame.values``).
    """
    # Every cell equal to 99 ("not rated") becomes 0, in every column.
    # NOTE(review): if the first spreadsheet column is a per-user rating count
    # rather than a joke rating, it is kept as an ordinary column here - confirm.
    ratings_frame = pd.read_excel(file_path, header=None).replace(99, 0)

    train_frame, validation_frame = train_test_split(
        ratings_frame, test_size=valfrac, random_state=random_state
    )
    return train_frame.values, validation_frame.values

# Load the Jester ratings workbook (path is relative to the working directory)
# and split it into training and validation arrays, using load_jester_data's
# default validation fraction and random_state.
file_path = 'jester-data-1.xls'
train_data, validation_data = load_jester_data(file_path)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import sys
import os

In [None]:
# The seed is taken from the wall clock, so every run starts from a different
# random state. Log it so that a particular run can be reproduced later by
# assigning the printed value to `seed` instead of int(time()).
seed = int(time())
print('random seed:', seed)
np.random.seed(seed)
torch.manual_seed(seed)


<torch._C.Generator at 0x7fbd5b7abb50>

In [None]:
# Problem size, read off the training split (rows x columns).
n_users, n_jokes = train_data.shape

# Architecture: width of each hidden kernel layer and how many of them to stack.
n_hid = 800
n_layers = 2

# Regularisation weights: lambda_s scales the norm of the kernel matrix,
# lambda_2 scales the norm of the dense weights (see KernelLayer.regularization).
lambda_s = 1e-3
lambda_2 = 1e-3

# Schedule: metrics are printed once every `output_every` L-BFGS iterations,
# for a total budget of `n_epoch` iterations.
output_every = 50
n_epoch = n_layers * 10 * output_every

verbose_bfgs = True  # not read by any code in the cells shown here
use_gpu = True

# Fall back to the CPU when CUDA is unavailable or the GPU is switched off.
device = torch.device('cuda' if use_gpu and torch.cuda.is_available() else 'cpu')


In [None]:
class KernelLayer(nn.Module):
    """Dense layer whose weight matrix is gated element-wise by a learned kernel.

    Each input unit ``i`` and each output unit ``j`` owns an ``n_dim``-dimensional
    embedding (``u[i]`` and ``v[j]``). The weight actually applied is
    ``W[i, j] * max(0, 1 - ||u[i] - v[j]||**2)``. The affine map is followed by
    batch normalisation, the activation and dropout, in that order.

    Args:
        n_in: Number of input features.
        n_hid: Number of output features.
        n_dim: Size of the per-unit kernel embeddings.
        activation: Module applied after batch normalisation.
        lambda_s: Weight of the kernel-matrix penalty. The default is whatever
            the module-level ``lambda_s`` held when this class was defined.
        lambda_2: Weight of the dense-weight penalty. The default is whatever
            the module-level ``lambda_2`` held when this class was defined.
        dropout_p: Drop probability of the trailing dropout.
    """

    def __init__(self, n_in, n_hid=800, n_dim=5, activation=nn.ReLU(),
                 lambda_s=lambda_s, lambda_2=lambda_2, dropout_p=0.5):
        super().__init__()

        init_std = 1e-3

        # Random tensors are drawn in the order W, u, v; keep that order so a
        # given seed keeps producing the same initial parameters.
        self.W = nn.Parameter(torch.empty(n_in, n_hid).normal_(std=init_std))
        self.u = nn.Parameter(torch.empty(n_in, 1, n_dim).normal_(std=init_std))
        self.v = nn.Parameter(torch.empty(1, n_hid, n_dim).normal_(std=init_std))
        self.b = nn.Parameter(torch.zeros(n_hid))

        self.batch_norm = nn.BatchNorm1d(n_hid)
        self.activation = activation
        self.dropout = nn.Dropout(dropout_p)

        self.lambda_s = lambda_s
        self.lambda_2 = lambda_2

    def kernel(self, u, v):
        """Return ``max(0, 1 - ||u - v||**2)``, with the norm taken over dim 2."""
        # u is (n_in, 1, n_dim) and v is (1, n_hid, n_dim), so the difference
        # broadcasts to (n_in, n_hid, n_dim) and the result is (n_in, n_hid).
        squared_distance = torch.norm(u - v, p=2, dim=2).pow(2)
        return torch.clamp(1.0 - squared_distance, min=0.0)

    def forward(self, x):
        """Apply the gated affine map, then batch norm, activation and dropout."""
        gated_weights = self.kernel(self.u, self.v) * self.W
        pre_activation = torch.matmul(x, gated_weights) + self.b
        return self.dropout(self.activation(self.batch_norm(pre_activation)))

    def regularization(self):
        """Return ``lambda_s * ||kernel||_2 + lambda_2 * ||W||_2`` as a scalar tensor."""
        kernel_penalty = torch.norm(self.kernel(self.u, self.v), p=2) * self.lambda_s
        weight_penalty = torch.norm(self.W, p=2) * self.lambda_2
        return kernel_penalty + weight_penalty


In [None]:
class KernelNet(nn.Module):
    """Autoencoder-style stack of ``KernelLayer`` blocks.

    ``n_layers`` hidden layers of width ``n_hid`` are followed by an output
    layer that maps back to ``n_u`` features with an identity activation.

    Args:
        n_u: Number of input (and output) features.
        n_hid: Width of every hidden layer.
        n_layers: Number of hidden layers placed before the output layer.
        lambda_s: Kernel-penalty weight handed to every layer.
        lambda_2: Dense-weight-penalty weight handed to every layer.
    """

    def __init__(self, n_u, n_hid, n_layers, lambda_s, lambda_2):
        super().__init__()

        hidden_layers = [
            KernelLayer(n_u if i == 0 else n_hid, n_hid,
                        lambda_s=lambda_s, lambda_2=lambda_2)
            for i in range(n_layers)
        ]

        # With no hidden layer the output layer is fed the raw input, so its
        # input width has to be n_u rather than n_hid.
        output_in = n_hid if hidden_layers else n_u

        # Dropout is switched off on the output layer: KernelLayer's default
        # (p=0.5) would zero half of the final predictions and double the rest
        # on every training-mode forward pass.
        output_layer = KernelLayer(output_in, n_u, activation=nn.Identity(),
                                   lambda_s=lambda_s, lambda_2=lambda_2,
                                   dropout_p=0.0)

        self.layers = nn.ModuleList(hidden_layers + [output_layer])

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the last output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def regularization(self):
        """Return the sum of the regularisation terms of all layers."""
        return sum(layer.regularization() for layer in self.layers)



In [None]:
def create_sparse_matrix(data):
    """Copy the non-zero entries of a 2-D array into a new float64 array.

    Despite the name, the result is an ordinary dense ``numpy.ndarray``;
    positions where ``data`` equals 0 stay at 0.

    Args:
        data: 2-D array-like of ratings.

    Returns:
        A new ``(n_users, n_items)`` float64 array that shares no memory with
        ``data``.

    Raises:
        ValueError: If ``data`` is not two-dimensional.
    """
    data = np.asarray(data)
    n_users, n_items = data.shape  # unpacking rejects anything that is not 2-D
    ratings = np.zeros((n_users, n_items))

    # One vectorised masked copy instead of a Python loop over every cell.
    rated = data != 0
    ratings[rated] = data[rated]

    return ratings


In [None]:
def normalize_data(ratings):
    """Subtract each row's mean rating from that row's non-zero entries.

    Entries equal to 0 are treated as "not rated": they are left out of the
    mean and stay 0 in the output.

    Args:
        ratings: 2-D array-like of ratings (rows are averaged independently).

    Returns:
        ``(normalized_ratings, mean_ratings)`` as float32 tensors, where
        ``mean_ratings`` has shape ``(n_rows, 1)``. A row without any non-zero
        entry gets a mean of 0 and an all-zero normalised row instead of NaN.
    """
    ratings = torch.tensor(ratings, dtype=torch.float32)  # Convert to PyTorch tensor
    rated = ratings != 0

    # Clamp the count to 1 so that a row with no ratings yields 0 / 1 = 0
    # rather than 0 / 0 = NaN.
    rated_count = rated.sum(dim=1, keepdim=True).clamp(min=1)
    mean_ratings = (ratings * rated).sum(dim=1, keepdim=True) / rated_count

    normalized_ratings = torch.where(rated, ratings - mean_ratings, torch.zeros_like(ratings))
    return normalized_ratings, mean_ratings


In [None]:
def _masked_rmse(prediction, target, mask, low=-10., high=10.):
    """Return the RMSE over entries where ``mask`` is 1, with predictions clamped to [low, high]."""
    squared_error = mask * (torch.clamp(prediction, low, high) - target) ** 2
    return (squared_error.sum() / mask.sum()).sqrt().item()


def train(model, tr, vr, n_epoch, output_every, device, tr_mean, vr_mean, num_users_sample=1000):
    """Fit ``model`` with L-BFGS and print validation/train RMSE after every round.

    The model is optimised to reproduce ``tr`` at its non-zero entries. After
    each round the row means are added back and the RMSE is reported on the
    un-centred scale, with predictions clamped to [-10, 10].

    Args:
        model: Module exposing ``forward(x)`` and ``regularization()``.
        tr: Training ratings, expected to be mean-centred per row; entries
            equal to 0 are treated as unobserved.
        vr: Validation ratings in the same format as ``tr``.
        n_epoch: Total L-BFGS iteration budget.
        output_every: Iterations per round (``max_iter`` of each optimizer
            step); ``n_epoch / output_every`` rounds are run.
        device: Device on which the model and data are placed.
        tr_mean: ``(n_train_rows, 1)`` tensor of the means removed from ``tr``.
        vr_mean: ``(n_val_rows, 1)`` tensor of the means removed from ``vr``.
        num_users_sample: Number of validation rows drawn (without replacement)
            for each validation estimate; capped at the number of rows in ``vr``.

    Returns:
        None. Progress is printed.
    """
    tr = tr.float().to(device)
    vr = vr.float().to(device)
    tr_mean = tr_mean.to(device)
    vr_mean = vr_mean.to(device)

    tm = (tr != 0).float().to(device)  # Create mask for train data
    vm = (vr != 0).float().to(device)  # Create mask for validation data

    # The mean is added back to the predictions below, so the targets must be
    # moved to the same un-centred scale (only where a rating was observed).
    tr_target = tr + tm * tr_mean
    vr_target = vr + vm * vr_mean

    model = model.to(device)
    optimizer = optim.LBFGS(model.parameters(), lr=0.01, max_iter=output_every)

    def closure():
        optimizer.zero_grad()
        prediction = model(tr)
        diff = tm * (tr - prediction)
        sqE = torch.norm(diff, p=2)
        loss = sqE + model.regularization()
        loss.backward()
        return loss

    # Sampling without replacement cannot draw more rows than exist.
    n_val_users = vr.shape[0]
    sample_size = min(num_users_sample, n_val_users)

    for i in range(int(n_epoch / output_every)):
        model.train()
        optimizer.step(closure)

        # Score in eval mode: dropout must be inactive and batch norm must use
        # its running statistics (and must not update them on validation data).
        model.eval()
        with torch.no_grad():
            tr_prediction = model(tr) + tr_mean  # Add the mean rating back

            # Select a random subset of users for validation
            user_indices = np.random.choice(n_val_users, sample_size, replace=False)
            user_indices = torch.tensor(user_indices, dtype=torch.long, device=device)
            vm_sub = vm[user_indices]

            vr_prediction = model(vr[user_indices]) + vr_mean[user_indices]

            error = _masked_rmse(vr_prediction, vr_target[user_indices], vm_sub)
            error_train = _masked_rmse(tr_prediction, tr_target, tm)
        model.train()

        print('.-^-._' * 12)
        print('epoch:', i, 'validation rmse:', error, 'train rmse:', error_train)
        print('.-^-._' * 12)


In [None]:
# Centre each split row by row, then keep detached copies of the centred
# ratings and of the row means on the training device.
tr, tr_mean = (t.clone().detach().to(device) for t in normalize_data(train_data))
vr, vr_mean = (t.clone().detach().to(device) for t in normalize_data(validation_data))

# Build the network over the columns of the rating matrix and fit it.
model = KernelNet(n_jokes, n_hid, n_layers, lambda_s, lambda_2)

train(model, tr, vr, n_epoch, output_every, device, tr_mean, vr_mean, num_users_sample=1000)





.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
epoch: 0 validation rmse: 10.33069896697998 train rmse: 10.337873458862305
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
epoch: 1 validation rmse: 10.325453758239746 train rmse: 10.280762672424316
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
epoch: 2 validation rmse: 10.245012283325195 train rmse: 10.1530122756958
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
epoch: 3 validation rmse: 10.178176879882812 train rmse: 10.121927261352539
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
epoch: 4 validation rmse: 10.1300792694091