In [1]:
# This needs to be here because by default Jupyter only adds the pwd to sys.path
import os, sys
if os.path.abspath('..') not in sys.path: sys.path.append(os.path.abspath('..'))

import pandas as pd
import numpy as np
from pysrc.constants import datapath, N_ITEMS, N_USERS
from pysrc.constants import cachepath, datapath, chartpath
import torch
import time
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train_dict = {}
test_dict = {}

with open(datapath("train.txt")) as file:
    for line in file:
        (user_id, items) = line.split(maxsplit=1)
        train_dict[user_id] = items

with open(datapath("test.txt")) as file:
    for line in file:
        split = line.split(maxsplit=1)
        if (len(split) > 1):
            (user_id, items) = split
            test_dict[user_id] = items
        else:
            test_dict[split[0]] = ''


In [3]:
train = []
test = []
for key in train_dict:
    train.append([int(n) for n in train_dict[key].replace('\n', '').split(' ')])

for key in test_dict:
    if len(test_dict[key]) == 0:
        test.append([])
    else:
        test.append([int(n) for n in test_dict[key].replace('\n', '').split(' ')])
        

In [4]:
x = torch.rand((10,5))
x

tensor([[0.9811, 0.2539, 0.5083, 0.7178, 0.3479],
        [0.1886, 0.4130, 0.8252, 0.1679, 0.7756],
        [0.0905, 0.2620, 0.8830, 0.2841, 0.3204],
        [0.7884, 0.7458, 0.5503, 0.4397, 0.6396],
        [0.7081, 0.9715, 0.6819, 0.4225, 0.4962],
        [0.0483, 0.9642, 0.5841, 0.1368, 0.0916],
        [0.9087, 0.3463, 0.0519, 0.7991, 0.4298],
        [0.3684, 0.9790, 0.2057, 0.2134, 0.1930],
        [0.3068, 0.8943, 0.5927, 0.3756, 0.6256],
        [0.1642, 0.8763, 0.2987, 0.4170, 0.5732]])

In [5]:
x.sum(axis = 1)

tensor([2.8091, 2.3702, 1.8400, 3.1638, 3.2802, 1.8250, 2.5358, 1.9595, 2.7950,
        2.3293])

In [6]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import Tensor


def log_likelihood_loss(y, yhat):
    return -torch.mean(torch.sum(torch.log(yhat+1e-6) * y, axis = 1))

def likelihood_loss(y, yhat):
    return -torch.mean(torch.sum(yhat * y, axis = 1))

class VanillaVAE(nn.Module):


    def __init__(self,
                 input_dim: int,
                 latent_dim: int,
                 hidden_dims = None,
                 kl_weight = .2
                 ):
        super(VanillaVAE, self).__init__()
        
        self.kl_weight = kl_weight

        self.latent_dim = latent_dim
        
        modules = []
        
        if hidden_dims is None:
            hidden_dims = [512, 128]
            
        self.hidden_dims = hidden_dims
            
        modules.append(nn.Linear(input_dim, hidden_dims[0]))
        modules.append(nn.BatchNorm1d(hidden_dims[0]))
        modules.append(nn.LeakyReLU())
        
        # Build Encoder
        for i in range(len(hidden_dims)-1):
            modules.append(nn.Linear(hidden_dims[i], hidden_dims[i+1]))
            modules.append(nn.BatchNorm1d(hidden_dims[i+1]))
            modules.append(nn.LeakyReLU())

        self.encoder = nn.Sequential(*modules)
        self.fc_mu = nn.Linear(hidden_dims[-1], latent_dim)
        self.fc_var = nn.Linear(hidden_dims[-1], latent_dim)

        # Build Decoder
        modules = []

        self.decoder_input = nn.Linear(latent_dim, hidden_dims[-1])

        hidden_dims.reverse()

        for i in range(len(hidden_dims) - 1):
            modules.append(nn.Linear(hidden_dims[i], hidden_dims[i+1]))
            modules.append(nn.BatchNorm1d(hidden_dims[i+1]))
            modules.append(nn.LeakyReLU())



        self.decoder = nn.Sequential(*modules)

        self.final_layer = nn.Sequential(nn.Linear(hidden_dims[-1], input_dim),
                                         nn.Sigmoid()
                                        )
        

    def encode(self, input: Tensor):
        """
        Encodes the input by passing through the encoder network
        and returns the latent codes.
        :param input: (Tensor) Input tensor to encoder [N x C x H x W]
        :return: (Tensor) List of latent codes
        """
        result = self.encoder(input)
        result = torch.flatten(result, start_dim=1)

        # Split the result into mu and var components
        # of the latent Gaussian distribution
        mu = self.fc_mu(result)
        log_var = self.fc_var(result)

        return [mu, log_var]

    def decode(self, z: Tensor):
        """
        Maps the given latent codes
        onto the image space.
        :param z: (Tensor) [B x D]
        :return: (Tensor) [B x C x H x W]
        """
        result = self.decoder_input(z)
        result = result.view(-1, self.hidden_dims[0])
        result = self.decoder(result)
        result = self.final_layer(result)
        result = F.normalize(result, p=1)
        return result

    def reparameterize(self, mu: Tensor, logvar: Tensor) -> Tensor:
        """
        Reparameterization trick to sample from N(mu, var) from
        N(0,1).
        :param mu: (Tensor) Mean of the latent Gaussian [B x D]
        :param logvar: (Tensor) Standard deviation of the latent Gaussian [B x D]
        :return: (Tensor) [B x D]
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return eps * std + mu

    def forward(self, x: Tensor, **kwargs):
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        return  [F.normalize(self.decode(z), 1, dim=1), x, mu, log_var]

    def loss_function(self, recons, x, mu, log_var) -> dict:
        """
        Computes the VAE loss function.
        KL(N(\mu, \sigma), N(0, 1)) = \log \frac{1}{\sigma} + \frac{\sigma^2 + \mu^2}{2} - \frac{1}{2}
        :param args:
        :param kwargs:
        :return:
        """
        

        kld_weight = self.kl_weight
        recons_loss = log_likelihood_loss(x, recons)

        kld_loss = torch.mean(-0.5 * torch.sum(1 + log_var - mu ** 2 - log_var.exp(), dim = 1), dim = 0)

        loss = recons_loss + kld_weight * kld_loss
        return {'loss': loss, 'Reconstruction_Loss':recons_loss.detach(), 'KLD':-kld_loss.detach()}

    def sample(self,
               num_samples:int,
               current_device: int, **kwargs) -> Tensor:
        """
        Samples from the latent space and return the corresponding
        image space map.
        :param num_samples: (Int) Number of samples
        :param current_device: (Int) Device to run the model
        :return: (Tensor)
        """
        z = torch.randn(num_samples,
                        self.latent_dim)

        z = z.to(current_device)

        samples = self.decode(z)
        return samples

    def generate(self, x: Tensor, **kwargs) -> Tensor:
        """
        Given an input image x, returns the reconstructed image
        :param x: (Tensor) [B x C x H x W]
        :return: (Tensor) [B x C x H x W]
        """

        return self.forward(x)[0]

In [7]:
item_dim = 91599
np.random.seed(0)
test_ids = np.random.choice(np.array(list(train_dict.keys())).astype(int), int(.2*len(train_dict.keys())), replace=False)

In [8]:
def sparsify_uniform(data, p_relative = .1):
    current_sparse = None
    batch_size = 1000
    X = []
    for row in data:
        X.append(torch.zeros(item_dim).bool())
        for item in row:
            if np.random.random() > p_relative:
                X[-1][item] = 1
        X[-1] = X[-1]
    X = torch.stack(X)
    return X
    
def sparsify_items(data, m, sigma):
    return

def sparsify_users(data, m, sigma):
    return

def list_batch_to_ohe(data):
    return sparsify_uniform(data, 0)



In [9]:
def train_model(model, train, val, n_epochs = 100):
    for epoch in range(n_epochs):
        
        model.train()
        start_time = time.time()
        total_loading_time = 0
        for batch_X, batch_Y in train:
            batch_X = batch_X.float().cuda()
            batch_Y = batch_Y.float().cuda()
            recons, x, mu, log_var = model.forward(batch_X)
            train_loss = model.loss_function(recons, batch_Y, mu, log_var)['loss']
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
        scheduler.step()
        if epoch % 10 == 0:
            print(f"Done Epoch {epoch}")
            with torch.no_grad():
                model.eval()
                total_train_loss = 0
                r_train_loss = 0
                kl_train_loss = 0
                train_batches = 0
                for batch_X, batch_Y in train:
                    batch_X = batch_X.float().cuda()
                    batch_Y = batch_Y.float().cuda()
                    recons, x, mu, log_var = model.forward(batch_X)
                    loss = model.loss_function(recons, batch_Y, mu, log_var)
                    total_train_loss += loss['loss']
                    r_train_loss += loss['Reconstruction_Loss']
                    kl_train_loss += loss['KLD']
                    train_batches += 1

                total_train_loss /= train_batches
                r_train_loss /= train_batches
                kl_train_loss /= train_batches

                total_loss = 0
                r_loss = 0
                kl_loss = 0
                batches = 0
                for batch_X, batch_Y in val:
                    batch_X = batch_X.float().cuda()
                    batch_Y = batch_Y.float().cuda()
                    recons, x, mu, log_var = model.forward(batch_X)
                    loss = model.loss_function(recons, batch_Y, mu, log_var)
                    total_loss += loss['loss']
                    r_loss += loss['Reconstruction_Loss']
                    kl_loss += loss['KLD']
                    batches += 1

                total_loss /= batches
                r_loss /= batches
                kl_loss /= batches
            print("Train Loss", total_train_loss)
            print("Val Loss", total_loss)
            
def train_model_old(model, train, val, n_epochs = 100):
    for epoch in range(n_epochs):
        if epoch % 10 == 0:
            print(f"Start Epoch {epoch}")
        model.train()
        start_time = time.time()
        total_loading_time = 0
        for batch_X, batch_Y in train:
            batch_X = batch_X.float().cuda()
            recons, x, mu, log_var = model.forward(batch_X)
            train_loss = model.loss_function(recons, batch_X, mu, log_var)['loss']
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
        scheduler.step()
        '''
        with torch.no_grad():
            model.eval()
            total_train_loss = 0
            r_train_loss = 0
            kl_train_loss = 0
            train_batches = 0
            for batch_X, batch_Y in train:
                batch_X = batch_X.float().cuda()
                recons, x, mu, log_var = model.forward(batch_X)
                loss = model.loss_function(recons, batch_X, mu, log_var)
                total_train_loss += loss['loss']
                r_train_loss += loss['Reconstruction_Loss']
                kl_train_loss += loss['KLD']
                train_batches += 1

            total_train_loss /= train_batches
            r_train_loss /= train_batches
            kl_train_loss /= train_batches

            total_loss = 0
            r_loss = 0
            kl_loss = 0
            batches = 0
            for batch_X, batch_Y in val:
                batch_X = batch_X.float().cuda()
                recons, x, mu, log_var = model.forward(batch_X)
                loss = model.loss_function(recons, batch_X, mu, log_var)
                total_loss += loss['loss']
                r_loss += loss['Reconstruction_Loss']
                kl_loss += loss['KLD']
                batches += 1

            total_loss /= batches
            r_loss /= batches
            kl_loss /= batches
        '''
def top_k_recall(X_in, X_out, X_target, k = 20, mask_in = True):
    if mask_in:
        mask = X_in == False
        X_out = X_out * mask
    topk = torch.topk(X_out, k)
    n = 0
    total_recall = 0
    for i in range(len(X_in)):
        if int(X_target[i].sum()) == 0:
            continue
        selected = topk.indices[i]
        total_recall += X_target[i][selected].sum() / X_target[i].sum()
        n += 1
    
    return total_recall / n

def n_recall(X_in, X_out, X_target, mask_in = True):
    if mask_in:
        mask = X_in == False
        X_out = X_out * mask
        X_target = X_target * mask
    topk = torch.topk(X_out, int(X_target.sum(axis=1).max()), sorted=True)
    n = 0
    total_recall = 0
    for i in range(len(X_in)):
        if int(X_target[i].sum()) == 0:
            continue
        selected = topk.indices[i]
        total_recall += X_target[i][selected[:int(X_target[i].sum())]].sum() / int(X_target[i].sum())
        n += 1
    #print(total_recall,n)
    #print(len(X_in))
    return total_recall / len(X_in)

In [10]:
# This needs to be here because by default Jupyter only adds the pwd to sys.path
import os, sys
if os.path.abspath('..') not in sys.path: sys.path.append(os.path.abspath('..'))

import torch
import time
import scipy
import pickle 
import pandas as pd
import numpy as np

from tqdm.auto import tqdm
from pysrc.constants import datapath#, N_ITEMS, N_USERS
from torch.utils.data import Dataset
from scipy.sparse import csr_matrix, coo_matrix
from pathlib import Path
from collections import defaultdict

In [11]:
def pairwise_jaccard_sparse(csr):
    """Computes the Jaccard distance between the rows of `csr`,
    smaller than the cut-off distance `epsilon`.
    """
    csr = csr_matrix(csr).astype(bool).astype(int)

    csr_rownnz = csr.getnnz(axis=1)
    intrsct = csr.dot(csr.T)

    nnz_i = np.repeat(csr_rownnz, intrsct.getnnz(axis=1))
    unions = nnz_i + csr_rownnz[intrsct.indices] - intrsct.data
    dists = intrsct.data / unions

    out = csr_matrix((dists, intrsct.indices, intrsct.indptr), intrsct.shape)
    return out

In [45]:
sparse_train_data = torch.load(datapath(f"train/uniform/uniform100_data.pt"))

In [47]:
with open(datapath(f"test/uniform/uniform100_test_indices.pickle"), "rb") as f:
    sparse_test_data = pickle.load(f)

In [48]:
sim = pairwise_jaccard_sparse(sparse_train_data.to_dense())
for i in range(sim.shape[0]):
    sim[i,i] = 0

In [49]:
def sparsify_sparse_uniform(sparse_X, p_relative = .1):
    X = []
    for i in range(sparse_train_data.shape[0]):
        if i % 1000 == 0:
            print(i)
        X.append(torch.zeros(item_dim).bool())
        row = sparse_X[i].coalesce().indices()[0]
        X[-1][row] = 1
    X = torch.stack(X)
    return X
    
def sparse_to_ohe(data):
    return sparsify_sparse_uniform(data, 0)

In [50]:
dev_df = sparse_train_data.bool().to_dense()
test_df = list_batch_to_ohe(pd.Series(sparse_test_data))

In [51]:
len(dev_df)

52643

In [56]:
test_mask = torch.zeros(N_USERS)
test_mask[test_ids] = 1

In [53]:
dev_in = dev_df[test_mask == 0]
dev_out = test_df[test_mask == 0]
test_in_tensor = dev_df[test_mask == 1]
test_out_tensor = test_df[test_mask == 1]

train_uids = (1-test_mask).nonzero().reshape(-1)[:int(.7*len(dev_in))]
val_uids = (1-test_mask).nonzero().reshape(-1)[int(.7*len(dev_in)):]
test_uids = test_mask.nonzero().reshape(-1)

train_in_tensor = dev_in[:int(.7*len(dev_in))]
val_in_tensor = dev_in[int(.7*len(dev_in)):]

train_out_tensor = dev_out[:int(.7*len(dev_in))]
val_out_tensor = dev_out[int(.7*len(dev_in)):]


In [54]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import Tensor

def log_likelihood_loss(y, yhat):
    return -torch.mean(torch.sum(torch.log(yhat+1e-6) * y, axis = 1))

def likelihood_loss(y, yhat):
    return -torch.mean(torch.sum(yhat * y, axis = 1))

class GraphVAE(nn.Module):

    def __init__(self,
                 input_dim: int,
                 latent_dim: int,
                 hidden_dims = None,
                 kl_weight = .2,
                 sim = None
                 ):
        super(GraphVAE, self).__init__()
        
        self.kl_weight = kl_weight

        self.latent_dim = latent_dim
        
        self.embeddings = torch.zeros(N_USERS, self.latent_dim).cuda()
        
        coo = sim.tocoo()
        
        values = coo.data
        indices = np.vstack((coo.row, coo.col))

        i = torch.LongTensor(indices)
        v = torch.FloatTensor(values)
        shape = coo.shape

        self.neighbors = torch.sparse.FloatTensor(i, v, torch.Size(shape)).cuda()

        
        modules = []
        
        if hidden_dims is None:
            hidden_dims = [512, 128]
            
        self.hidden_dims = hidden_dims
            
        modules.append(nn.Linear(input_dim, hidden_dims[0]))
        modules.append(nn.BatchNorm1d(hidden_dims[0]))
        modules.append(nn.LeakyReLU())
        
        # Build Encoder
        for i in range(len(hidden_dims)-1):
            modules.append(nn.Linear(hidden_dims[i], hidden_dims[i+1]))
            modules.append(nn.BatchNorm1d(hidden_dims[i+1]))
            modules.append(nn.LeakyReLU())

        self.encoder = nn.Sequential(*modules)
        self.fc_mu = nn.Linear(hidden_dims[-1], latent_dim)
        self.fc_var = nn.Linear(hidden_dims[-1], latent_dim)

        # Build Decoder
        modules = []

        self.decoder_input = nn.Linear(2 * latent_dim, hidden_dims[-1])

        hidden_dims.reverse()

        for i in range(len(hidden_dims) - 1):
            modules.append(nn.Linear(hidden_dims[i], hidden_dims[i+1]))
            modules.append(nn.BatchNorm1d(hidden_dims[i+1]))
            modules.append(nn.LeakyReLU())



        self.decoder = nn.Sequential(*modules)

        self.final_layer = nn.Sequential(nn.Linear(hidden_dims[-1], input_dim),
                                         nn.Sigmoid()
                                        )
        

    def encode(self, input: Tensor):
        """
        Encodes the input by passing through the encoder network
        and returns the latent codes.
        :param input: (Tensor) Input tensor to encoder [N x C x H x W]
        :return: (Tensor) List of latent codes
        """
        result = self.encoder(input)
        result = torch.flatten(result, start_dim=1)
        # Split the result into mu and var components
        # of the latent Gaussian distribution
        mu = self.fc_mu(result)
        log_var = self.fc_var(result)

        return [mu, log_var]

    def decode(self, z: Tensor):
        """
        Maps the given latent codes
        onto the image space.
        :param z: (Tensor) [B x D]
        :return: (Tensor) [B x C x H x W]
        """
        result = self.decoder_input(z)
        result = result.view(-1, self.hidden_dims[0])
        result = self.decoder(result)
        result = self.final_layer(result)
        result = F.normalize(result, p=1)
        return result

    def reparameterize(self, mu: Tensor, logvar: Tensor) -> Tensor:
        """
        Reparameterization trick to sample from N(mu, var) from
        N(0,1).
        :param mu: (Tensor) Mean of the latent Gaussian [B x D]
        :param logvar: (Tensor) Standard deviation of the latent Gaussian [B x D]
        :return: (Tensor) [B x D]
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return eps * std + mu

    def forward(self, x: Tensor, uids):
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        self.embeddings[uids] = z.detach()
        neighbors = torch.stack([self.neighbors[uid].to_dense() for uid in uids])
        for i in range(len(uids)):
            neighbors[i, uids[i]] = 0
            
        neighbors[:,test_ids] = 0
        
        neighbor_embeds = ((neighbors @ self.embeddings).T / (torch.sum(neighbors, axis = 1) + 1e-5)).T
        
        z = torch.cat([z, neighbor_embeds], axis=1)
        
        return  [F.normalize(self.decode(z), 1, dim=1), x, mu, log_var]
    
        
    def set_embeddings(self, x, uids):
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        self.embeddings[uids] = z.detach()

    def loss_function(self, recons, x, mu, log_var) -> dict:
        """
        Computes the VAE loss function.
        KL(N(\mu, \sigma), N(0, 1)) = \log \frac{1}{\sigma} + \frac{\sigma^2 + \mu^2}{2} - \frac{1}{2}
        :param args:
        :param kwargs:
        :return:
        """
        

        kld_weight = self.kl_weight
        recons_loss = log_likelihood_loss(x, recons)

        kld_loss = torch.mean(-0.5 * torch.sum(1 + log_var - mu ** 2 - log_var.exp(), dim = 1), dim = 0)

        loss = recons_loss + kld_weight * kld_loss
        return {'loss': loss, 'Reconstruction_Loss':recons_loss.detach(), 'KLD':-kld_loss.detach()}

    def sample(self,
               num_samples:int,
               current_device: int, **kwargs) -> Tensor:
        """
        Samples from the latent space and return the corresponding
        image space map.
        :param num_samples: (Int) Number of samples
        :param current_device: (Int) Device to run the model
        :return: (Tensor)
        """
        z = torch.randn(num_samples,
                        self.latent_dim)

        z = z.to(current_device)

        samples = self.decode(z)
        return samples

    def generate(self, x: Tensor, **kwargs) -> Tensor:
        """
        Given an input image x, returns the reconstructed image
        :param x: (Tensor) [B x C x H x W]
        :return: (Tensor) [B x C x H x W]
        """

        return self.forward(x)[0]

In [55]:
def train_graph_model(model, train, val, n_epochs = 100):
    for epoch in range(n_epochs):
        
        model.train()
        start_time = time.time()
        total_loading_time = 0
        for batch_X, batch_Y, batch_ind in train:
            batch_X = batch_X.float().cuda()
            batch_Y = batch_Y.float().cuda()
            recons, x, mu, log_var = model.forward(batch_X, batch_ind)
            train_loss = model.loss_function(recons, batch_Y, mu, log_var)['loss']
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
        scheduler.step()
        if epoch % 10 == 0:
            print(f"Done Epoch {epoch}")

            with torch.no_grad():
                model.eval()
                total_train_loss = 0
                r_train_loss = 0
                kl_train_loss = 0
                train_batches = 0
                for batch_X, batch_Y, batch_ind in train:
                    batch_X = batch_X.float().cuda()
                    batch_Y = batch_Y.float().cuda()
                    recons, x, mu, log_var = model.forward(batch_X, batch_ind)
                    loss = model.loss_function(recons, batch_Y, mu, log_var)
                    total_train_loss += loss['loss']
                    r_train_loss += loss['Reconstruction_Loss']
                    kl_train_loss += loss['KLD']
                    train_batches += 1

                total_train_loss /= train_batches
                r_train_loss /= train_batches
                kl_train_loss /= train_batches

                total_loss = 0
                r_loss = 0
                kl_loss = 0
                batches = 0
                for batch_X, batch_Y, batch_ind in val:
                    batch_X = batch_X.float().cuda()
                    batch_Y = batch_Y.float().cuda()
                    recons, x, mu, log_var = model.forward(batch_X, batch_ind)
                    loss = model.loss_function(recons, batch_Y, mu, log_var)
                    total_loss += loss['loss']
                    r_loss += loss['Reconstruction_Loss']
                    kl_loss += loss['KLD']
                    batches += 1

                total_loss /= batches
                r_loss /= batches
                kl_loss /= batches
                
            print("Train Loss", total_train_loss)
            print("Val Loss", total_loss)

In [57]:
n_epochs = 30
batch_size = 256

sparsity = 0

results = {}

for kl_weight in [1]:#, .01, .02, .03, .05, .1, .2, .3, .4, .5, .6, .7, .9]:
    print()
    print("Starting kl", kl_weight)
    start = time.time()
    
    train_dataset = TensorDataset(train_in_tensor, train_in_tensor + train_out_tensor)
    val_dataset = TensorDataset(val_in_tensor, val_in_tensor + val_out_tensor)
    test_dataset = TensorDataset(test_in_tensor, test_in_tensor + test_out_tensor)
    # Create a data loader from the dataset
    # Type of sampling and batch size are specified at this step
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    lr = 1e-3
    model = VanillaVAE(input_dim = item_dim, latent_dim = 256, hidden_dims = [512, 256], kl_weight = kl_weight).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)
    train_model(model, train_dataloader, val_dataloader, n_epochs = n_epochs)
    
    model.eval()
    test_ind_dataloader = DataLoader(torch.Tensor(range(len(test_in_tensor))).long(), batch_size=batch_size, shuffle=True)
    with torch.no_grad():
        total_n_recall_train = 0
        total_n_train = 0
        for batch_X, batch_Y in train_dataloader:
            batch_out = model.forward(batch_X.float().cuda())[0].detach().cpu()
            n_recall_batch = n_recall(batch_X, batch_out, batch_X, False)
            total_n_recall_train += n_recall_batch * len(batch_X)
            total_n_train += len(batch_X)
        print("Train recons recall:", total_n_recall_train / total_n_train)
        train_recons_recall = total_n_recall_train / total_n_train
        
        total_n_recall_out = 0
        total_n_out = 0
        for batch_X, batch_Y in train_dataloader:
            batch_out = model.forward(batch_X.float().cuda())[0].detach().cpu()
            n_recall_batch = n_recall(batch_X, batch_out, batch_Y, True)
            total_n_recall_out += n_recall_batch * len(batch_X)
            total_n_out += len(batch_X)
        print("Train pred recall:", total_n_recall_out / total_n_out)
        train_out_recall = total_n_recall_out / total_n_out
        
        total_n_recall_recons = 0
        total_n_recons = 0
        for batch_X, batch_Y in test_dataloader:
            batch_out = model.forward(batch_X.float().cuda())[0].detach().cpu()
            n_recall_batch = n_recall(batch_X, batch_out, batch_X, False)
            total_n_recall_recons += n_recall_batch * len(batch_ind)
            total_n_recons += len(batch_ind)
        print("Test recons recall:", total_n_recall_recons / total_n_recons)
        test_recons_recall = total_n_recall_recons / total_n_recons
        
        
        total_n_recall_out = 0
        total_n_out = 0
        for batch_X, batch_Y in test_dataloader:
            batch_out = model.forward(batch_X.float().cuda())[0].detach().cpu()
            n_recall_batch = n_recall(batch_X, batch_out, batch_Y, True)
            total_n_recall_out += n_recall_batch * len(batch_ind)
            total_n_out += len(batch_ind)
        print("Test outsample recall:", total_n_recall_out / total_n_out)
        test_outsample_recall = total_n_recall_out / total_n_out
    results[sparsity] = {"train_recons": train_recons_recall, "test_recons": test_recons_recall, "test_out": test_outsample_recall}
    print(time.time() - start)


Starting kl 1
Done Epoch 0
Train Loss tensor(674.4882, device='cuda:0')
Val Loss tensor(496.8293, device='cuda:0')
Done Epoch 10
Train Loss tensor(629.7322, device='cuda:0')
Val Loss tensor(423.8629, device='cuda:0')
Done Epoch 20
Train Loss tensor(525.0468, device='cuda:0')
Val Loss tensor(406.8112, device='cuda:0')
Train recons recall: tensor(0.1500)
Train pred recall: tensor(0.0492)


RuntimeError: mat1 and mat2 must have the same dtype

In [60]:
total_n_recall_recons = 0
total_n_recons = 0
for batch_X, batch_Y in test_dataloader:
    batch_out = model.forward(batch_X.float().cuda())[0].detach().cpu()
    n_recall_batch = n_recall(batch_X, batch_out, batch_X, False)
    total_n_recall_recons += n_recall_batch * len(batch_X)
    total_n_recons += len(batch_X)
print("Test recons recall:", total_n_recall_recons / total_n_recons)
test_recons_recall = total_n_recall_recons / total_n_recons


total_n_recall_out = 0
total_n_out = 0
for batch_X, batch_Y in test_dataloader:
    batch_out = model.forward(batch_X.float().cuda())[0].detach().cpu()
    n_recall_batch = n_recall(batch_X, batch_out, batch_Y, True)
    total_n_recall_out += n_recall_batch * len(batch_X)
    total_n_out += len(batch_X)
print("Test outsample recall:", total_n_recall_out / total_n_out)

Test recons recall: tensor(0.1049)
Test outsample recall: tensor(0.0238)


In [61]:
n_epochs = 40
batch_size = 256

sparsity = 0

results = {}

for kl_weight in [1]:#, .01, .02, .03, .05, .1, .2, .3, .4, .5, .6, .7, .9]:
    print()
    print("Starting kl", kl_weight)
    start = time.time()
    
    train_dataset = TensorDataset(train_in_tensor, train_in_tensor + train_out_tensor, train_uids)
    val_dataset = TensorDataset(val_in_tensor, val_in_tensor + val_out_tensor, val_uids)
    test_dataset = TensorDataset(test_in_tensor, test_in_tensor + test_out_tensor, test_uids)
    # Create a data loader from the dataset
    # Type of sampling and batch size are specified at this step
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)


    lr = 2e-3
    graph_model = GraphVAE(input_dim = item_dim, latent_dim = 256, hidden_dims = [512, 256], kl_weight = kl_weight, sim = sim).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)
    train_graph_model(graph_model, train_dataloader, val_dataloader, n_epochs = n_epochs)
    


Starting kl 1
Done Epoch 0
Train Loss tensor(5867.1064, device='cuda:0')
Val Loss tensor(536.0150, device='cuda:0')


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "c:\users\nick\appdata\local\programs\python\python37\lib\site-packages\IPython\core\interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\Nick\AppData\Local\Temp\ipykernel_17616\643451530.py", line 27, in <module>
    train_graph_model(graph_model, train_dataloader, val_dataloader, n_epochs = n_epochs)
  File "C:\Users\Nick\AppData\Local\Temp\ipykernel_17616\3366922158.py", line 10, in train_graph_model
    recons, x, mu, log_var = model.forward(batch_X, batch_ind)
  File "C:\Users\Nick\AppData\Local\Temp\ipykernel_17616\2008905878.py", line 129, in forward
    neighbors = torch.stack([self.neighbors[uid].to_dense() for uid in uids])
  File "C:\Users\Nick\AppData\Local\Temp\ipykernel_17616\2008905878.py", line 129, in <listcomp>
    neighbors = torch.stack([self.neighbors[uid].to_dense() for uid in uids])
KeyboardInterrupt

During handling of the above exception, another exception o

TypeError: object of type 'NoneType' has no len()

In [None]:
model.eval()
with torch.no_grad():
    for batch_X, batch_Y, batch_ind in train_dataloader:
        model.set_embeddings(batch_X, batch_ind)
    for batch_X, batch_Y, batch_ind in val_dataloader:
        model.set_embeddings(batch_X, batch_ind)
    for batch_X, batch_Y, batch_ind in test_dataloader:
        model.set_embeddings(batch_X, batch_ind)
    
    total_n_recall_recons = 0
    total_n_recons = 0
    for batch_X, batch_Y, batch_ind in val_dataloader:
        batch_out = model.forward(batch_X.float().cuda(), batch_ind)[0].detach().cpu()
        n_recall_batch = n_recall(batch_X, batch_out, batch_X, False)
        total_n_recall_recons += n_recall_batch * len(batch_ind)
        total_n_recons += len(batch_ind)
    print("Val recons recall:", total_n_recall_recons / total_n_recons)
    test_recons_recall = total_n_recall_recons / total_n_recons


    total_n_recall_out = 0
    total_n_out = 0
    for batch_X, batch_Y, batch_ind in val_dataloader:
        batch_out = model.forward(batch_X.float().cuda(), batch_ind)[0].detach().cpu()
        n_recall_batch = n_recall(batch_X, batch_out, batch_Y, True)
        total_n_recall_out += n_recall_batch * len(batch_ind)
        total_n_out += len(batch_ind)
    print("Val outsample recall:", total_n_recall_out / total_n_out)
    test_outsample_recall = total_n_recall_out / total_n_out
    
    total_n_recall_recons = 0
    total_n_recons = 0
    for batch_X, batch_Y, batch_ind in test_dataloader:
        batch_out = model.forward(batch_X.float().cuda(), batch_ind)[0].detach().cpu()
        n_recall_batch = n_recall(batch_X, batch_out, batch_X, False)
        total_n_recall_recons += n_recall_batch * len(batch_ind)
        total_n_recons += len(batch_ind)
    print("Test recons recall:", total_n_recall_recons / total_n_recons)
    test_recons_recall = total_n_recall_recons / total_n_recons


    total_n_recall_out = 0
    total_n_out = 0
    for batch_X, batch_Y, batch_ind in test_dataloader:
        batch_out = model.forward(batch_X.float().cuda(), batch_ind)[0].detach().cpu()
        n_recall_batch = n_recall(batch_X, batch_out, batch_Y, True)
        total_n_recall_out += n_recall_batch * len(batch_ind)
        total_n_out += len(batch_ind)
    print("Test outsample recall:", total_n_recall_out / total_n_out)
    test_outsample_recall = total_n_recall_out / total_n_out

In [36]:
def train_and_eval_base(train_in_tensor, train_out_tensor, val_in_tensor, val_out_tensor, test_in_tensor, test_out_tensor):
    train_dataset = TensorDataset(train_in_tensor, train_in_tensor + train_out_tensor)
    val_dataset = TensorDataset(val_in_tensor, val_in_tensor + val_out_tensor)
    test_dataset = TensorDataset(test_in_tensor, test_in_tensor + test_out_tensor)
    # Create a data loader from the dataset
    # Type of sampling and batch size are specified at this step
    train_dataloader = DataLoader(train_dataset, batch_size=256, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=256, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=256, shuffle=True)
    
    lr = 1e-3
    model = VanillaVAE(input_dim = item_dim, latent_dim = 256, hidden_dims = [512, 256], kl_weight = 1).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)
    train_model(model, train_dataloader, val_dataloader, n_epochs = 30)
    
    model.eval()
    test_ind_dataloader = DataLoader(torch.Tensor(range(len(test_in_tensor))).long(), batch_size=batch_size, shuffle=True)
    with torch.no_grad():
        total_n_recall_train = 0
        total_n_train = 0
        for batch_X, batch_Y in train_dataloader:
            batch_out = model.forward(batch_X.float().cuda())[0].detach().cpu()
            n_recall_batch = n_recall(batch_X, batch_out, batch_X, False)
            total_n_recall_train += n_recall_batch * len(batch_X)
            total_n_train += len(batch_X)
        print("Train recons recall:", total_n_recall_train / total_n_train)
        train_recons_recall = total_n_recall_train / total_n_train
        
        total_n_recall_out = 0
        total_n_out = 0
        for batch_X, batch_Y in train_dataloader:
            batch_out = model.forward(batch_X.float().cuda())[0].detach().cpu()
            n_recall_batch = n_recall(batch_X, batch_out, batch_Y, True)
            total_n_recall_out += n_recall_batch * len(batch_X)
            total_n_out += len(batch_X)
        print("Train pred recall:", total_n_recall_out / total_n_out)
        train_out_recall = total_n_recall_out / total_n_out
        
        total_n_recall_recons = 0
        total_n_recons = 0
        for batch_X, batch_Y in test_dataloader:
            batch_out = model.forward(batch_X.cuda())[0].detach().cpu()
            n_recall_batch = n_recall(batch_X, batch_out, batch_X, False)
            total_n_recall_recons += n_recall_batch * len(batch_ind)
            total_n_recons += len(batch_ind)
        print("Test recons recall:", total_n_recall_recons / total_n_recons)
        test_recons_recall = total_n_recall_recons / total_n_recons
        
        total_n_recall_out = 0
        total_n_out = 0
        for batch_X, batch_Y in test_dataloader:
            batch_out = model.forward(batch_X.cuda())[0].detach().cpu()
            n_recall_batch = n_recall(batch_X, batch_out, batch_Y, True)
            total_n_recall_out += n_recall_batch * len(batch_ind)
            total_n_out += len(batch_ind)
        print("Test outsample recall:", total_n_recall_out / total_n_out)
        test_outsample_recall = total_n_recall_out / total_n_out
    return model, {"train_recons": train_recons_recall, "train_out": train_out_recall, "test_recons": test_recons_recall, "test_out": test_outsample_recall}

def train_and_eval_graph(sim, train_in_tensor, train_out_tensor, train_uids, val_in_tensor, val_out_tensor, val_uids, test_in_tensor, test_out_tensor, test_uids):
    train_dataset = TensorDataset(train_in_tensor, train_in_tensor + train_out_tensor, train_uids)
    val_dataset = TensorDataset(val_in_tensor, val_in_tensor + val_out_tensor, val_uids)
    test_dataset = TensorDataset(test_in_tensor, test_in_tensor + test_out_tensor, test_uids)
    # Create a data loader from the dataset
    # Type of sampling and batch size are specified at this step
    train_dataloader = DataLoader(train_dataset, batch_size=256, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=256, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=256, shuffle=True)
    lr = 2e-3
    graph_model = GraphVAE(input_dim = item_dim, latent_dim = 256, hidden_dims = [512, 256], kl_weight = 1, sim = sim).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)
    train_graph_model(graph_model, train_dataloader, val_dataloader, n_epochs = 40)
    
    model.eval()
    with torch.no_grad():
        for batch_X, batch_Y, batch_ind in train_dataloader:
            model.set_embeddings(batch_X, batch_ind)
        for batch_X, batch_Y, batch_ind in val_dataloader:
            model.set_embeddings(batch_X, batch_ind)
        for batch_X, batch_Y, batch_ind in test_dataloader:
            model.set_embeddings(batch_X, batch_ind)

        total_n_recall_recons = 0
        total_n_recons = 0
        for batch_X, batch_Y, batch_ind in val_dataloader:
            batch_out = model.forward(batch_X.float().cuda(), batch_ind)[0].detach().cpu()
            n_recall_batch = n_recall(batch_X, batch_out, batch_X, False)
            total_n_recall_recons += n_recall_batch * len(batch_ind)
            total_n_recons += len(batch_ind)
        print("Val recons recall:", total_n_recall_recons / total_n_recons)
        test_recons_recall = total_n_recall_recons / total_n_recons


        total_n_recall_out = 0
        total_n_out = 0
        for batch_X, batch_Y, batch_ind in val_dataloader:
            batch_out = model.forward(batch_X.float().cuda(), batch_ind)[0].detach().cpu()
            n_recall_batch = n_recall(batch_X, batch_out, batch_Y, True)
            total_n_recall_out += n_recall_batch * len(batch_ind)
            total_n_out += len(batch_ind)
        print("Val outsample recall:", total_n_recall_out / total_n_out)
        test_outsample_recall = total_n_recall_out / total_n_out

        total_n_recall_recons = 0
        total_n_recons = 0
        for batch_X, batch_Y, batch_ind in test_dataloader:
            batch_out = model.forward(batch_X.float().cuda(), batch_ind)[0].detach().cpu()
            n_recall_batch = n_recall(batch_X, batch_out, batch_X, False)
            total_n_recall_recons += n_recall_batch * len(batch_ind)
            total_n_recons += len(batch_ind)
        print("Test recons recall:", total_n_recall_recons / total_n_recons)
        test_recons_recall = total_n_recall_recons / total_n_recons


        total_n_recall_out = 0
        total_n_out = 0
        for batch_X, batch_Y, batch_ind in test_dataloader:
            batch_out = model.forward(batch_X.float().cuda(), batch_ind)[0].detach().cpu()
            n_recall_batch = n_recall(batch_X, batch_out, batch_Y, True)
            total_n_recall_out += n_recall_batch * len(batch_ind)
            total_n_out += len(batch_ind)
        print("Test outsample recall:", total_n_recall_out / total_n_out)
        test_outsample_recall = total_n_recall_out / total_n_out
    return model, {"train_recons": train_recons_recall, "train_out": train_out_recall, "test_recons": test_recons_recall, "test_out": test_outsample_recall}

def compare_on_data(train_path, test_path):
    sparse_train_data = torch.load(train_path)
    sparse_test_data = pickle.load(open(test_path, "rb"))
    sim = pairwise_jaccard_sparse(sparse_train_data.to_dense())
    for i in range(sim.shape[0]):
        sim[i,i] = 0
    dev_df = sparse_train_data.bool().to_dense()
    test_df = list_batch_to_ohe(pd.Series(sparse_test_data))
    test_mask = torch.zeros(N_USERS)
    test_mask[test_ids] = 1
    
    dev_in = dev_df[test_mask == 0]
    dev_out = test_df[test_mask == 0]
    test_in_tensor = dev_df[test_mask == 1]
    test_out_tensor = test_df[test_mask == 1]

    train_uids = (1-test_mask).nonzero().reshape(-1)[:int(.7*len(dev_in))]
    val_uids = (1-test_mask).nonzero().reshape(-1)[int(.7*len(dev_in)):]
    test_uids = test_mask.nonzero().reshape(-1)

    train_in_tensor = dev_in[:int(.7*len(dev_in))]
    val_in_tensor = dev_in[int(.7*len(dev_in)):]

    train_out_tensor = dev_out[:int(.7*len(dev_in))]
    val_out_tensor = dev_out[int(.7*len(dev_in)):]

    base_model, base_results = train_and_eval_base(train_in_tensor, train_out_tensor, val_in_tensor, val_out_tensor, test_in_tensor, test_out_tensor)
    
    graph_model, graph_results = train_and_eval_graph(sim, train_in_tensor, train_out_tensor, train_uids, val_in_tensor, val_out_tensor, val_uids, test_in_tensor, test_out_tensor, test_uids)
    
    return base_model, base_results, graph_model, graph_results

In [37]:
compare_on_data(datapath(f"train/uniform/uniform100_data.pt"), datapath(f"test/uniform/uniform100_test_indices.pickle"))

Done Epoch 0
Train Loss tensor(4357.1841, device='cuda:0')
Val Loss tensor(533.2257, device='cuda:0')
Done Epoch 10
Train Loss tensor(3947.9111, device='cuda:0')
Val Loss tensor(531.8627, device='cuda:0')
Done Epoch 20
Train Loss tensor(3766.6699, device='cuda:0')
Val Loss tensor(533.0829, device='cuda:0')
Train recons recall: tensor(0.0005)
Train pred recall: tensor(0.0001)


RuntimeError: mat1 and mat2 must have the same dtype