In [1]:
# This needs to be here because by default Jupyter only adds the pwd to sys.path
import os, sys
if os.path.abspath('..') not in sys.path: sys.path.append(os.path.abspath('..'))

import pandas as pd
import numpy as np
from pysrc.constants import datapath, N_ITEMS, N_USERS
from pysrc.constants import cachepath, datapath, chartpath
import torch
import time
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Raw interaction strings keyed by user id (ids are kept as strings here).
train_dict = {}
test_dict = {}

# Every training line is expected to hold a user id followed by its items.
with open(datapath("train.txt")) as train_file:
    for raw_line in train_file:
        user_id, items = raw_line.split(maxsplit=1)
        train_dict[user_id] = items

# A test line may consist of a user id only; such users get an empty string.
with open(datapath("test.txt")) as test_file:
    for raw_line in test_file:
        fields = raw_line.split(maxsplit=1)
        if len(fields) <= 1:
            test_dict[fields[0]] = ''
        else:
            user_id, items = fields
            test_dict[user_id] = items


In [3]:
# Turn each user's space-separated item string into a list of integer item ids.
# Rows follow the insertion order of the dictionaries built in the previous cell.
train = [
    [int(n) for n in items.replace('\n', '').split(' ')]
    for items in train_dict.values()
]

# Users without any test items keep an empty list.
test = [
    [] if len(items) == 0 else [int(n) for n in items.replace('\n', '').split(' ')]
    for items in test_dict.values()
]
        

In [4]:
# Scratch cell: sample a 10 x 5 tensor of uniform random values and display it.
x = torch.rand((10,5))
x

tensor([[0.6531, 0.8720, 0.9038, 0.5044, 0.4807],
        [0.4298, 0.1258, 0.7266, 0.5757, 0.0359],
        [0.7359, 0.5284, 0.3511, 0.9357, 0.6824],
        [0.5313, 0.4289, 0.7721, 0.8900, 0.9140],
        [0.5219, 0.6540, 0.8513, 0.8602, 0.7988],
        [0.5735, 0.5862, 0.4771, 0.3313, 0.9801],
        [0.4987, 0.7887, 0.4873, 0.8532, 0.7021],
        [0.5870, 0.2700, 0.1193, 0.8724, 0.3943],
        [0.1641, 0.0973, 0.7888, 0.1286, 0.8808],
        [0.9629, 0.5835, 0.2702, 0.8758, 0.2452]])

In [5]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import Tensor


def log_likelihood_loss(y, yhat):
    """Negative mean (over rows) of the y-weighted log of ``yhat``.

    :param y: tensor of per-entry weights, summed against along dimension 1
    :param yhat: tensor of predicted values, same shape as ``y``
    :return: scalar tensor
    """
    # The 1e-6 offset keeps the logarithm finite when a prediction is exactly zero.
    weighted_log = torch.log(yhat+1e-6) * y
    per_row = weighted_log.sum(dim=1)
    return -per_row.mean()

def likelihood_loss(y, yhat):
    """Negative mean (over rows) of the y-weighted sum of ``yhat``.

    :param y: tensor of per-entry weights, summed against along dimension 1
    :param yhat: tensor of predicted values, same shape as ``y``
    :return: scalar tensor
    """
    per_row = (yhat * y).sum(dim=1)
    return -per_row.mean()

class VanillaVAE(nn.Module):
    """Fully connected variational autoencoder over flat feature vectors.

    The encoder is a stack of Linear -> BatchNorm1d -> LeakyReLU layers followed
    by two linear heads (mean and log-variance of the latent Gaussian).  The
    decoder mirrors the encoder and ends in a sigmoid whose output is
    L1-normalised per row, so every reconstruction sums to one.
    """

    def __init__(self,
                 input_dim: int,
                 latent_dim: int,
                 hidden_dims = None,
                 kl_weight = .2
                 ):
        """
        :param input_dim: width of the input (and reconstructed) vectors
        :param latent_dim: width of the latent code
        :param hidden_dims: encoder layer widths, first to last; defaults to [512, 128]
        :param kl_weight: multiplier applied to the KL term in ``loss_function``
        """
        super().__init__()

        self.kl_weight = kl_weight
        self.latent_dim = latent_dim

        # Work on private copies: reversing the caller's list in place (as the
        # previous implementation did) silently corrupts a list that is reused
        # to build a second model.
        encoder_dims = [512, 128] if hidden_dims is None else list(hidden_dims)
        decoder_dims = encoder_dims[::-1]

        # Kept in decoder order (last encoder width first), which is what this
        # attribute held after construction before and what ``decode`` reads.
        self.hidden_dims = decoder_dims

        # Build Encoder
        modules = []
        for in_dim, out_dim in zip([input_dim] + encoder_dims[:-1], encoder_dims):
            modules.append(nn.Linear(in_dim, out_dim))
            modules.append(nn.BatchNorm1d(out_dim))
            modules.append(nn.LeakyReLU())

        self.encoder = nn.Sequential(*modules)
        self.fc_mu = nn.Linear(encoder_dims[-1], latent_dim)
        self.fc_var = nn.Linear(encoder_dims[-1], latent_dim)

        # Build Decoder
        self.decoder_input = nn.Linear(latent_dim, decoder_dims[0])

        modules = []
        for in_dim, out_dim in zip(decoder_dims[:-1], decoder_dims[1:]):
            modules.append(nn.Linear(in_dim, out_dim))
            modules.append(nn.BatchNorm1d(out_dim))
            modules.append(nn.LeakyReLU())

        self.decoder = nn.Sequential(*modules)

        self.final_layer = nn.Sequential(nn.Linear(decoder_dims[-1], input_dim),
                                         nn.Sigmoid()
                                        )

    def encode(self, input: Tensor):
        """
        Runs the encoder and returns the parameters of the latent Gaussian.
        :param input: (Tensor) batch of input vectors [B x input_dim]
        :return: list ``[mu, log_var]``, each [B x latent_dim]
        """
        result = self.encoder(input)
        result = torch.flatten(result, start_dim=1)

        # Two independent linear heads: mean and log-variance.
        mu = self.fc_mu(result)
        log_var = self.fc_var(result)

        return [mu, log_var]

    def decode(self, z: Tensor):
        """
        Maps latent codes back to input space.
        :param z: (Tensor) [B x latent_dim]
        :return: (Tensor) [B x input_dim]; entries are sigmoid outputs rescaled
                 so that each row has unit L1 norm
        """
        result = self.decoder_input(z)
        result = result.view(-1, self.hidden_dims[0])
        result = self.decoder(result)
        result = self.final_layer(result)
        result = F.normalize(result, p=1, dim=1)
        return result

    def reparameterize(self, mu: Tensor, logvar: Tensor) -> Tensor:
        """
        Reparameterization trick: sample from N(mu, var) using noise from N(0, 1).
        :param mu: (Tensor) mean of the latent Gaussian [B x D]
        :param logvar: (Tensor) log-variance of the latent Gaussian [B x D]
        :return: (Tensor) [B x D]
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return eps * std + mu

    def forward(self, x: Tensor, **kwargs):
        """
        Encodes ``x``, samples a latent code and decodes it.
        :param x: (Tensor) [B x input_dim]
        :return: list ``[reconstruction, x, mu, log_var]``
        """
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        # ``decode`` already returns L1-normalised rows, so normalising a
        # second time here (as was done before) was redundant work.
        return [self.decode(z), x, mu, log_var]

    def loss_function(self, recons, x, mu, log_var) -> dict:
        r"""
        Computes the VAE loss: reconstruction term plus weighted KL divergence.
        KL(N(\mu, \sigma), N(0, 1)) = \log \frac{1}{\sigma} + \frac{\sigma^2 + \mu^2}{2} - \frac{1}{2}
        :param recons: (Tensor) reconstruction produced by ``forward``
        :param x: (Tensor) target vectors the reconstruction is scored against
        :param mu: (Tensor) latent means
        :param log_var: (Tensor) latent log-variances
        :return: dict with ``'loss'`` (differentiable), ``'Reconstruction_Loss'``
                 (detached) and ``'KLD'`` (detached, reported with flipped sign)
        """
        kld_weight = self.kl_weight
        recons_loss = log_likelihood_loss(x, recons)

        kld_loss = torch.mean(-0.5 * torch.sum(1 + log_var - mu ** 2 - log_var.exp(), dim = 1), dim = 0)

        loss = recons_loss + kld_weight * kld_loss
        return {'loss': loss, 'Reconstruction_Loss':recons_loss.detach(), 'KLD':-kld_loss.detach()}

    def sample(self,
               num_samples:int,
               current_device: int, **kwargs) -> Tensor:
        """
        Draws latent codes from N(0, I) and decodes them.
        :param num_samples: (Int) number of samples
        :param current_device: device the latent codes are moved to before decoding
        :return: (Tensor) [num_samples x input_dim]
        """
        z = torch.randn(num_samples,
                        self.latent_dim)

        z = z.to(current_device)

        samples = self.decode(z)
        return samples

    def generate(self, x: Tensor, **kwargs) -> Tensor:
        """
        Returns the reconstruction of ``x``.
        :param x: (Tensor) [B x input_dim]
        :return: (Tensor) [B x input_dim]
        """

        return self.forward(x)[0]

In [6]:
# Width of every multi-hot item vector built in this notebook.
item_dim = 91599

# Fixed seed so the held-out user sample is the same on every run.
np.random.seed(0)
all_user_ids = np.array(list(train_dict)).astype(int)
n_test_users = int(.2 * len(train_dict))
# 20% of the users, drawn without replacement.
test_ids = np.random.choice(all_user_ids, n_test_users, replace=False)

In [7]:
def sparsify_uniform(data, p_relative = .1, n_items = None):
    """Build a boolean multi-hot matrix from item lists, randomly dropping entries.

    One uniform draw from ``np.random`` is made per listed item, in listing
    order; the item is kept only when the draw is strictly greater than
    ``p_relative``.

    :param data: iterable of rows, each an iterable of integer item indices
    :param p_relative: drop threshold compared against each uniform draw
    :param n_items: width of the output; defaults to the notebook-level ``item_dim``
    :return: bool tensor of shape ``(number of rows, n_items)``; an empty
             ``data`` yields a ``(0, n_items)`` tensor instead of raising
    """
    if n_items is None:
        n_items = item_dim

    rows = list(data)
    # Allocate the result once instead of stacking one tensor per row.
    X = torch.zeros((len(rows), n_items), dtype=torch.bool)
    for row_idx, row in enumerate(rows):
        items = np.asarray(list(row), dtype=np.int64).reshape(-1)
        if items.size == 0:
            continue
        # A single vectorised draw consumes the same random stream as one
        # np.random.random() call per item.
        keep = np.random.random(items.size) > p_relative
        X[row_idx, torch.from_numpy(items[keep])] = True
    return X
    
def sparsify_items(data, m, sigma):
    """Placeholder: not implemented yet, always returns ``None``.

    NOTE(review): the intended meaning of ``m`` and ``sigma`` is not visible
    in this notebook -- confirm before implementing.
    """
    return

def sparsify_users(data, m, sigma):
    """Placeholder: not implemented yet, always returns ``None``.

    NOTE(review): the intended meaning of ``m`` and ``sigma`` is not visible
    in this notebook -- confirm before implementing.
    """
    return

def list_batch_to_ohe(data):
    """Multi-hot encode ``data`` by calling ``sparsify_uniform`` with a drop threshold of 0."""
    encoded = sparsify_uniform(data, p_relative=0)
    return encoded



In [8]:
def train_model(model, train, val, n_epochs = 100, eval_every = 100):
    """Train a VAE-style model with Adam and an exponentially decaying learning rate.

    The model must provide ``forward(batch_X)`` returning
    ``[recons, x, mu, log_var]`` and ``loss_function(recons, target, mu, log_var)``
    returning a dict with a ``'loss'`` entry.

    :param model: ``nn.Module`` to optimise in place
    :param train: iterable of ``(batch_X, batch_Y)`` pairs; ``batch_X`` is the
                  model input and ``batch_Y`` the reconstruction target
    :param val: iterable with the same layout, only used for reporting
    :param n_epochs: number of passes over ``train``
    :param eval_every: report mean train/validation loss on epochs divisible by
                       this value (a falsy value disables reporting)
    :return: None
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)

    # Follow the model's device rather than forcing CUDA on some loops only.
    device = next(model.parameters()).device

    def _mean_total_loss(loader):
        # Average of the total loss over the batches of `loader` (NaN if empty).
        running, n_batches = 0.0, 0
        for batch_X, batch_Y in loader:
            batch_X = batch_X.float().to(device)
            batch_Y = batch_Y.float().to(device)
            recons, _, mu, log_var = model(batch_X)
            running += model.loss_function(recons, batch_Y, mu, log_var)['loss'].item()
            n_batches += 1
        return running / n_batches if n_batches else float('nan')

    for epoch in range(n_epochs):
        model.train()
        for batch_X, batch_Y in train:
            batch_X = batch_X.float().to(device)
            batch_Y = batch_Y.float().to(device)
            recons, _, mu, log_var = model(batch_X)
            train_loss = model.loss_function(recons, batch_Y, mu, log_var)['loss']
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
        scheduler.step()

        if eval_every and epoch % eval_every == 0:
            print(f"Done Epoch {epoch}")
            model.eval()
            with torch.no_grad():
                total_train_loss = _mean_total_loss(train)
                total_loss = _mean_total_loss(val)
            print("Train Loss", total_train_loss)
            print("Val Loss", total_loss)
            
def train_model_old(model, train, val, n_epochs = 100):
    """Legacy training loop that scores each reconstruction against its own input.

    Unlike ``train_model``, the second element of every batch is ignored and
    ``batch_X`` is used as the reconstruction target.  No evaluation is run;
    ``val`` is accepted only to keep the signature.

    The previous version referenced ``optimizer`` and ``scheduler`` without
    defining them, so it could only run if those names happened to exist in
    the kernel.  They are now created locally.
    NOTE(review): the settings below mirror ``train_model``; the values the
    original run used are not visible in this notebook -- confirm.

    :param model: ``nn.Module`` exposing ``forward`` and ``loss_function``
    :param train: iterable of ``(batch_X, batch_Y)`` pairs
    :param val: unused
    :param n_epochs: number of passes over ``train``
    :return: None
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)

    # Use the model's device instead of unconditionally calling .cuda().
    device = next(model.parameters()).device

    for epoch in range(n_epochs):
        if epoch % 10 == 0:
            print(f"Start Epoch {epoch}")
        model.train()
        for batch_X, _ in train:
            batch_X = batch_X.float().to(device)
            recons, _, mu, log_var = model(batch_X)
            train_loss = model.loss_function(recons, batch_X, mu, log_var)['loss']
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
        scheduler.step()
def top_k_recall(X_in, X_out, X_target, k = 20, mask_in = True):
    """Mean recall@k over the rows that have at least one target item.

    For every row, the ``k`` highest-scoring columns of ``X_out`` are selected
    and recall is the share of that row's targets found among them.  Rows whose
    targets sum to zero are left out of the average.

    :param X_in: [B x n_items] indicator of the items given as input
    :param X_out: [B x n_items] predicted scores
    :param X_target: [B x n_items] indicator of the items to recover
    :param k: number of top-scoring items considered per row
    :param mask_in: when True, scores of items present in ``X_in`` are
                    multiplied by zero before ranking
    :return: scalar tensor; ``0.0`` when no row has any target (this case
             previously raised ``ZeroDivisionError``)
    """
    if mask_in:
        X_out = X_out * (X_in == False)

    top_items = torch.topk(X_out, k).indices.to(X_target.device)

    n_targets = X_target.sum(dim=1)
    has_targets = n_targets != 0
    n_scored = int(has_targets.sum())
    if n_scored == 0:
        return torch.tensor(0.0)

    # Number of targets hit by each row's top-k selection.
    hits = torch.gather(X_target, 1, top_items).sum(dim=1)
    per_row_recall = hits[has_targets] / n_targets[has_targets]
    return per_row_recall.sum() / n_scored

def n_recall(X_in, X_out, X_target, mask_in = True):
    """Recall where each row's cut-off equals its own number of target items.

    For a row with ``m`` targets, the ``m`` highest-scoring columns of
    ``X_out`` are selected and the share of targets among them is that row's
    recall.  Rows without targets contribute nothing to the sum, but the sum
    is still divided by the total number of rows.

    :param X_in: [B x n_items] indicator of the items given as input
    :param X_out: [B x n_items] predicted scores
    :param X_target: [B x n_items] indicator of the items to recover
    :param mask_in: when True, items present in ``X_in`` are zeroed out of
                    both the scores and the targets before ranking
    :return: summed per-row recall divided by ``len(X_in)``
    """
    if mask_in:
        unseen = X_in == False
        X_out = X_out * unseen
        X_target = X_target * unseen

    # Rank deep enough for the row with the most targets.
    deepest = int(X_target.sum(axis=1).max())
    ranked = torch.topk(X_out, deepest, sorted=True).indices

    recall_sum = 0
    for row in range(len(X_in)):
        n_wanted = int(X_target[row].sum())
        if n_wanted == 0:
            continue
        best = ranked[row][:n_wanted]
        recall_sum += X_target[row][best].sum() / n_wanted

    return recall_sum / len(X_in)

In [9]:
# This needs to be here because by default Jupyter only adds the pwd to sys.path
import os, sys
if os.path.abspath('..') not in sys.path: sys.path.append(os.path.abspath('..'))

import torch
import time
import scipy
import pickle 
import pandas as pd
import numpy as np

from tqdm.auto import tqdm
from pysrc.constants import datapath#, N_ITEMS, N_USERS
from torch.utils.data import Dataset
from scipy.sparse import csr_matrix, coo_matrix
from pathlib import Path
from collections import defaultdict

In [10]:
def pairwise_jaccard_sparse(csr):
    """Pairwise Jaccard similarity between the rows of ``csr``.

    Rows are treated as sets of their non-zero columns.  The result stores
    ``|A & B| / |A | B|`` for every pair of rows that share at least one
    column (including each non-empty row with itself); all other pairs are
    implicit zeros.

    :param csr: anything accepted by ``scipy.sparse.csr_matrix``
    :return: square ``csr_matrix`` with one row/column per input row
    """
    binary = csr_matrix(csr).astype(bool).astype(int)

    set_sizes = binary.getnnz(axis=1)
    # Entry (a, b) of the product counts the columns shared by rows a and b.
    overlap = binary.dot(binary.T)

    # Expand |A| so it lines up with the stored entries of `overlap`;
    # |B| is looked up through the stored column indices.
    sizes_a = np.repeat(set_sizes, overlap.getnnz(axis=1))
    sizes_b = set_sizes[overlap.indices]
    jaccard = overlap.data / (sizes_a + sizes_b - overlap.data)

    return csr_matrix((jaccard, overlap.indices, overlap.indptr), overlap.shape)

In [11]:
def sparsify_sparse_uniform(sparse_X, p_relative = .1, n_items = None):
    """Convert a 2-D sparse COO tensor into a dense boolean indicator matrix.

    Every stored entry of ``sparse_X`` (whatever its value) becomes ``True``.
    The previous version looped over ``sparse_train_data``, a name that is not
    defined in this function, instead of its own argument; the row count now
    comes from ``sparse_X``.

    :param sparse_X: 2-D ``torch`` sparse COO tensor, rows x items
    :param p_relative: accepted for signature parity with ``sparsify_uniform``
                       but currently NOT applied -- no entries are dropped.
                       TODO(review): confirm whether dropping was intended.
    :param n_items: width of the output; defaults to the notebook-level ``item_dim``
    :return: bool tensor of shape ``(sparse_X.shape[0], n_items)``
    """
    if n_items is None:
        n_items = item_dim

    n_rows = sparse_X.shape[0]
    X = torch.zeros((n_rows, n_items), dtype=torch.bool)

    # One vectorised scatter instead of coalescing and indexing row by row.
    row_idx, col_idx = sparse_X.coalesce().indices().cpu()
    X[row_idx, col_idx] = True
    return X
    
def sparse_to_ohe(data):
    """Densify ``data`` by calling ``sparsify_sparse_uniform`` with a drop threshold of 0."""
    encoded = sparsify_sparse_uniform(data, p_relative=0)
    return encoded

In [12]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import Tensor

def log_likelihood_loss(y, yhat):
    """Negative row-averaged sum of ``log(yhat + 1e-6)`` weighted by ``y``.

    :param y: tensor of per-entry weights
    :param yhat: tensor of predicted values, same shape as ``y``
    :return: scalar tensor
    """
    # Small offset so that an exact zero prediction does not produce -inf.
    log_pred = torch.log(yhat+1e-6)
    row_totals = torch.sum(log_pred * y, dim=1)
    return -torch.mean(row_totals)

def likelihood_loss(y, yhat):
    """Negative row-averaged sum of ``yhat`` weighted by ``y``.

    :param y: tensor of per-entry weights
    :param yhat: tensor of predicted values, same shape as ``y``
    :return: scalar tensor
    """
    row_totals = torch.sum(yhat * y, dim=1)
    return -torch.mean(row_totals)

class GraphVAE(nn.Module):
    """VAE whose decoder also sees a similarity-weighted average of neighbour codes.

    The encoder matches ``VanillaVAE``.  The model keeps a cache with one
    latent code per user (``self.embeddings``); ``forward`` refreshes the cache
    rows of the current batch, averages the cached codes of each user's
    neighbours using the weights in ``sim`` and decodes the concatenation
    ``[own code, neighbour average]``.
    """

    def __init__(self,
                 input_dim: int,
                 latent_dim: int,
                 hidden_dims = None,
                 kl_weight = .2,
                 sim = None
                 ):
        """
        :param input_dim: width of the input (and reconstructed) vectors
        :param latent_dim: width of the latent code
        :param hidden_dims: encoder layer widths, first to last; defaults to [512, 128]
        :param kl_weight: multiplier applied to the KL term in ``loss_function``
        :param sim: scipy sparse user-by-user weight matrix (required); row
                    ``u`` holds the weights of user ``u``'s neighbours
        :raises ValueError: if ``sim`` is not provided
        """
        super().__init__()

        if sim is None:
            raise ValueError("GraphVAE requires a user-user similarity matrix `sim`")

        self.kl_weight = kl_weight
        self.latent_dim = latent_dim

        # The cache and graph are plain tensors (not parameters or buffers), so
        # they do not follow model.to(...).  Same placement as before on CUDA
        # machines, with a CPU fallback when CUDA is unavailable.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        coo = sim.tocoo()

        # One cached code per column of `sim`: `forward` multiplies dense rows
        # of `sim` with this cache, so the sizes have to agree.
        self.embeddings = torch.zeros(coo.shape[1], self.latent_dim, device=device)

        indices = torch.tensor(np.vstack((coo.row, coo.col)), dtype=torch.long)
        values = torch.tensor(coo.data, dtype=torch.float32)
        self.neighbors = torch.sparse_coo_tensor(
            indices, values, torch.Size(coo.shape)
        ).coalesce().to(device)

        # Per-user sum of neighbour weights, computed in one pass instead of a
        # Python loop over every row of the sparse tensor.
        row_sums = np.asarray(sim.sum(axis=1)).reshape(-1)
        self.neighbors_norms = torch.tensor(row_sums, dtype=torch.float32).to(device)

        # Work on private copies so the caller's list is never reversed in place.
        encoder_dims = [512, 128] if hidden_dims is None else list(hidden_dims)
        decoder_dims = encoder_dims[::-1]

        # Kept in decoder order (last encoder width first), which is what this
        # attribute held after construction before and what ``decode`` reads.
        self.hidden_dims = decoder_dims

        # Build Encoder
        modules = []
        for in_dim, out_dim in zip([input_dim] + encoder_dims[:-1], encoder_dims):
            modules.append(nn.Linear(in_dim, out_dim))
            modules.append(nn.BatchNorm1d(out_dim))
            modules.append(nn.LeakyReLU())

        self.encoder = nn.Sequential(*modules)
        self.fc_mu = nn.Linear(encoder_dims[-1], latent_dim)
        self.fc_var = nn.Linear(encoder_dims[-1], latent_dim)

        # Build Decoder: its input is [own code, neighbour average].
        self.decoder_input = nn.Linear(2 * latent_dim, decoder_dims[0])

        modules = []
        for in_dim, out_dim in zip(decoder_dims[:-1], decoder_dims[1:]):
            modules.append(nn.Linear(in_dim, out_dim))
            modules.append(nn.BatchNorm1d(out_dim))
            modules.append(nn.LeakyReLU())

        self.decoder = nn.Sequential(*modules)

        self.final_layer = nn.Sequential(nn.Linear(decoder_dims[-1], input_dim),
                                         nn.Sigmoid()
                                        )

    def encode(self, input: Tensor):
        """
        Runs the encoder and returns the parameters of the latent Gaussian.
        :param input: (Tensor) batch of input vectors [B x input_dim]
        :return: list ``[mu, log_var]``, each [B x latent_dim]
        """
        result = self.encoder(input)
        result = torch.flatten(result, start_dim=1)
        # Two independent linear heads: mean and log-variance.
        mu = self.fc_mu(result)
        log_var = self.fc_var(result)

        return [mu, log_var]

    def decode(self, z: Tensor):
        """
        Maps concatenated codes back to input space.
        :param z: (Tensor) [B x 2*latent_dim]
        :return: (Tensor) [B x input_dim]; entries are sigmoid outputs rescaled
                 so that each row has unit L1 norm
        """
        result = self.decoder_input(z)
        result = result.view(-1, self.hidden_dims[0])
        result = self.decoder(result)
        result = self.final_layer(result)
        result = F.normalize(result, p=1, dim=1)
        return result

    def reparameterize(self, mu: Tensor, logvar: Tensor) -> Tensor:
        """
        Reparameterization trick: sample from N(mu, var) using noise from N(0, 1).
        :param mu: (Tensor) mean of the latent Gaussian [B x D]
        :param logvar: (Tensor) log-variance of the latent Gaussian [B x D]
        :return: (Tensor) [B x D]
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return eps * std + mu

    def forward(self, x: Tensor, uids):
        """
        Encodes ``x``, refreshes the cached codes of ``uids`` and decodes each
        code together with the weighted average of its neighbours' cached codes.
        :param x: (Tensor) [B x input_dim]
        :param uids: integer user ids, one per row of ``x``
        :return: list ``[reconstruction, x, mu, log_var]``
        """
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        # Cache detached codes so no gradient flows through other users' rows.
        self.embeddings[uids] = z.detach()
        neighbors = torch.stack([self.neighbors[int(uid)].to_dense() for uid in uids])

        # Weighted average; the 1e-5 avoids dividing by zero for users without
        # neighbours (their neighbour vector is then all zeros).
        weights = (self.neighbors_norms[uids] + 1e-5).unsqueeze(1)
        neighbor_embeds = (neighbors @ self.embeddings) / weights

        z = torch.cat([z, neighbor_embeds], dim=1)

        # ``decode`` already returns L1-normalised rows, so normalising a
        # second time here (as was done before) was redundant work.
        return [self.decode(z), x, mu, log_var]

    def set_embeddings(self, x, uids):
        """
        Encodes ``x`` and stores one sampled code per user in the cache,
        without decoding.
        :param x: (Tensor) [B x input_dim]
        :param uids: integer user ids, one per row of ``x``
        """
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        self.embeddings[uids] = z.detach()

    def loss_function(self, recons, x, mu, log_var) -> dict:
        r"""
        Computes the VAE loss: reconstruction term plus weighted KL divergence.
        KL(N(\mu, \sigma), N(0, 1)) = \log \frac{1}{\sigma} + \frac{\sigma^2 + \mu^2}{2} - \frac{1}{2}
        :param recons: (Tensor) reconstruction produced by ``forward``
        :param x: (Tensor) target vectors the reconstruction is scored against
        :param mu: (Tensor) latent means
        :param log_var: (Tensor) latent log-variances
        :return: dict with ``'loss'`` (differentiable), ``'Reconstruction_Loss'``
                 (detached) and ``'KLD'`` (detached, reported with flipped sign)
        """
        kld_weight = self.kl_weight
        recons_loss = log_likelihood_loss(x, recons)

        kld_loss = torch.mean(-0.5 * torch.sum(1 + log_var - mu ** 2 - log_var.exp(), dim = 1), dim = 0)

        loss = recons_loss + kld_weight * kld_loss
        return {'loss': loss, 'Reconstruction_Loss':recons_loss.detach(), 'KLD':-kld_loss.detach()}

    def sample(self,
               num_samples:int,
               current_device: int, **kwargs) -> Tensor:
        """
        Draws latent codes from N(0, I) and decodes them as users without
        neighbours.
        :param num_samples: (Int) number of samples
        :param current_device: device the latent codes are moved to before decoding
        :return: (Tensor) [num_samples x input_dim]
        """
        z = torch.randn(num_samples,
                        self.latent_dim)

        z = z.to(current_device)

        # The decoder expects [own code, neighbour average].  A zero neighbour
        # part is what `forward` produces for a user with no neighbours.
        # Previously the bare code was decoded, which failed on the width mismatch.
        no_neighbors = torch.zeros_like(z)
        samples = self.decode(torch.cat([z, no_neighbors], dim=1))
        return samples

    def generate(self, x: Tensor, uids=None, **kwargs) -> Tensor:
        """
        Returns the reconstruction of ``x``.
        :param x: (Tensor) [B x input_dim]
        :param uids: integer user ids, one per row of ``x`` (required by ``forward``)
        :return: (Tensor) [B x input_dim]
        :raises ValueError: if ``uids`` is not given
        """
        if uids is None:
            raise ValueError("generate() needs `uids` to look up neighbour embeddings")
        return self.forward(x, uids)[0]

In [13]:
def train_graph_model(model, train, val, n_epochs = 100, eval_every = 100):
    """Train a graph-aware VAE with Adam and an exponentially decaying learning rate.

    The model must provide ``forward(batch_X, batch_ind)`` returning
    ``[recons, x, mu, log_var]`` and ``loss_function(recons, target, mu, log_var)``
    returning a dict with a ``'loss'`` entry.  The wall-clock duration of every
    epoch is printed.

    :param model: ``nn.Module`` to optimise in place
    :param train: iterable of ``(batch_X, batch_Y, batch_ind)`` triples; input,
                  reconstruction target and user ids
    :param val: iterable with the same layout, only used for reporting
    :param n_epochs: number of passes over ``train``
    :param eval_every: report mean train/validation loss on epochs divisible by
                       this value (a falsy value disables reporting)
    :return: None
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)

    # Follow the model's device rather than forcing CUDA on some loops only.
    device = next(model.parameters()).device

    def _mean_total_loss(loader):
        # Average of the total loss over the batches of `loader` (NaN if empty).
        running, n_batches = 0.0, 0
        for batch_X, batch_Y, batch_ind in loader:
            batch_X = batch_X.float().to(device)
            batch_Y = batch_Y.float().to(device)
            recons, _, mu, log_var = model(batch_X, batch_ind)
            running += model.loss_function(recons, batch_Y, mu, log_var)['loss'].item()
            n_batches += 1
        return running / n_batches if n_batches else float('nan')

    for epoch in range(n_epochs):
        model.train()
        start_time = time.time()
        for batch_X, batch_Y, batch_ind in train:
            batch_X = batch_X.float().to(device)
            batch_Y = batch_Y.float().to(device)
            recons, _, mu, log_var = model(batch_X, batch_ind)
            train_loss = model.loss_function(recons, batch_Y, mu, log_var)['loss']
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
        scheduler.step()
        print(time.time() - start_time)

        if eval_every and epoch % eval_every == 0:
            print(f"Done Epoch {epoch}")
            model.eval()
            with torch.no_grad():
                total_train_loss = _mean_total_loss(train)
                total_loss = _mean_total_loss(val)
            print("Train Loss", total_train_loss)
            print("Val Loss", total_loss)

In [16]:
# Mini-batch size used by every DataLoader built in train_and_eval_base / train_and_eval_graph.
batch_size=256 
def train_and_eval_base(train_in_tensor, train_out_tensor, val_in_tensor, val_out_tensor, test_in_tensor, test_out_tensor):
    """Train a ``VanillaVAE`` and report reconstruction / held-out recall.

    Each ``*_in_tensor`` holds the interactions shown to the model and each
    ``*_out_tensor`` the held-out ones; the training target is their sum.

    Fixes over the previous version:
    * the training dataset no longer references ``train_uids`` (not a parameter
      of this function) and yields ``(input, target)`` pairs, which is what
      ``train_model`` and the loops below unpack;
    * inputs and targets are moved to the CPU before calling ``n_recall``, so
      they sit on the same device as the CPU model output.

    :return: ``(model, metrics)`` where ``metrics`` has the keys
             ``train_recons``, ``train_out``, ``test_recons`` and ``test_out``
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Training tensors are placed on the device up front; val/test stay where
    # they are and are moved batch by batch.
    train_dataset = TensorDataset(train_in_tensor.to(device), (train_in_tensor + train_out_tensor).to(device))
    val_dataset = TensorDataset(val_in_tensor, val_in_tensor + val_out_tensor)
    test_dataset = TensorDataset(test_in_tensor, test_in_tensor + test_out_tensor)
    # Create a data loader from the dataset
    # Type of sampling and batch size are specified at this step
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    model = VanillaVAE(input_dim = item_dim, latent_dim = 256, hidden_dims = [512, 256], kl_weight = 1).to(device)

    train_model(model, train_dataloader, val_dataloader, n_epochs = 3)

    def _mean_n_recall(loader, held_out):
        # Row-weighted mean of n_recall over `loader`.
        # held_out=False: how well the inputs themselves are reconstructed.
        # held_out=True: how well the target is recovered once inputs are masked.
        weighted_sum, n_rows = 0, 0
        for batch_X, batch_Y in loader:
            batch_out = model.forward(batch_X.float().to(device))[0].detach().cpu()
            batch_X, batch_Y = batch_X.cpu(), batch_Y.cpu()
            if held_out:
                n_recall_batch = n_recall(batch_X, batch_out, batch_Y, True)
            else:
                n_recall_batch = n_recall(batch_X, batch_out, batch_X, False)
            weighted_sum += n_recall_batch * len(batch_X)
            n_rows += len(batch_X)
        return weighted_sum / n_rows

    model.eval()
    with torch.no_grad():
        train_recons_recall = _mean_n_recall(train_dataloader, held_out=False)
        print("Train recons recall:", train_recons_recall)

        train_out_recall = _mean_n_recall(train_dataloader, held_out=True)
        print("Train pred recall:", train_out_recall)

        test_recons_recall = _mean_n_recall(test_dataloader, held_out=False)
        print("Test recons recall:", test_recons_recall)

        test_outsample_recall = _mean_n_recall(test_dataloader, held_out=True)
        print("Test outsample recall:", test_outsample_recall)
    return model, {"train_recons": train_recons_recall, "train_out": train_out_recall, "test_recons": test_recons_recall, "test_out": test_outsample_recall}

def train_and_eval_graph(sim, train_in_tensor, train_out_tensor, train_uids, val_in_tensor, val_out_tensor, val_uids, test_in_tensor, test_out_tensor, test_uids):
    """Train a ``GraphVAE`` and report reconstruction / held-out recall.

    Each ``*_in_tensor`` holds the interactions shown to the model, each
    ``*_out_tensor`` the held-out ones (the training target is their sum) and
    each ``*_uids`` the user id of every row.

    Fixes over the previous version:
    * the two training metrics were stored under the ``test_*`` names, so the
      final ``return`` referenced ``train_recons_recall`` / ``train_out_recall``
      that were never assigned;
    * inputs and targets are moved to the CPU before calling ``n_recall``, so
      they sit on the same device as the CPU model output.

    :param sim: user-by-user similarity matrix handed to ``GraphVAE``
    :return: ``(model, metrics)`` where ``metrics`` has the keys
             ``train_recons``, ``train_out``, ``test_recons`` and ``test_out``
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Training tensors are placed on the device up front; val/test stay where
    # they are and are moved batch by batch.
    train_dataset = TensorDataset(train_in_tensor.to(device), (train_in_tensor + train_out_tensor).to(device), train_uids.to(device))
    val_dataset = TensorDataset(val_in_tensor, val_in_tensor + val_out_tensor, val_uids)
    test_dataset = TensorDataset(test_in_tensor, test_in_tensor + test_out_tensor, test_uids)
    # Create a data loader from the dataset
    # Type of sampling and batch size are specified at this step
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    model = GraphVAE(input_dim = item_dim, latent_dim = 256, hidden_dims = [512, 256], kl_weight = 1, sim = sim).to(device)

    train_graph_model(model, train_dataloader, val_dataloader, n_epochs = 3)

    def _mean_n_recall(loader, held_out):
        # Row-weighted mean of n_recall over `loader`.
        # held_out=False: how well the inputs themselves are reconstructed.
        # held_out=True: how well the target is recovered once inputs are masked.
        weighted_sum, n_rows = 0, 0
        for batch_X, batch_Y, batch_ind in loader:
            batch_out = model.forward(batch_X.float().to(device), batch_ind)[0].detach().cpu()
            batch_X, batch_Y = batch_X.cpu(), batch_Y.cpu()
            if held_out:
                n_recall_batch = n_recall(batch_X, batch_out, batch_Y, True)
            else:
                n_recall_batch = n_recall(batch_X, batch_out, batch_X, False)
            weighted_sum += n_recall_batch * len(batch_X)
            n_rows += len(batch_X)
        return weighted_sum / n_rows

    model.eval()
    with torch.no_grad():
        # Refresh the cached code of every user with the final weights before
        # any neighbour average is computed for evaluation.
        for loader in (train_dataloader, val_dataloader, test_dataloader):
            for batch_X, batch_Y, batch_ind in loader:
                model.set_embeddings(batch_X.float().to(device), batch_ind)

        train_recons_recall = _mean_n_recall(train_dataloader, held_out=False)
        print("Train recons recall:", train_recons_recall)

        train_out_recall = _mean_n_recall(train_dataloader, held_out=True)
        print("Train outsample recall:", train_out_recall)

        test_recons_recall = _mean_n_recall(test_dataloader, held_out=False)
        print("Test recons recall:", test_recons_recall)

        test_outsample_recall = _mean_n_recall(test_dataloader, held_out=True)
        print("Test outsample recall:", test_outsample_recall)
    return model, {"train_recons": train_recons_recall, "train_out": train_out_recall, "test_recons": test_recons_recall, "test_out": test_outsample_recall}

def compare_on_data(train_path, test_path):
    """Train and evaluate the graph model and the baseline on one dataset.

    Loads the sparse training tensor and the pickled held-out item lists,
    builds the user-user Jaccard similarity graph, splits users into
    train / validation / test and runs both models on identical splits.
    Test users are those listed in the notebook-level ``test_ids``; the
    remaining users are split 70% / 30% into train and validation, in row order.

    :param train_path: path of a ``torch.save``-d sparse tensor (users x items)
    :param test_path: path of a pickle holding the held-out items per user
    :return: ``(base_model, base_results, graph_model, graph_results)``
    """
    sparse_train_data = torch.load(train_path)
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    # The context manager closes the handle, which the bare open() call leaked.
    with open(test_path, "rb") as test_file:
        sparse_test_data = pickle.load(test_file)

    sim = pairwise_jaccard_sparse(sparse_train_data.to_dense())
    # A user is not its own neighbour.  Clearing the diagonal in one call
    # replaces a per-element write loop on a CSR matrix, and dropping the
    # resulting explicit zeros keeps the graph free of zero-weight edges.
    sim.setdiag(0)
    sim.eliminate_zeros()

    dev_df = sparse_train_data.bool().to_dense()
    test_df = list_batch_to_ohe(pd.Series(sparse_test_data))
    test_mask = torch.zeros(dev_df.shape[0])
    # Ignore test ids that fall outside this dataset's row range.
    test_mask[test_ids[test_ids < dev_df.shape[0]]] = 1

    dev_in = dev_df[test_mask == 0]
    dev_out = test_df[test_mask == 0]
    test_in_tensor = dev_df[test_mask == 1]
    test_out_tensor = test_df[test_mask == 1]

    n_train = int(.7*len(dev_in))
    dev_uids = (1-test_mask).nonzero().reshape(-1)

    train_uids = dev_uids[:n_train]
    val_uids = dev_uids[n_train:]
    test_uids = test_mask.nonzero().reshape(-1)

    train_in_tensor = dev_in[:n_train]
    val_in_tensor = dev_in[n_train:]

    train_out_tensor = dev_out[:n_train]
    val_out_tensor = dev_out[n_train:]

    graph_model, graph_results = train_and_eval_graph(sim, train_in_tensor, train_out_tensor, train_uids, val_in_tensor, val_out_tensor, val_uids, test_in_tensor, test_out_tensor, test_uids)

    base_model, base_results = train_and_eval_base(train_in_tensor, train_out_tensor, val_in_tensor, val_out_tensor, test_in_tensor, test_out_tensor)

    return base_model, base_results, graph_model, graph_results

In [None]:
# Baseline vs. graph-model metrics for each uniformly sparsified dataset,
# keyed by the number embedded in the dataset's file name.
uniform_results = {}

for s in (100, 95):  # 80 and 50 are currently disabled
    train_path = datapath(f"train/uniform/uniform{s}_data.pt")
    test_path = datapath(f"test/uniform/uniform{s}_test_indices.pickle")
    bm, br, gm, gr = compare_on_data(train_path, test_path)

    # Only the metric dictionaries are kept; the trained models are dropped.
    uniform_results[s] = (br, gr)

In [None]:
# Run the same comparison on the full (unsparsified) dataset.
compare_on_data(
    datapath("full_data.pt"),
    datapath("full_data_test_indices.pickle"),
)

In [None]:
# Scratch cell: allocate a length-10 zero vector and move it to the GPU (requires CUDA).
x = torch.zeros(10).cuda()

In [None]:
# Scratch cell: display the current `x` converted to a boolean tensor.
x.bool()