## Variational AutoEncoder

### Import packages

In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import load_npz
from scipy import sparse
from torch import nn
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader 
import torch
import torch.nn.functional as F

### Loading data

In [2]:
# Load the validation split; later cells read its userId, movieId and rating columns.
df_val = pd.read_csv('df_val.csv')

In [3]:
# Load the training utility matrix, stored on disk in SciPy's sparse .npz format.
R = load_npz('R_train.npz')

In [4]:
# The rows of R are treated as users and the columns as movies.
n_users,n_movies = R.shape

### Prepare data for PyTorch model

In [5]:
class MovieDataset(Dataset):
    """Serve the rows of a sparse utility matrix as dense float32 tensors.

    Args:
        utility_matrix: 2-D SciPy sparse matrix that supports ``[[idx], :]``
            row selection and ``.toarray()``. One row is one sample.
    """

    def __init__(self, utility_matrix):
        self.utility = utility_matrix

    def __len__(self):
        """Return the number of rows (samples) in the utility matrix."""
        return self.utility.shape[0]

    def __getitem__(self, idx):
        """Return row ``idx`` as a 1-D ``torch.float32`` tensor."""
        # Select a single-row sparse slice and densify only that row, so the
        # full matrix is never materialised.
        dense_row = self.utility[[idx], :].toarray()
        return torch.tensor(dense_row[0], dtype=torch.float32)

In [6]:
# Wrap the sparse training matrix so it can be indexed one user row at a time.
train_dataset = MovieDataset(R)

In [7]:
# Number of user rows per mini-batch.
BATCH_SIZE = 128
# Reshuffle the user rows every epoch while iterating in mini-batches.
train_loader = DataLoader(dataset=train_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=True,
                         )

### Constructing accuracy function

In [8]:
# Per-user lookup tables, filled by the loop in the next cell (entry i belongs to user i).
# Column indices of R whose training value is 1.
one_idxs=[]
# Column indices of R whose training value is -1.
minus_one_idxs=[]
# movieId values from df_val where the user's rating is 1.
hold_out=[]
# movieId values from df_val where the user's rating is -1.
hold_out_minus=[]

In [9]:
# Group the validation ratings by user once, so the loop below performs a dict
# lookup per user instead of re-parsing and re-scanning df_val with two
# query() calls on every iteration. Row order inside each group follows df_val.
_val_pos_by_user = {
    uid: movie_ids.values
    for uid, movie_ids in df_val[df_val.rating == 1].groupby('userId')['movieId']
}
_val_neg_by_user = {
    uid: movie_ids.values
    for uid, movie_ids in df_val[df_val.rating == -1].groupby('userId')['movieId']
}
# Empty array with the movieId dtype, used for users without validation rows.
# It is shared between users, so treat the stored arrays as read-only.
_no_val_movies = df_val.movieId.values[:0]

for i in range(n_users):
    # Densify the user's training row once and reuse it for both lookups.
    user_row = R[[i],:].toarray()[0]
    one_idxs.append(np.where(user_row == 1)[0])
    minus_one_idxs.append(np.where(user_row == -1)[0])
    hold_out.append(_val_pos_by_user.get(i, _no_val_movies))
    hold_out_minus.append(_val_neg_by_user.get(i, _no_val_movies))

In [10]:
def accuracy_func(model,k=10):
    """Score the model's top-k recommendations against the validation ratings.

    For every user the model is run on that user's training row, the movies
    the user already rated in training are excluded from the ranking, and the
    ``k`` highest-scoring movies are compared with the user's validation items.

    The model's mode is not changed here; call ``model.eval()`` beforehand if
    dropout and sampling should be disabled.

    Args:
        model: callable taking a ``(1, n_movies)`` float tensor and returning
            a 4-tuple whose first element holds the per-movie scores.
        k: number of top-ranked movies considered per user.

    Returns:
        The mean over users of ``max(0, (hits - misses) / k)``, where hits and
        misses count top-k movies found in ``hold_out`` and ``hold_out_minus``.
    """
    accuracy = []
    # Evaluation only: no_grad avoids building an autograd graph per user.
    with torch.no_grad():
        for i in range(n_users):
            user_batch = torch.unsqueeze(train_dataset[i].to(torch.float32).to(device),dim=0)
            output,_,_,_ = model(user_batch)
            output = output.to('cpu').detach().numpy()[0]
            # Movies already rated in training must never be recommended again.
            np.put(output,one_idxs[i],-np.inf)
            np.put(output,minus_one_idxs[i],-np.inf)
            # Rank once and reuse the top-k for both the hit and the miss count.
            top_k = np.argsort(output)[::-1][:k]
            c = len(np.intersect1d(top_k,hold_out[i]))
            nc = len(np.intersect1d(top_k,hold_out_minus[i]))
            # Alternative normalisation (Recall@K):
            #   np.max([0,(c-nc)/(np.min([k,len(hold_out[i])+1]))])
            acc = np.max([0,(c-nc)/k]) ## HR@K
            accuracy.append(acc)
    return np.mean(accuracy)

### VAE model

In [11]:
class Encoder(nn.Module):
    """Encode a batch of input vectors into Gaussian posterior parameters.

    Args:
        input_dim: size of each input vector.
        hidden_dim: width of the single hidden layer.
        latent_dim: size of the latent code.
        dropout_rate: dropout probability applied to the normalised input.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim, dropout_rate=0.5):
        super().__init__()

        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc_mu = nn.Linear(hidden_dim, latent_dim)
        self.fc_logvar = nn.Linear(hidden_dim, latent_dim)
        self.activation = nn.Tanh()
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x):
        """Return ``(mu, logvar)``, each of shape ``(batch, latent_dim)``."""
        # L2-normalise along dim 1 (F.normalize defaults), then drop input
        # entries at random while the module is in training mode.
        noisy_input = self.dropout(F.normalize(x))

        hidden = self.activation(self.fc1(noisy_input))
        return self.fc_mu(hidden), self.fc_logvar(hidden)

In [12]:
class Decoder(nn.Module):
    """Decode latent codes back into vectors of size ``input_dim``.

    Args:
        input_dim: size of each reconstructed output vector.
        hidden_dim: width of the single hidden layer.
        latent_dim: size of the latent code fed to the decoder.
        dropout_rate: dropout probability applied to the latent code. The
            default of 0.5 matches the value that used to be hard-coded.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim, dropout_rate=0.5):
        super().__init__()

        self.fc1 = nn.Linear(latent_dim,hidden_dim)
        self.fc_out = nn.Linear(hidden_dim,input_dim)
        self.activation = nn.Tanh()
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self,z):
        """Return the reconstruction for ``z``, shape ``(batch, input_dim)``.

        The output layer is linear; no activation is applied to the result.
        """
        # Dropout acts on the latent code itself and is a no-op in eval mode.
        h1 = self.activation(self.fc1(self.dropout(z)))
        x_rec = self.fc_out(h1)

        return x_rec

In [13]:
class VAE(nn.Module):
    """Variational autoencoder built from an ``Encoder`` and a ``Decoder``.

    Args:
        input_dim: size of each input (and reconstructed) vector.
        hidden_dim: hidden-layer width used by both encoder and decoder.
        latent_dim: size of the latent code.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim):
        super().__init__()

        self.encoder = Encoder(input_dim, hidden_dim, latent_dim)
        self.decoder = Decoder(input_dim, hidden_dim, latent_dim)

    def reparameterize(self, mu, logvar):
        """Return a latent code for the posterior described by ``mu``/``logvar``.

        In training mode this draws ``mu + eps * std`` with ``eps ~ N(0, I)``.
        In eval mode it returns ``mu`` itself, so the result is deterministic.
        """
        if not self.training:
            return mu

        std = torch.exp(0.5 * logvar)
        noise = torch.randn_like(std)
        return mu + noise * std

    def forward(self, x):
        """Return ``(x_rec, z, mu, logvar)`` for the input batch ``x``."""
        mu, logvar = self.encoder(x)
        latent = self.reparameterize(mu, logvar)
        reconstruction = self.decoder(latent)

        return reconstruction, latent, mu, logvar

In [14]:
class ELBOLoss(nn.Module):
    """Training objective: MSE reconstruction error plus a beta-weighted KL term."""

    def __init__(self):
        super().__init__()

    def kl_divergence(self, z, mu, logvar):
        """Single-sample Monte Carlo estimate of KL(q(z|x) || p(z)).

        Evaluates ``log q(z|x) - log p(z)`` at the given sample ``z``, with
        ``q = N(mu, exp(logvar / 2))`` and a standard-normal prior ``p``, and
        sums over the last (latent) dimension.

        Returns:
            A tensor with one estimate per leading index of ``z``.
        """
        std = torch.exp(logvar / 2)
        prior = torch.distributions.Normal(torch.zeros_like(mu), torch.ones_like(std))
        posterior = torch.distributions.Normal(mu, std)

        # Per-dimension log-density ratio at the sample, summed over the latent axis.
        log_ratio = posterior.log_prob(z) - prior.log_prob(z)
        return log_ratio.sum(-1)

    def forward(self, x_rec, x, z, mu, logvar, beta=0.2):
        """Return ``mse(x_rec, x) + mean(beta * KL)`` as a scalar tensor.

        Args:
            x_rec: reconstruction produced by the decoder.
            x: target batch.
            z: latent sample at which the KL estimate is evaluated.
            mu: posterior means.
            logvar: posterior log-variances.
            beta: weight of the KL term.
        """
        # Mean squared error over every element. A multinomial (log-softmax)
        # likelihood would be an alternative reconstruction term.
        reconstruction_error = F.mse_loss(x_rec, x)
        # Sample-based estimate; the closed-form Gaussian KL is an alternative.
        kl_per_sample = self.kl_divergence(z, mu, logvar)
        return reconstruction_error + (beta * kl_per_sample).mean()

In [15]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [16]:
# Reconstruction + KL objective; beta keeps its default of 0.2 when called without it.
loss_func = ELBOLoss()

In [17]:
# One input per movie, a 256-unit hidden layer and a 128-dimensional latent code.
model_vae = VAE(n_movies,256,128).to(device)

In [18]:
# Adam over all VAE parameters with a learning rate of 1e-4.
optimizer = optim.Adam(model_vae.parameters(),lr=0.0001)

### Training Loop

In [19]:
# Number of full passes over the training loader.
epochs = 20

for epoch in range(epochs):
    # Training mode: dropout is active and the latent code is sampled.
    model_vae.train()
    train_losses = []
    for i,x in enumerate(train_loader):
        x = x.to(device).to(torch.float32)
        x_rec,z,mu,logvar = model_vae(x)
        
        # The input batch is also the reconstruction target.
        cost = loss_func(x_rec,x,z,mu,logvar)
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        train_losses.append(cost.item())
    
    # Eval mode: dropout is disabled and the latent code is the posterior mean.
    model_vae.eval()
    acc = accuracy_func(model_vae,10)
    

    # Report the mean batch loss of the epoch and the top-10 validation score.
    print(f"Epoch {epoch + 1},train loss: {torch.tensor(train_losses).mean():.4f}, val accuracy: {acc:.4f}")


Epoch 1,train loss: 0.1507, val accuracy: 0.0168
Epoch 2,train loss: 0.1182, val accuracy: 0.0722
Epoch 3,train loss: 0.0971, val accuracy: 0.1026
Epoch 4,train loss: 0.0796, val accuracy: 0.1180
Epoch 5,train loss: 0.0665, val accuracy: 0.1259
Epoch 6,train loss: 0.0584, val accuracy: 0.1341
Epoch 7,train loss: 0.0522, val accuracy: 0.1427
Epoch 8,train loss: 0.0484, val accuracy: 0.1484
Epoch 9,train loss: 0.0451, val accuracy: 0.1544
Epoch 10,train loss: 0.0434, val accuracy: 0.1595
Epoch 11,train loss: 0.0410, val accuracy: 0.1631
Epoch 12,train loss: 0.0399, val accuracy: 0.1639
Epoch 13,train loss: 0.0392, val accuracy: 0.1654
Epoch 14,train loss: 0.0385, val accuracy: 0.1666
Epoch 15,train loss: 0.0380, val accuracy: 0.1669
Epoch 16,train loss: 0.0382, val accuracy: 0.1672
Epoch 17,train loss: 0.0374, val accuracy: 0.1673
Epoch 18,train loss: 0.0375, val accuracy: 0.1672
Epoch 19,train loss: 0.0361, val accuracy: 0.1676
Epoch 20,train loss: 0.0372, val accuracy: 0.1676
