In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.optim as optim
from sklearn.utils import shuffle
from torch.autograd import Variable

In [2]:
# Load the raw ratings in long format (one row per user/movie rating).
df=pd.read_csv('ratings.csv',names=['userID','movieID','rating','time'])
# The timestamp column is not used anywhere below.
df=df.drop(columns='time')

# Keep only users with more than 2 ratings and movies with more than 10 ratings.
# `Series.iteritems()` was removed in pandas 2.0, so filter the counts directly.
user_counts=df['userID'].value_counts()
movie_counts=df['movieID'].value_counts()
users=user_counts[user_counts>2].index.tolist()
movies=movie_counts[movie_counts>10].index.tolist()
df=df[(df['userID'].isin(users)) & (df['movieID'].isin(movies))]

# Reshape to a user x movie matrix; unrated cells become NaN.
# Rows are not shuffled before the split performed in the next cell.
df=df.pivot(index='userID',columns='movieID',values='rating')
# Replace the userID index by a positional 0..n-1 index.
df=df.reset_index(drop=True)
print ('Dataframe size: {}'.format(df.shape))

Dataframe size: (240447, 14277)


In [3]:
# Row boundaries of the three splits: [0, TRAIN_END) is train,
# [TRAIN_END, VAL_END) is validation, and everything from VAL_END on is test.
# `df` carries a 0..n-1 index at this point, so positional slicing selects the
# same rows as the equivalent label-based slicing.
TRAIN_END=220000
VAL_END=230000

train_df=df.iloc[:TRAIN_END]
print ('Train dataframe size: {}'.format(train_df.shape))

test_df=df.iloc[VAL_END:].reset_index(drop=True)
print ('Test dataframe size: {}'.format(test_df.shape))

val_df=df.iloc[TRAIN_END:VAL_END].reset_index(drop=True)
print ('Validation dataframe size: {}'.format(val_df.shape))

Train dataframe size: (220000, 14277)
Test dataframe size: (10447, 14277)
Validation dataframe size: (10000, 14277)


In [4]:
class VAE(nn.Module):
    """Variational autoencoder with one hidden layer on each side.

    The encoder maps an ``input_size`` vector to ``2 * hidden_size_2`` values
    that are split into a latent mean and a latent log-variance; the decoder
    maps a ``hidden_size_2`` latent vector back to ``input_size`` values.

    Args:
        input_size: width of the input (and of the reconstruction).
        hidden_size_1: width of the hidden layer in encoder and decoder.
        hidden_size_2: dimensionality of the latent space.
    """

    def __init__(self, input_size, hidden_size_1, hidden_size_2):
        super().__init__()
        self.input_size = input_size
        self.hidden_size_1 = hidden_size_1
        self.hidden_size_2 = hidden_size_2

        # Encoder: the last layer emits mean and log-variance side by side.
        self.encoder_linear_l1 = nn.Linear(input_size, hidden_size_1)
        self.encoder_linear_l2 = nn.Linear(hidden_size_1, 2 * hidden_size_2)

        # Decoder: mirrors the encoder, without an output non-linearity.
        self.decode_linear_l1 = nn.Linear(hidden_size_2, hidden_size_1)
        self.decode_linear_l2 = nn.Linear(hidden_size_1, input_size)

        # Kept as an attribute; none of the methods of this class call it.
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_ratings):
        """Encode, sample a latent vector and decode.

        Returns:
            Tuple ``(decoded, mu, logvar)``.
        """
        mu, logvar = self.encode(input_ratings)
        latent = self.reparameterize(mu, logvar)
        return self.decode(latent), mu, logvar

    def encode(self, input_ratings):
        """Return ``(mu, log_var)``: the two halves of the encoder output along dim 1."""
        hidden = F.relu(self.encoder_linear_l1(input_ratings))
        stats = self.encoder_linear_l2(hidden)
        # First half of the columns is the mean, second half the log-variance.
        mu, log_var = torch.chunk(stats, 2, dim=1)
        return mu, log_var

    def reparameterize(self, mu, logvar):
        """Sample ``mu + eps * std`` in training mode; return ``mu`` in eval mode."""
        if not self.training:
            return mu
        std = torch.exp(0.5 * logvar)
        noise = torch.randn_like(std)
        return noise * std + mu

    def decode(self, param):
        """Map a latent vector back to the input space (raw linear output)."""
        hidden = F.relu(self.decode_linear_l1(param))
        return self.decode_linear_l2(hidden)


In [5]:
def loss_criterion(decoded,input_ratings,mu,logvar,annealing_coef):
    """Return reconstruction MSE plus a weighted KL term.

    The reconstruction term is the mean squared error over every element of
    ``decoded`` and ``input_ratings``. The KL term is
    ``-0.5 * sum(1 + logvar - mu^2 - exp(logvar))`` summed over dim 1 and
    averaged over the batch.

    Args:
        decoded: reconstruction, same shape as ``input_ratings``.
        input_ratings: reconstruction target.
        mu: latent means, 2-D (the KL term is summed over dim 1).
        logvar: latent log-variances, same shape as ``mu``.
        annealing_coef: weight applied to the KL term.

    Returns:
        Scalar tensor ``mse + annealing_coef * kl``.
    """
    # Use the functional MSE (mean reduction, same as a default nn.MSELoss) so
    # the function does not depend on a module-level `criterion` object that is
    # only created in a later cell.
    mse_loss=F.mse_loss(decoded,input_ratings)
    kl_per_sample=-0.5*torch.sum(1+logvar-mu.pow(2)-logvar.exp(), dim=1)
    kl_divg=torch.mean(kl_per_sample)
    return mse_loss+annealing_coef*kl_divg

In [11]:
def train_minibatch(input_ratings, vae, optimizer, annealing_coef=0.002):
    """Run one optimisation step on a minibatch and return its loss.

    Entries equal to 0 in ``input_ratings`` are treated as "not rated": the
    model output is multiplied by a 0/1 mask before the loss is computed, so
    those positions compare 0 against 0.

    Args:
        input_ratings: 2-D tensor of ratings with 0 for missing entries.
        vae: model whose call returns ``(output, mu, logvar)``.
        optimizer: optimizer over the parameters of ``vae``.
        annealing_coef: weight of the KL term passed to ``loss_criterion``.

    Returns:
        The minibatch loss as a detached scalar tensor.
    """
    vae.train()
    optimizer.zero_grad()

    # Follow the device the model lives on instead of forcing CUDA, so the
    # same code runs on CPU-only machines.
    first_param=next(vae.parameters(), None)
    device=first_param.device if first_param is not None else input_ratings.device
    input_ratings=input_ratings.to(device=device, dtype=torch.float32)
    mask=(input_ratings!=0).to(input_ratings.dtype)

    output_ratings,mu,logvar=vae(input_ratings)
    loss=loss_criterion(output_ratings*mask,input_ratings,mu,logvar,annealing_coef=annealing_coef)
    loss.backward()
    optimizer.step()
    # Detach so callers that keep the value do not keep the autograd graph alive.
    return loss.detach()

In [7]:
def validation(input_ratings, vae):
    """Return the validation RMSE of ``vae`` on the observed ratings.

    Entries equal to 0 in ``input_ratings`` are treated as "not rated" and are
    ignored. For every user (row) the mean squared error over that user's
    rated entries is computed; the result is the square root of the mean of
    these per-user values.

    Users without any rating are skipped (the element-wise loop this replaces
    divided by zero for them). If no user has a rating, ``nan`` is returned.

    The model is switched to eval mode and left in eval mode.

    Args:
        input_ratings: 2-D tensor of ratings with 0 for missing entries.
        vae: model whose call returns ``(output, mu, logvar)``.

    Returns:
        The RMSE as a Python float.
    """
    with torch.no_grad():
        vae.eval()

        # Follow the device the model lives on instead of forcing CUDA.
        first_param=next(vae.parameters(), None)
        device=first_param.device if first_param is not None else input_ratings.device
        input_ratings=input_ratings.to(device=device, dtype=torch.float32)
        observed=input_ratings!=0

        output_ratings,mu,logvar=vae(input_ratings)

        # Squared error on rated entries only; unrated positions contribute 0.
        squared_error=torch.where(
            observed,
            (input_ratings-output_ratings)**2,
            torch.zeros_like(input_ratings),
        )
        rated_per_user=observed.sum(dim=1)
        has_ratings=rated_per_user>0
        if not bool(has_ratings.any()):
            return float('nan')

        per_user_mse=squared_error.sum(dim=1)[has_ratings]/rated_per_user[has_ratings].to(squared_error.dtype)
        return torch.sqrt(per_user_mse.mean()).item()

In [8]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion=nn.MSELoss()

# One input unit per movie column. Move the model to its device *before*
# building the optimizer, as recommended by the torch.optim documentation.
vae=VAE(input_size=train_df.shape[1], hidden_size_1=512, hidden_size_2=256).to(device)
optimizer=optim.Adam(vae.parameters())

In [9]:
# Missing ratings (NaN) become 0 in place. No separate mask frame is built:
# `validation` derives its mask from the non-zero entries of its input.
val_df.fillna(0, inplace=True)

In [10]:
# Whole validation matrix as one tensor on the target device.
input_users_val=(
    torch.from_numpy(val_df.to_numpy())
    .to(device)
    .detach()
)

In [None]:
# Optionally resume from the checkpoint written by the training loop below.
# The model object in this notebook is `vae`; the checkpoint key is still
# named 'autorec_state_dict' because that is the key the training loop saves.
try:
    # map_location lets a checkpoint saved on GPU be restored on `device`.
    checkpoint = torch.load('model_VAE.pth', map_location=device)
except FileNotFoundError:
    print('No checkpoint found at model_VAE.pth; keeping freshly initialised weights.')
else:
    vae.load_state_dict(checkpoint['autorec_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    vae.eval()

In [12]:
num_batches=60     # number of full passes over train_df (the loop variable `batch` is an epoch index)
val_benchmark=10   # best validation RMSE seen so far; a checkpoint is saved when it improves

BATCH_SIZE=100         # users per minibatch
VALIDATE_EVERY=44000   # run validation after this many training users
steps_per_epoch=train_df.shape[0]//VALIDATE_EVERY

for batch in range(0,num_batches):
    running_loss=0
    for i in range(0,train_df.shape[0],BATCH_SIZE):
        # Positional slicing keeps batches contiguous even if train_df is ever
        # shuffled; fillna returns a new frame, so train_df keeps its NaNs.
        tdf=train_df.iloc[i:i+BATCH_SIZE].fillna(0)

        assert tdf.shape[0]==BATCH_SIZE
        # Plain tensors are enough; the deprecated Variable wrapper is not needed.
        input_users=torch.from_numpy(tdf.values).to(device)
        loss=train_minibatch(input_users, vae, optimizer)
        running_loss+=loss.item()
        if (i+BATCH_SIZE)%VALIDATE_EVERY==0:
            val_rmse=validation(input_users_val, vae)
            # The reported training loss is the sum of minibatch losses since the last report.
            print ('Batch: {} | Step: {}/{} | Training Loss: {} | Validation RMSE: {}'.format(batch+1,(i+BATCH_SIZE)//VALIDATE_EVERY,steps_per_epoch,running_loss,round(val_rmse,4) ))
            running_loss=0
            if(val_rmse<val_benchmark):
                print ('%---Saving the model---%')
                torch.save({
                    'step':i+1,
                    'autorec_state_dict': vae.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'batch':batch,
                    'loss':val_rmse
                    },'model_VAE.pth')
                val_benchmark=val_rmse

Batch: 1 | Step: 1/5 | Training Loss: 0.6869275914505124 | Validation RMSE: 2.9772
%---Saving the model---%
Batch: 1 | Step: 2/5 | Training Loss: 0.32552036418928765 | Validation RMSE: 3.151
Batch: 1 | Step: 3/5 | Training Loss: 0.30914287065388635 | Validation RMSE: 3.4288
Batch: 1 | Step: 4/5 | Training Loss: 0.30104814129299484 | Validation RMSE: 3.6827
Batch: 1 | Step: 5/5 | Training Loss: 0.3041689707606565 | Validation RMSE: 3.9071
Batch: 2 | Step: 1/5 | Training Loss: 0.2977231444674544 | Validation RMSE: 4.01
Batch: 2 | Step: 2/5 | Training Loss: 0.30149137281114236 | Validation RMSE: 4.0952
Batch: 2 | Step: 3/5 | Training Loss: 0.2985427894454915 | Validation RMSE: 4.1102
Batch: 2 | Step: 4/5 | Training Loss: 0.29397519427584484 | Validation RMSE: 4.1139
Batch: 2 | Step: 5/5 | Training Loss: 0.2993569487298373 | Validation RMSE: 4.1198
Batch: 3 | Step: 1/5 | Training Loss: 0.29696795280324295 | Validation RMSE: 4.1105
Batch: 3 | Step: 2/5 | Training Loss: 0.29940205957973376 |

KeyboardInterrupt: 