In [9]:
import pandas as pd
import torch
import torch.nn as nn
import torch.functional as F
import numpy as np
import torch.optim as optim

In [10]:
# Load the raw ratings as (userID, movieID, rating, time) rows and discard the
# timestamp column, which is not used anywhere below.
ratings_long = pd.read_csv(
    'ratings.csv', names=['userID', 'movieID', 'rating', 'time']
).drop(columns='time')

# Keep only users with more than 5 ratings and movies with more than 10.
# Filtering the value_counts() Series directly avoids Series.iteritems(),
# which was removed in pandas 2.0.
user_counts = ratings_long['userID'].value_counts()
movie_counts = ratings_long['movieID'].value_counts()
users = user_counts[user_counts > 5].index.tolist()
movies = movie_counts[movie_counts > 10].index.tolist()

ratings_long = ratings_long[
    ratings_long['userID'].isin(users) & ratings_long['movieID'].isin(movies)
]

# One row per user, one column per movie; pairs without a rating become NaN.
df = ratings_long.pivot(index='userID', columns='movieID', values='rating')
print(df.shape)

(66182, 14162)


In [3]:
# Build an indicator matrix with the same shape as `df`:
# 1 where a rating is present, 0 where it is missing.
mask = df.copy()
is_rated = mask.notnull()
mask[is_rated] = 1   # observed rating
mask[~is_rated] = 0  # missing rating

# Missing ratings in the rating matrix itself are replaced by 0 in place.
df.fillna(0, inplace=True)

In [4]:
# Pull the underlying numpy arrays out of the two DataFrames.
df_matrix, mask_matrix = df.values, mask.values

# Row-wise split (rows are users): the first 50000 rows are used for training,
# the next 10000 for validation and whatever remains for testing.
# The mask is split at the same row boundaries so it stays aligned with the ratings.
train_matrix, val_matrix, test_matrix = np.split(df_matrix, [50000, 60000])
train_mask, val_mask, test_mask = np.split(mask_matrix, [50000, 60000])

In [5]:
class Autorec(nn.Module):
    """Linear autoencoder that reconstructs a vector of ratings.

    The encoder maps ``input_size`` values to ``hidden_size`` units and the
    decoder maps them back to ``input_size``. The decoder output is passed
    through a sigmoid and multiplied by 5, so every prediction lies between
    0 and 5.

    Args:
        hidden_size: number of units in the hidden layer.
        input_size: length of each rating vector passed to ``forward``.
    """
    def __init__(self, hidden_size, input_size):
        super(Autorec, self).__init__()
        self.input_size=input_size
        self.hidden_size=hidden_size

        self.encoder=nn.Linear(self.input_size, self.hidden_size)
        self.decoder=nn.Linear(self.hidden_size, self.input_size)
        # Initialise the decoder weights to the transpose of the encoder's.
        # clone() makes this a copy rather than a view: without it the two
        # Parameters alias the same storage, so in-place optimizer updates on
        # one would silently modify the other.
        self.decoder.weight.data = self.encoder.weight.data.transpose(0,1).clone()
        self.register_buffer('input', torch.zeros(input_size))

    def forward(self, input_ratings):
        """Return reconstructed ratings with the same shape as ``input_ratings``."""
        # Keep the most recent input in the 'input' buffer registered in __init__.
        self.input=input_ratings
        enc_out = self.encoder(input_ratings)
        # torch.sigmoid is element-wise and takes no `dim` argument.
        dec_out = 5*torch.sigmoid(self.decoder(enc_out))
        return dec_out

In [12]:
def train_minibatch(input_ratings, mask, autorec, optimizer, criterion):
    """Run one optimisation step on a minibatch and return the root of its loss.

    Args:
        input_ratings: tensor of ratings fed to the model and used as the target.
        mask: tensor multiplied element-wise with the model output before the
            loss is computed (the notebook passes 1 for rated, 0 for unrated).
        autorec: the model; called as ``autorec(input_ratings)``.
        optimizer: optimizer whose gradients are zeroed and which is stepped once.
        criterion: loss callable invoked as ``criterion(output, target)``.

    Returns:
        ``sqrt(loss)`` as a tensor detached from the autograd graph.
    """
    optimizer.zero_grad()
    output_ratings=autorec(input_ratings)*mask
    # NOTE(review): with a mean-reduced criterion the loss is averaged over
    # every entry, masked ones included — confirm this is the intended scale.
    loss=criterion(output_ratings,input_ratings)
    loss.backward()
    optimizer.step()
    # The value is only reported, never back-propagated, so detach it to avoid
    # handing the caller a tensor that is still attached to the graph.
    return torch.sqrt(loss.detach())

In [7]:
def validation(input_ratings, mask, autorec,criterion):
    """Evaluate the model on one set of ratings without tracking gradients.

    The model output is multiplied element-wise by ``mask``, compared with
    ``input_ratings`` through ``criterion``, and the square root of that
    value is returned as a tensor.
    """
    with torch.no_grad():
        reconstruction = autorec(input_ratings)
        masked_reconstruction = reconstruction * mask
        error = criterion(masked_reconstruction, input_ratings)
        return torch.sqrt(error)

In [11]:
# Size the model to the number of columns in the training matrix.
autorec=Autorec(hidden_size=500,input_size=train_matrix.shape[1])
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
autorec=autorec.to(device)
# The optimizer is created after the model has been moved to its device.
optimizer=optim.Adam(autorec.parameters())
criterion=nn.MSELoss()

In [None]:
# NOTE(review): `num_batches` was not defined in any visible cell. It is used
# as the number of passes over the training rows; 10 mirrors the total that was
# hard-coded in the progress message — confirm the intended value.
num_batches = 10
batch_size = 50      # rows (users) per minibatch
report_every = 100   # minibatches between progress reports (= 5000 rows)

# The validation tensors never change, so build them once instead of once per
# pass. `.float()` converts the numpy data to float32, the default dtype of the
# nn.Linear parameters; feeding float64 tensors would raise a dtype error.
input_users_val = torch.from_numpy(val_matrix).float().to(device)
mask_val = torch.from_numpy(val_mask).float().to(device)

batch_starts = range(0, train_matrix.shape[0], batch_size)
steps_per_epoch = len(batch_starts)

for batch in range(0, num_batches):
    running_loss = 0.0
    for step, i in enumerate(batch_starts, start=1):
        input_users = torch.from_numpy(train_matrix[i:i+batch_size]).float().to(device)
        input_mask = torch.from_numpy(train_mask[i:i+batch_size]).float().to(device)
        loss = train_minibatch(input_users, input_mask, autorec, optimizer, criterion)
        running_loss += loss.item()
        # Report on a step counter: the row offset `i` is always a multiple of
        # batch_size, so a test such as (i+1) % 5000 == 0 can never be true.
        if step % report_every == 0:
            val_loss = validation(input_users_val, mask_val, autorec, criterion)
            # Training loss is the mean over the minibatches since the last report.
            print('Batch:{} | Step: {}/{} | Training Loss: {} | Validation Loss: {}'.format(batch, step, steps_per_epoch, round(running_loss / report_every, 4), round(val_loss.item(), 4)))
            running_loss = 0.0