In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.optim as optim
from sklearn.utils import shuffle
from torch.autograd import Variable

In [2]:
df=pd.read_csv('ratings.csv',names=['userID','movieID','rating','time'])
df.drop('time',axis=1,inplace=True)
users=[k for k,v in df['userID'].value_counts().iteritems() if v>2]
movies=[k for k,v in df['movieID'].value_counts().iteritems() if v>10]
df=df[(df['userID'].isin(users)) & (df['movieID'].isin(movies))]
df=df.pivot(index='userID',columns='movieID',values='rating')
print (df.shape)

(240447, 14277)


In [3]:
#df_matrix=df.values
#mask_matrix=mask.values
train_df=df.iloc[0:220000]
#val_matrix=df_matrix[120000:130000]
test_df=df.iloc[230000:].reset_index(drop=True)
#train_mask=mask_matrix[0:120000]
val_df=df.iloc[220000:230000].reset_index(drop=True)
#test_mask=mask_matrix[130000:]

In [4]:
class Autorec(nn.Module):
    def __init__(self, hidden_size, input_size):
        super(Autorec, self).__init__()
        self.input_size=input_size
        self.hidden_size=hidden_size
        
        self.encoder=nn.Linear(self.input_size, self.hidden_size)
        self.decoder=nn.Linear(self.hidden_size, self.input_size)
        self.sigmoid=nn.Sigmoid()
        self.decoder.weight.data = self.encoder.weight.data.transpose(0,1)
        #self.register_buffer('input', torch.zeros(input_size))
        
    def forward(self, input_ratings):
        self.input=input_ratings
        enc_out = self.encoder(input_ratings)
        dec_out = 5*self.sigmoid(self.decoder(enc_out))
        return dec_out

In [5]:
def train_minibatch(input_ratings, mask, autorec, optimizer, criterion):
    optimizer.zero_grad()
    output_ratings=autorec(input_ratings.type(torch.cuda.FloatTensor))*mask.type(torch.cuda.FloatTensor)
    loss=criterion(output_ratings,input_ratings.type(torch.cuda.FloatTensor))
    loss.backward()
    optimizer.step()
    return torch.sqrt(loss)

In [6]:
def validation(input_ratings, mask, autorec):
    with torch.no_grad():
        input_ratings=input_ratings.type(torch.cuda.FloatTensor)
        output_ratings=autorec(input_ratings)*mask.type(torch.cuda.FloatTensor)
        #loss=torch.sqrt(criterion(output_ratings,input_ratings.type(torch.cuda.FloatTensor)))
        idx=torch.nonzero(mask)
        loss=0
        for i in idx:
            loss+=((output_ratings[i[0]][i[1]]-input_ratings[i[0]][i[1]]).item())**2
    return np.sqrt(loss/idx.size(0))

In [7]:
autorec=Autorec(hidden_size=500,input_size=train_df.shape[1])
optimizer=optim.Adam(autorec.parameters())
criterion=nn.MSELoss()
device=torch.device('cuda')
autorec=autorec.to(device)

In [8]:
val_mask=val_df.copy()
val_mask[~val_mask.isnull()] = 1  # not nan
val_mask[val_mask.isnull()] = 0   # nan
val_df[val_df.isnull()] = 0   # nan

In [None]:
num_batches=5
val_benchmark=10

input_users_val=torch.from_numpy(val_df.values).to(device).detach()
mask_val=torch.from_numpy(val_mask.values).to(device).detach()

for batch in range(0,num_batches):
    running_loss=0
    #train_df = shuffle(train_df)
    for i in range(0,train_df.shape[0],100):
        #print(i)
        tdf=train_df.iloc[i:i+100].copy()
        train_mask=tdf.copy()
        train_mask[~train_mask.isnull()] = 1  # not nan
        train_mask[train_mask.isnull()] = 0   # nan
        tdf[tdf.isnull()] = 0
        input_users=Variable(torch.from_numpy(tdf.values)).to(device)
        input_mask=torch.from_numpy(train_mask.values).to(device)
        loss=train_minibatch(input_users, input_mask, autorec, optimizer, criterion)
        running_loss+=loss.item()
        if (i)%44000==0:
            val_loss=validation(input_users_val, mask_val, autorec)
            print ('Batch: {} | Step: {}/{} | Training Loss: {} | Validation Loss: {}'.format(batch+1,int(i/44000)+1,5,round(running_loss,4),round(val_loss,4)))
            running_loss=0
            if(val_loss<val_benchmark):
                print ('%---Saving the model---%')
                torch.save({
                    'step':i+1,
                    'autorec_state_dict': autorec.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'batch':batch,
                    'loss':val_loss
                    },'model.pth')
                val_benchmark=val_loss

In [8]:
checkpoint = torch.load('model.pth')
autorec.load_state_dict(checkpoint['autorec_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
autorec.eval()

RuntimeError: Error(s) in loading state_dict for Autorec:
	size mismatch for input: copying a param of torch.Size([14277]) from checkpoint, where the shape is torch.Size([10000, 14277]) in current model.

In [11]:
test_mask=test_df.copy()
test_mask[~test_mask.isnull()] = 1  # not nan
test_mask[test_mask.isnull()] = 0   # nan
test_df[test_df.isnull()] = 0   # nan

In [12]:
input_users_val=torch.from_numpy(test_df.values).to(device).detach()
mask_test=torch.from_numpy(test_mask.values).to(device).detach()
validation(input_users_val, mask_test, autorec)

0.8465419929098972

In [15]:
autorec.encoder.weight.data.transpose(0,1)

tensor([[ 0.0036, -0.0464,  0.0041,  ...,  0.0403, -0.0139,  0.0173],
        [ 0.0017,  0.1003, -0.0075,  ..., -0.0125,  0.0245, -0.0524],
        [-0.0517,  0.0076, -0.0221,  ...,  0.0615,  0.0768, -0.0070],
        ...,
        [-0.0299, -0.0065, -0.0376,  ...,  0.0551,  0.0400,  0.0846],
        [ 0.0404, -0.0047, -0.0087,  ...,  0.0545,  0.0042, -0.0347],
        [ 0.0006,  0.0204, -0.0200,  ...,  0.0152, -0.0204,  0.0281]],
       device='cuda:0')

In [14]:
autorec.decoder.weight.data

tensor([[ 0.0377, -0.0119,  0.0262,  ...,  0.0343, -0.0206, -0.0137],
        [-0.0417,  0.0707, -0.0390,  ...,  0.0310,  0.0063,  0.0040],
        [ 0.0016, -0.0208, -0.0117,  ...,  0.0706,  0.1028, -0.0254],
        ...,
        [-0.0088,  0.0209,  0.0469,  ..., -0.0302, -0.0400,  0.0570],
        [-0.0276, -0.0037,  0.0601,  ...,  0.0496, -0.0669,  0.0181],
        [ 0.0279, -0.0256, -0.0178,  ..., -0.0262,  0.0239, -0.0159]],
       device='cuda:0')