In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.optim as optim
from torch.autograd import Variable

In [2]:
df=pd.read_csv('ratings.csv',names=['userID','movieID','rating','time'])
df.drop('time',axis=1,inplace=True)
users=[k for k,v in df['userID'].value_counts().iteritems() if v>3]
movies=[k for k,v in df['movieID'].value_counts().iteritems() if v>10]
df=df[(df['userID'].isin(users)) & (df['movieID'].isin(movies))]
df=df.pivot(index='userID',columns='movieID',values='rating')
print (df.shape)

(143990, 14251)


In [3]:
#df_matrix=df.values
#mask_matrix=mask.values
train_df=df.iloc[0:120000]
#val_matrix=df_matrix[120000:130000]
test_df=df.iloc[130000:]
#train_mask=mask_matrix[0:120000]
val_df=df.iloc[120000:130000]
#test_mask=mask_matrix[130000:]

In [4]:
class Autorec(nn.Module):
    def __init__(self, hidden_size, input_size):
        super(Autorec, self).__init__()
        self.input_size=input_size
        self.hidden_size=hidden_size
        
        self.encoder=nn.Linear(self.input_size, self.hidden_size)
        self.decoder=nn.Linear(self.hidden_size, self.input_size)
        self.sigmoid=nn.Sigmoid()
        self.decoder.weight.data = self.encoder.weight.data.transpose(0,1)
        self.register_buffer('input', torch.zeros(input_size))
        
    def forward(self, input_ratings):
        self.input=input_ratings
        enc_out = self.encoder(input_ratings)
        dec_out = 5*self.sigmoid(self.decoder(enc_out))
        return dec_out

In [5]:
def train_minibatch(input_ratings, mask, autorec, optimizer, criterion):
    optimizer.zero_grad()
    output_ratings=autorec(input_ratings.type(torch.cuda.FloatTensor))*mask.type(torch.cuda.FloatTensor)
    loss=criterion(output_ratings,input_ratings.type(torch.cuda.FloatTensor))
    loss.backward()
    optimizer.step()
    return torch.sqrt(loss)

In [None]:
def validation(input_ratings, mask, autorec):
    with torch.no_grad():
        input_ratings=input_ratings.type(torch.cuda.FloatTensor)
        output_ratings=autorec(input_ratings)*mask.type(torch.cuda.FloatTensor)
        #loss=torch.sqrt(criterion(output_ratings,input_ratings.type(torch.cuda.FloatTensor)))
        idx=torch.nonzero(mask)
        loss=0
        for i in idx:
            loss+=((output_ratings[i[0]][i[1]]-input_ratings[i[0]][i[1]]).item())**2
    return np.sqrt(loss/idx.size(0))

In [None]:
autorec=Autorec(hidden_size=500,input_size=train_df.shape[1])
optimizer=optim.Adam(autorec.parameters())
criterion=nn.MSELoss()
device=torch.device('cuda')
autorec=autorec.to(device)

In [10]:
num_batches=50
val_benchmark=10

val_mask=val_df.copy()
val_mask[~val_mask.isnull()] = 1  # not nan
val_mask[val_mask.isnull()] = 0   # nan
val_df[val_df.isnull()] = 0   # nan

input_users_val=torch.from_numpy(val_df.values).to(device).detach()
mask_val=torch.from_numpy(val_mask.values).to(device).detach()

for batch in range(0,num_batches):
    running_loss=0
    for i in range(0,train_df.shape[0],50):
        #print(i)
        tdf=train_df.iloc[i:i+50].copy()
        train_mask=tdf.copy()
        train_mask[~train_mask.isnull()] = 1  # not nan
        train_mask[train_mask.isnull()] = 0   # nan
        tdf[tdf.isnull()] = 0
        input_users=Variable(torch.from_numpy(tdf.values)).to(device)
        input_mask=torch.from_numpy(train_mask.values).to(device)
        loss=train_minibatch(input_users, input_mask, autorec, optimizer, criterion)
        running_loss+=loss.item()
        if (i)%24000==0:
            val_loss=validation(input_users_val, mask_val, autorec)
            print ('Batch: {} | Step: {}/{} | Training Loss: {} | Validation Loss: {}'.format(batch+1,int(i/10000)+1,5,round(running_loss,4),round(val_loss,4)))
            running_loss=0
            if(val_loss<val_benchmark):
                print ('%---Saving the model---%')
                torch.save({
                    'step':i+1,
                    'autorec_state_dict': autorec.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'batch':batch,
                    'loss':val_loss
                    },'model.pth')
                val_benchmark=val_loss

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._where(-key, value, inplace=True)


Batch: 1 | Step: 1/5 | Training Loss: 0.0408 | Validation Loss: 1.944
%---Saving the model---%
Batch: 1 | Step: 2/5 | Training Loss: 6.2235 | Validation Loss: 1.248
%---Saving the model---%


KeyboardInterrupt: 