# Importing libraries


In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn as parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

# Importing dataset

In [2]:
movies=pd.read_csv('ml-1m/movies.dat',sep='::',header=None,engine='python',encoding='latin-1')
users=pd.read_csv('ml-1m/users.dat',sep='::',header=None,engine='python',encoding='latin-1')
ratings=pd.read_csv('ml-1m/ratings.dat',sep='::',header=None,engine='python',encoding='latin-1')

# Preparing training set and test set

In [3]:
training_set=pd.read_csv('C:/Users/DELL/ml-100k/u1.base',delimiter='\t')
training_set=np.array(training_set,dtype='int')

test_set=pd.read_csv('C:/Users/DELL/ml-100k/u1.test',delimiter='\t')
test_set=np.array(test_set,dtype='int')

# Getting the number of users and movies

In [4]:
nb_users=int(max(max(training_set[:,0]),max(test_set[:,0])))
nb_movies=int(max(max(training_set[:,1]),max(test_set[:,1])))

# Converting the data into an array with users in lines& movies in columns

In [5]:
def convert(data):
    new_data=[]
    
    for id_users in range(1,nb_users+1):
        id_movies=data[:,1][data[:,0]==id_users]
        id_ratings=data[:,1][data[:,0]==id_users]
        ratings=np.zeros(nb_movies)
        ratings[id_movies-1]=id_ratings
        new_data.append(list(ratings))
    return new_data

training_set=convert(training_set)
test_set=convert(test_set)

# Converting the data into Torchsensors

In [6]:
training_set=torch.FloatTensor(training_set)
test_set=torch.FloatTensor(test_set)

# Creating the architecture of neural network

In [8]:
class SAE(nn.Module):
    def __init__(self, ):
        super(SAE,self).__init__()
        self.fc1=nn.Linear(nb_movies,20)
        self.fc2=nn.Linear(20,10)
        self.fc3=nn.Linear(10,20)
        self.fc4=nn.Linear(20,nb_movies)
        self.activation=nn.Sigmoid()
        
    def forward(self,x):
        x=self.activation(self.fc1(x))
        x=self.activation(self.fc2(x))
        x=self.activation(self.fc3(x))
        x=self.fc4(x)
        return x
    
sae=SAE()
criterion=nn.MSELoss()
optimizer=optim.RMSprop(sae.parameters(),lr=0.01,weight_decay=0.5)

# Training the SAE

In [9]:
nb_epoch = 200
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_user in range(nb_users):
        input = Variable(training_set[id_user]).unsqueeze(0)
        target = input.clone()
        if torch.sum(target.data > 0) > 0:
            output = sae(input)
            target.require_grad = False
            output[target == 0] = 0
            loss = criterion(output, target)
            mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
            loss.backward()
            train_loss += np.sqrt(loss.data*mean_corrector)
            s += 1.
            optimizer.step()
    print('epoch: '+str(epoch)+'loss: '+ str(train_loss/s))

epoch: 1loss: tensor(395.6605)
epoch: 2loss: tensor(266.8954)
epoch: 3loss: tensor(184.7113)
epoch: 4loss: tensor(143.4373)
epoch: 5loss: tensor(135.1012)
epoch: 6loss: tensor(155.7924)
epoch: 7loss: tensor(152.0426)
epoch: 8loss: tensor(132.5025)
epoch: 9loss: tensor(132.9315)
epoch: 10loss: tensor(125.2810)
epoch: 11loss: tensor(105.3850)
epoch: 12loss: tensor(78.9706)
epoch: 13loss: tensor(66.8387)
epoch: 14loss: tensor(62.1048)
epoch: 15loss: tensor(56.2652)
epoch: 16loss: tensor(51.7755)
epoch: 17loss: tensor(46.8818)
epoch: 18loss: tensor(39.8914)
epoch: 19loss: tensor(27.3421)
epoch: 20loss: tensor(23.7596)
epoch: 21loss: tensor(22.2375)
epoch: 22loss: tensor(21.3877)
epoch: 23loss: tensor(17.5207)
epoch: 24loss: tensor(12.1817)
epoch: 25loss: tensor(8.2791)
epoch: 26loss: tensor(11.2466)
epoch: 27loss: tensor(9.5856)
epoch: 28loss: tensor(6.1593)
epoch: 29loss: tensor(7.5583)
epoch: 30loss: tensor(5.4689)
epoch: 31loss: tensor(4.6712)
epoch: 32loss: tensor(3.8125)
epoch: 33loss

# Testing the SAE

In [10]:
test_loss = 0
s = 0.
for id_user in range(nb_users):
    input = Variable(training_set[id_user]).unsqueeze(0)
    target = Variable(test_set[id_user]).unsqueeze(0)
    if torch.sum(target.data > 0) > 0:
        output = sae(input)
        target.require_grad = False
        output[target == 0] = 0
        loss = criterion(output, target)
        mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
        test_loss += np.sqrt(loss.data*mean_corrector)
        s += 1.
print('test loss: '+str(test_loss/s))

test loss: tensor(6.0803)
