### Gdrive Path mounting

In [1]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


Importing the libs & mod.

In [2]:
import numpy as np
import pandas as pd
import torch
from torch import nn,optim
from torch.autograd import Variable

importing the datasets

In [4]:
#We need to take the datasets one by one , it reduces the complexity!
movies = pd.read_csv('./AutoEncoders/ml-1m/movies.dat', header = None , sep = '::' , engine = 'python', encoding = 'latin-1')
ratings = pd.read_csv('./AutoEncoders/ml-1m/ratings.dat', header = None , sep = '::' , engine = 'python', encoding = 'latin-1')
users = pd.read_csv('./AutoEncoders/ml-1m/users.dat', header = None , sep = '::' , engine = 'python', encoding = 'latin-1')

 Preparing the training set and the test set

In [5]:
training_set = pd.read_csv('./AutoEncoders/ml-100k/u1.base', delimiter= '\t')
test_set = pd.read_csv('./AutoEncoders/ml-100k/u1.test', delimiter= '\t')

##Data Preprocessing

Getting the number of users and movies

In [6]:
training_set = np.array(training_set , dtype = 'int')
test_set = np.array(test_set , dtype = 'int')

In [7]:
nb_users  = int(max(max(training_set[:, 0]), max(test_set[:, 0])))
nb_movies  = int(max(max(training_set[:, 1]), max(test_set[:, 1])))

Converting the data into an array with users in lines and movies in columns

In [8]:
def convert(data):
  new_data = []
  for id_user in range(1 , nb_users + 1):
    #We specify that we just want the movies and ratigns of users(id_user)
    id_movies = data[:, 1][data[:, 0] == id_user]
    id_ratings = data[:, 2][data[:, 0] == id_user]
    ratings = np.zeros(nb_movies)
    ratings[id_movies - 1] = id_ratings
    #We specify that this is going to be a appended as a list
    new_data.append(list(ratings))
  return new_data

In [9]:
train = convert(training_set)
test = convert(test_set)

Converting the data into Torch tensors




In [10]:
train = torch.FloatTensor(train)
test = torch.FloatTensor(test)

Creating the architecture of the Neural Network

In [20]:
class SAE(nn.Module):
    def __init__(self, ):
        super(SAE, self).__init__()
        #We are taking the number  of movies as an input for the network and it'll predict the same movies as an output
        self.fc1 = nn.Linear(nb_movies, 20)
        self.fc2 = nn.Linear(20, 10)
        self.fc3 = nn.Linear(10, 20)
        self.fc4 = nn.Linear(20, nb_movies)
        self.activation = nn.Sigmoid()
    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x


##Training the SAE

In [23]:
sae = SAE()
n_epochs = 200
lr = 1e-2
crition = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters() , lr = lr , weight_decay = 0.5)

##Training Loop
for epoch in range(1, n_epochs + 1):
  train_loss = 0
  s = 0.

  for id_user in range(nb_users):
    input = Variable(train[id_user]).unsqueeze(0)
    target = input.clone()
    if torch.sum(target.data > 0) > 0:
      output = sae(input)
      target.requires_grad = False
      output[target == 0] = 0
      loss = crition(output,target)
      mean_corrector = nb_movies / float(torch.sum(target.data > 0) + 1e-10) ##We want to follow the min max rule
      loss.backward()
      train_loss += np.sqrt(loss.data * mean_corrector)
      s += 1.
      optimizer.step()
  print('Epoch : ' + str(epoch) + '|| Train Loss : ' + str(train_loss/s))

Epoch : 1|| Train Loss : tensor(1.7714)
Epoch : 2|| Train Loss : tensor(1.0966)
Epoch : 3|| Train Loss : tensor(1.0536)
Epoch : 4|| Train Loss : tensor(1.0384)
Epoch : 5|| Train Loss : tensor(1.0309)
Epoch : 6|| Train Loss : tensor(1.0266)
Epoch : 7|| Train Loss : tensor(1.0239)
Epoch : 8|| Train Loss : tensor(1.0218)
Epoch : 9|| Train Loss : tensor(1.0211)
Epoch : 10|| Train Loss : tensor(1.0197)
Epoch : 11|| Train Loss : tensor(1.0188)
Epoch : 12|| Train Loss : tensor(1.0185)
Epoch : 13|| Train Loss : tensor(1.0178)
Epoch : 14|| Train Loss : tensor(1.0176)
Epoch : 15|| Train Loss : tensor(1.0171)
Epoch : 16|| Train Loss : tensor(1.0170)
Epoch : 17|| Train Loss : tensor(1.0165)
Epoch : 18|| Train Loss : tensor(1.0166)
Epoch : 19|| Train Loss : tensor(1.0165)
Epoch : 20|| Train Loss : tensor(1.0161)
Epoch : 21|| Train Loss : tensor(1.0161)
Epoch : 22|| Train Loss : tensor(1.0159)
Epoch : 23|| Train Loss : tensor(1.0158)
Epoch : 24|| Train Loss : tensor(1.0157)
Epoch : 25|| Train Loss :

##Testing the *SAE*

In [25]:
test_loss = 0
s = 0.

for id_user in range(nb_users):
  input = Variable(train[id_user]).unsqueeze(0)
  target = Variable(test[id_user]).unsqueeze(0)
  if torch.sum(target.data > 0)> 0:
    output = sae(input)
    target.required_grad = False
    output[target == 0] = 0
    loss = crition(output,target)
    mean_corrector = nb_movies / float(torch.sum(target.data > 0) + 1e-10)
    test_loss += np.sqrt(loss.data * mean_corrector)
    s += 1.

print('Test Loss : ' + str(test_loss/s))


##SDC

Test Loss : tensor(0.9561)
