<a href="https://colab.research.google.com/github/GeniGaus/DeepLearningA-Z/blob/master/RecommenderSystem_Autoencoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Autoencoder
----


In [0]:
# Install a Drive FUSE wrapper.
# https://github.com/astrada/google-drive-ocamlfuse
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse

E: Package 'python-software-properties' has no installation candidate
Selecting previously unselected package libfuse2:amd64.
(Reading database ... 22278 files and directories currently installed.)
Preparing to unpack .../libfuse2_2.9.7-1ubuntu1_amd64.deb ...
Unpacking libfuse2:amd64 (2.9.7-1ubuntu1) ...
Selecting previously unselected package fuse.
Preparing to unpack .../fuse_2.9.7-1ubuntu1_amd64.deb ...
Unpacking fuse (2.9.7-1ubuntu1) ...
Selecting previously unselected package google-drive-ocamlfuse.
Preparing to unpack .../google-drive-ocamlfuse_0.7.0-0ubuntu1~ubuntu18.04.1_amd64.deb ...
Unpacking google-drive-ocamlfuse (0.7.0-0ubuntu1~ubuntu18.04.1) ...
Setting up libfuse2:amd64 (2.9.7-1ubuntu1) ...
Processing triggers for libc-bin (2.27-3ubuntu1) ...
Setting up fuse (2.9.7-1ubuntu1) ...
Setting up google-drive-ocamlfuse (0.7.0-0ubuntu1~ubuntu18.04.1) ...


In [0]:
# Generate auth tokens for Colab
from google.colab import auth
auth.authenticate_user()

In [0]:
# Generate creds for the Drive FUSE library.
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

Please, open the following URL in a web browser: https://accounts.google.com/o/oauth2/auth?client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&response_type=code&access_type=offline&approval_prompt=force
··········
Please, open the following URL in a web browser: https://accounts.google.com/o/oauth2/auth?client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&response_type=code&access_type=offline&approval_prompt=force
Please enter the verification code: Access token retrieved correctly.


In [0]:
# Create a directory and mount Google Drive using that directory.
!mkdir -p drive
!google-drive-ocamlfuse drive

In [0]:
## Install Pytorch

# http://pytorch.org/
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.0-{platform}-linux_x86_64.whl torchvision

tcmalloc: large alloc 1073750016 bytes == 0x5b424000 @  0x7f0ee33f92a4 0x594e17 0x626104 0x51190a 0x4f5277 0x510c78 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f6070 0x510c78 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f6070 0x4f3338 0x510fb0 0x5119bd 0x4f6070


In [0]:
import os
os.chdir('drive/BoltzmannMachine')

###  Building autoencoder model using Pytorch
----




#### Data preprocessing


In [0]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [0]:
# importing dataset
movies = pd.read_csv('ml-1m/movies.dat', sep='::', engine='python', encoding='latin-1')
users = pd.read_csv('ml-1m/users.dat', sep='::', engine='python', encoding='latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep='::', engine='python', encoding='latin-1')

In [0]:
# forming training and test set
training_set = pd.read_csv('ml-100k/u1.base', delimiter='\t')
training_set = np.array(training_set, dtype='int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter='\t')
test_set = np.array(test_set, dtype='int')

In [0]:
# total no. of users and movies
nb_users = int(max(max(training_set[:, 0]), max(test_set[:, 0])))
nb_movies = int(max(max(training_set[:, 1]), max(test_set[:, 1])))

In [0]:
# Create training and test matrices where each line is user, each column is movie and each cell contains the rating which that user gave for the movie.
# If the user gave no rating, then 0 is placed in that cell.
# This will create the input structure which the Boltzmann machine expects,i.e. observations in lines and features in columns.

'''rating_matrix = np.zeros((nb_users, nb_movies), dtype='int')
full_set = np.concatenate((training_set, test_set), axis=0)
for i in range(len(full_set)):
  rating_matrix[full_set[i][0]][full_set[i][1]] = full_set[i][2]
'''

def convert(data):
  converted_data = []
  for user_id in range(nb_users + 1):
    movies_ids = data[:, 1][data[:, 0] == user_id]
    ratings_data = data[:, 2][data[:, 0] == user_id]
    ratings = np.zeros((nb_movies))
    ratings[movies_ids - 1] = ratings_data
    converted_data.append(ratings)
  
  return converted_data
 
training_set = convert(training_set)
test_set = convert(test_set)

training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

### Model Creation

In [0]:
class SAE(nn.Module):
  
  def __init__(self,):
    super(SAE, self).__init__()
    self.fc1 = nn.Linear(nb_movies, 128)
    self.fc2 = nn.Linear(128, 64)
    self.fc3 = nn.Linear(64, 32)
    self.fc4 = nn.Linear(32, 64)
    self.fc5 = nn.Linear(64, 128)
    self.fc6 = nn.Linear(128, nb_movies)
    self.activation = nn.Sigmoid()
 
  def forward(self, x):
    x = self.activation(self.fc1(x))
    x = self.activation(self.fc2(x))
    x = self.activation(self.fc3(x))
    x = self.activation(self.fc4(x))
    x = self.activation(self.fc5(x))
    x = self.fc6(x)
    return x

In [0]:
sae = SAE()
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(), lr = 0.01, weight_decay = 0.5)

In [0]:
# training the model

nb_epochs = 200
for epoch in range(1, nb_epochs + 1):
  training_loss = 0.
  s = 0.
  for id_user in range(nb_users):
    input = Variable(training_set[id_user]).unsqueeze(0)
    target = input.clone()
    if torch.sum(target.data > 0) > 0:
      output = sae(input)
      output[target == 0] = 0
      target.require_grad = False
      loss = criterion(output, target)
      mean_corrector = nb_movies / float(torch.sum(target.data > 0) + 1e-10)
      loss.backward()
      training_loss += np.sqrt(loss.data[0] * mean_corrector)
      s += 1.
      optimizer.step()
  print('epoch: '+str(epoch)+' loss:'+str(training_loss/s))

  app.launch_new_instance()


epoch: 1 loss:tensor(1.4286)
epoch: 2 loss:tensor(1.2000)
epoch: 3 loss:tensor(1.1649)
epoch: 4 loss:tensor(1.1187)
epoch: 5 loss:tensor(1.0916)
epoch: 6 loss:tensor(1.0831)
epoch: 7 loss:tensor(1.0777)
epoch: 8 loss:tensor(1.0735)
epoch: 9 loss:tensor(1.0710)
epoch: 10 loss:tensor(1.0675)
epoch: 11 loss:tensor(1.0655)
epoch: 12 loss:tensor(1.0647)
epoch: 13 loss:tensor(1.0616)
epoch: 14 loss:tensor(1.0591)
epoch: 15 loss:tensor(1.0581)
epoch: 16 loss:tensor(1.0562)
epoch: 17 loss:tensor(1.0537)
epoch: 18 loss:tensor(1.0527)
epoch: 19 loss:tensor(1.0494)
epoch: 20 loss:tensor(1.0487)
epoch: 21 loss:tensor(1.0470)
epoch: 22 loss:tensor(1.0451)
epoch: 23 loss:tensor(1.0444)
epoch: 24 loss:tensor(1.0430)
epoch: 25 loss:tensor(1.0417)
epoch: 26 loss:tensor(1.0419)
epoch: 27 loss:tensor(1.0417)
epoch: 28 loss:tensor(1.0392)
epoch: 29 loss:tensor(1.0391)
epoch: 30 loss:tensor(1.0385)
epoch: 31 loss:tensor(1.0375)
epoch: 32 loss:tensor(1.0362)
epoch: 33 loss:tensor(1.0362)
epoch: 34 loss:tens

In [0]:
# testing the model

test_loss = 0.
s = 0.
with torch.no_grad():
  for id_user in range(nb_users):
    input = Variable(training_set[id_user]).unsqueeze(0)
    target = Variable(test_set[id_user]).unsqueeze(0)
    if torch.sum(target.data > 0) > 0:
      output = sae(input)
      output[target == 0] = 0
      loss = criterion(output, target)
      mean_corrector = nb_movies / float(torch.sum(target.data > 0) + 1e-10)
      test_loss += np.sqrt(loss.data[0] * mean_corrector)
      s += 1.
print('test loss:'+str(test_loss/s))

  del sys.path[0]


test loss:tensor(0.9891)
