In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


Importing the libraries

In [2]:
import torch
from torch import nn,optim
import numpy as np
import pandas as pd

Importing the dataset

In [3]:
movies = pd.read_csv("/content/gdrive/MyDrive/P16-AutoEncoders (1)/__MACOSX/AutoEncoders/ml-1m/movies.dat", header = None, sep = '::' , engine = 'python' , encoding = 'latin-1')
ratings = pd.read_csv("/content/gdrive/MyDrive/P16-AutoEncoders (1)/__MACOSX/AutoEncoders/ml-1m/ratings.dat", header = None, sep = '::' , engine = 'python' , encoding = 'latin-1')
users = pd.read_csv("/content/gdrive/MyDrive/P16-AutoEncoders (1)/__MACOSX/AutoEncoders/ml-1m/users.dat", header = None, sep = '::' , engine = 'python' , encoding = 'latin-1')

Preparing the training set and the test set

In [4]:
training_set = pd.read_csv("/content/gdrive/MyDrive/P16-AutoEncoders (1)/__MACOSX/AutoEncoders/ml-100k/u1.base", delimiter= '\t')
test_set = pd.read_csv("/content/gdrive/MyDrive/P16-AutoEncoders (1)/__MACOSX/AutoEncoders/ml-100k/u1.test", delimiter= '\t')

In [5]:
training_set = np.array(training_set , dtype = 'int')
test_set = np.array(test_set , dtype = 'int')

Getting the number of users and movies

In [6]:
nb_users = int(max(max(training_set[:, 0]), max(test_set[:, 0])))
nb_movies = int(max(max(training_set[:, 1]), max(test_set[:, 1])))

In [7]:
def convert(data):
  new_data = []
  for id_user in range(1, nb_users + 1):
    id_movies = data[:, 1][data[:, 0] == id_user]
    id_ratings = data[:, 2][data[:, 0] == id_user]
    ratings = np.zeros(nb_movies)
    ratings[id_movies - 1] = id_ratings
    new_data.append(list(ratings))
  return new_data

Converting the data into an array with users in lines and movies in columns


In [8]:
train = convert(training_set)
test = convert(test_set)

Converting the data into Torch tensors

In [9]:
train = torch.FloatTensor(train)
test = torch.FloatTensor(test)

Converting the ratings into binary ratings 1 (Liked) or 0 (Not Liked)

In [10]:
train[train == 0] = -1
train[train == 1] = 0
train[train == 2] = 0
train[train >= 3] = 1
test[test == 0] = -1
test[test == 1] = 0
test[test == 2] = 0
test[test >= 3] = 1

Creating the architecture of the Neural Network

In [11]:
class RBM(nn.Module):
  def __init__(self,nh, nv):
    self.W = torch.randn(nh , nv)
    self.a = torch.randn(1, nh)
    self.b = torch.randn(1, nv)

  def sample_h(self,x):
    wx = torch.mm(x , self.W.t())
    activation = wx + self.a.expand_as(wx)
    w_h_given_v = torch.sigmoid(activation)
    return w_h_given_v , torch.bernoulli(w_h_given_v)

  def sample_v(self,y):
    wy = torch.mm(y , self.W)
    activation = wy + self.b.expand_as(wy)
    w_v_given_h = torch.sigmoid(activation)
    return w_v_given_h , torch.bernoulli(w_v_given_h)

  def train(self, v0,vk, ph0,phk):
    self.W += (torch.mm(v0.t() , ph0) - torch.mm(vk.t() , phk)).t()
    self.a += torch.sum((ph0 - phk) , 0)
    self.b += torch.sum((v0-vk),0)

Training the RBM

In [12]:
nv = len(train[0])
nh = 100
rbm = RBM(nh, nv)
batch_size = 100
n_epochs = 20

for epoch in range(1, n_epochs + 1):
  train_loss = 0
  s = 0.
  for id_user in range(0, nb_users - batch_size , batch_size):
    v0 = train[id_user : id_user + batch_size]
    vk = train[id_user : id_user + batch_size]
    ph0,_ = rbm.sample_h(v0)
    for k in range(10):
      _,hk = rbm.sample_h(vk)
      _,vk = rbm.sample_v(hk)
      vk[v0 < 0] = v0[v0< 0]
    phk,_ = rbm.sample_h(vk)
    rbm.train(v0,vk,ph0,phk)
    train_loss += torch.mean(torch.abs(v0[v0 >= 0] - vk[v0 >= 0]))
    s += 1.
  print(f"Eppch :{epoch} , Train Loss :{train_loss/s}")

Eppch :1 , Train Loss :0.3482656478881836
Eppch :2 , Train Loss :0.2504951059818268
Eppch :3 , Train Loss :0.25145721435546875
Eppch :4 , Train Loss :0.24794724583625793
Eppch :5 , Train Loss :0.24993263185024261
Eppch :6 , Train Loss :0.2485201358795166
Eppch :7 , Train Loss :0.2475098818540573
Eppch :8 , Train Loss :0.24495860934257507
Eppch :9 , Train Loss :0.24499914050102234
Eppch :10 , Train Loss :0.24940729141235352
Eppch :11 , Train Loss :0.2450280785560608
Eppch :12 , Train Loss :0.24566347897052765
Eppch :13 , Train Loss :0.24629990756511688
Eppch :14 , Train Loss :0.2469441294670105
Eppch :15 , Train Loss :0.2474084496498108
Eppch :16 , Train Loss :0.24457107484340668
Eppch :17 , Train Loss :0.2425302118062973
Eppch :18 , Train Loss :0.2475060671567917
Eppch :19 , Train Loss :0.2474788874387741
Eppch :20 , Train Loss :0.24550382792949677


Testing the RBM

In [13]:
test_loss = 0
s = 0.
for id_user in range(nb_users):
  v = train[id_user : id_user + 1]
  vt = test[id_user : id_user + 1]
  ph0,_ = rbm.sample_h(vt)
  if len(vt[vt >= 0]) > 0:
    _,h = rbm.sample_h(v)
    _,v = rbm.sample_v(h)
    test_loss += torch.mean(torch.abs(vt[vt >= 0] - v[vt >= 0]))
    s += 1.
print(f"Test Loss :{test_loss/s}")

Test Loss :0.26436281204223633
