<a href="https://colab.research.google.com/github/HarshMartinTopno/ReCom/blob/main/RECOM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [50]:
# importing the libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [51]:
# importing the dataset
movies = pd.read_csv('ml-1m/movies.dat' , sep = '::', header = None, encoding = 'latin-1' )
users = pd.read_csv('ml-1m/users.dat', sep = '::', header = None, encoding = 'latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep = '::', header = None, encoding = 'latin-1')

  movies = pd.read_csv('ml-1m/movies.dat' , sep = '::', header = None, encoding = 'latin-1' )
  users = pd.read_csv('ml-1m/users.dat', sep = '::', header = None, encoding = 'latin-1')
  ratings = pd.read_csv('ml-1m/ratings.dat', sep = '::', header = None, encoding = 'latin-1')


In [52]:
# Preparing the training set and the test set
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t', header = None) # sep and delimiter are the same thing
training_set = np.array(training_set, dtype = 'int' )

test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t', header = None)
test_set = np.array(test_set, dtype = 'int')

In [53]:
# Getting the number of users and movies
nb_users = int(max(max(training_set[:,0]),max(test_set[:,0])))
nb_movies = int(max(max(training_set[:,1]), max(test_set[:,1])))

In [54]:
# Creating an array where the lines are users and the columns are features
def convert(data):
  new_data = []
  for id_users in range(1, nb_users + 1):
   id_movies = data[: , 1][data[: , 0] == id_users]
   id_ratings = data[: , 2][data[: , 0] == id_users]
   ratings = np.zeros(nb_movies)
   ratings[id_movies - 1] = id_ratings
   new_data.append(list(ratings))
  return new_data

training_set = convert(training_set)
test_set = convert(test_set)

In [55]:
# Converting data into tensors
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

In [56]:
# Converting the ratings (1 - 5) into binary rating (0 & 1), where 1 : Liked and 0 : Not Liked
training_set[training_set == 0] = -1
training_set[training_set == 1 ] = 0 # Torch doesn't support 'or' therefore we need to individually convert all ratings
training_set[training_set == 2 ] = 0
training_set[training_set >= 3] = 1

test_set[test_set == 0] = -1
test_set[test_set == 1 ] = 0
test_set[test_set == 2 ] = 0
test_set[test_set >= 3] = 1

In [57]:
# Creating the Architecture of Neural Network (RBM - Restricted Boltazman Machine)
class RBM():

  def __init__(self, nv, nh): # nv : number of visible nodes, nh : number of hidden nodes
  # parameters that will be optimised during the training of the RBM
    self.W = torch.randn(nh,nv) # inintialises all weights randomly
    self.a = torch.randn(1, nh) # Bias for hidden nodes
    self.b = torch.randn(1, nv) # Bias for visible nodes

  def sample_h(self, x): # sample the probabilties of the hidden nodes
    wx = torch.mm(x, self.W.t())
    activation = wx + self.a.expand_as(wx)
    p_h_given_v = torch.sigmoid(activation)
    return p_h_given_v, torch.bernoulli(p_h_given_v)

  def sample_v(self, y): # sample the probabilties of the visible nodes
    wy = torch.mm(y, self.W)
    activation = wy + self.b.expand_as(wy)
    p_h_given_h = torch.sigmoid(activation)
    return p_h_given_h, torch.bernoulli(p_h_given_h)
  def train(self, v0, vk, ph0, phk):
    self.W += torch.mm(v0.t(), ph0).t() - torch.mm(vk.t(), phk).t()
    self.b += torch.sum((v0 - vk), 0)
    self.a += torch.sum((ph0 - phk), 0)


In [62]:
nv = len(training_set[0])
nh = 100 # our choice (tuneable)
batch = 128 # tuneable

In [63]:
model = RBM(nv, nh)

In [64]:
nb_epoch = 20
for epoch in range(1, nb_epoch + 1):
  train_loss = 0
  s = 0.
  for id_users in range(0, nb_users - batch, batch):
    vk = training_set[id_users : id_users + batch]
    v0 = training_set[id_users : id_users + batch]
    ph0, _ = model.sample_h(v0)
    for k in range(10): # number of steps for random walk in contrastive divegence
      _, hk = model.sample_h(vk)  #hidden nodes obtained at kth step
      _, vk = model.sample_v(hk)
      vk[v0 < 0] = v0[v0 < 0]
    phk, _ = model.sample_h(vk)

    model.train(v0, vk, ph0, phk)
    train_loss += torch.mean(torch.abs(v0[v0 >= 0] - vk[v0 >= 0]))
    s += 1.

  print(f"Epoch: {epoch}, Loss: {train_loss/s}")

Epoch: 1, Loss: 0.36805853247642517
Epoch: 2, Loss: 0.2535548210144043
Epoch: 3, Loss: 0.2352680265903473
Epoch: 4, Loss: 0.2469603270292282
Epoch: 5, Loss: 0.24955618381500244
Epoch: 6, Loss: 0.2467261403799057
Epoch: 7, Loss: 0.24906863272190094
Epoch: 8, Loss: 0.2470588982105255
Epoch: 9, Loss: 0.24538518488407135
Epoch: 10, Loss: 0.24696490168571472
Epoch: 11, Loss: 0.2431633621454239
Epoch: 12, Loss: 0.24498772621154785
Epoch: 13, Loss: 0.24479785561561584
Epoch: 14, Loss: 0.2453276365995407
Epoch: 15, Loss: 0.24555285274982452
Epoch: 16, Loss: 0.24400050938129425
Epoch: 17, Loss: 0.24807286262512207
Epoch: 18, Loss: 0.2414771467447281
Epoch: 19, Loss: 0.24636368453502655
Epoch: 20, Loss: 0.24385391175746918
