In [None]:
#mount Google Drive at /content/drive; the data-loading cells below read
#their files from paths under this mount point
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#lets import the necessary libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [None]:
#load the three MovieLens-1M tables; every .dat file uses '::' as the field
#separator (multi-character, hence the python engine), is latin-1 encoded
#and is read without a header row
dat_read_options=dict(sep='::',engine='python',encoding='latin-1',header=None)
movies=pd.read_csv('/content/drive/MyDrive/Movie_Recommendation/ml-1m/movies.dat',**dat_read_options)
users=pd.read_csv('/content/drive/MyDrive/Movie_Recommendation/ml-1m/users.dat',**dat_read_options)
ratings=pd.read_csv('/content/drive/MyDrive/Movie_Recommendation/ml-1m/ratings.dat',**dat_read_options)

In [None]:
#load the pre-made MovieLens-100k train/test split (fold 1)
#the files are tab separated and have NO header row, so header=None is
#required; with the default header inference pandas would consume the first
#rating of each file as column names and silently drop it from the data
training_set=pd.read_csv('/content/drive/MyDrive/Movie_Recommendation/ml-100k/u1.base',delimiter='\t',header=None)
testing_set=pd.read_csv('/content/drive/MyDrive/Movie_Recommendation/ml-100k/u1.test',delimiter='\t',header=None)

In [None]:
#turn both DataFrames into integer numpy arrays so they can be sliced by column index
training_set,testing_set=(
  np.array(frame,dtype='int') for frame in (training_set,testing_set)
)


In [None]:
#number of users / movies = the largest ID that appears in either split
#column 0 holds the user IDs and column 1 holds the movie IDs, so the movie
#count must compare column 1 of BOTH arrays (the test side previously read
#column 0, i.e. user IDs, by mistake)
nb_users=int(max(training_set[:,0].max(),testing_set[:,0].max()))
nb_movies=int(max(training_set[:,1].max(),testing_set[:,1].max()))


In [None]:
#display the user count computed above
nb_users

943

In [None]:
#display the movie count computed above
nb_movies

1682

In [None]:
#reshape the (user, movie, rating, ...) rows into one rating vector per user
def convert(data):
  """Build a dense user x movie rating table from a long-format array.

  Args:
    data: 2-D numpy array whose column 0 is the user ID, column 1 the
      movie ID and column 2 the rating (IDs are 1-based).

  Returns:
    A list with ``nb_users`` entries (user 1 first). Each entry is a list
    of ``nb_movies`` floats: the user's rating at index ``movie_id - 1``
    and 0.0 for every movie the user has no row for.

  Relies on the module-level ``nb_users`` and ``nb_movies``.
  """
  user_column=data[:,0]
  rows=[]
  for user_id in range(1,nb_users + 1):
    belongs_to_user=user_column == user_id
    #movie IDs start at 1, list positions at 0
    positions=data[belongs_to_user,1] - 1
    row=np.zeros(nb_movies)
    row[positions]=data[belongs_to_user,2]
    rows.append(list(row))
  return rows


In [None]:
#replace each long-format array with its per-user rating table
training_set,testing_set=map(convert,(training_set,testing_set))

In [None]:
#turn both rating tables (lists of lists) into float torch tensors
training_set,testing_set=(
  torch.FloatTensor(table) for table in (training_set,testing_set)
)

In [None]:
#convert the ratings into binary form, in place:
#  0 (no rating) -> -1, ratings 1-2 -> 0 (not liked), ratings >=3 -> 1 (liked)
#the order of the assignments matters: the 0 -> -1 step has to run before
#1 and 2 are mapped to 0
#NOTE: this cell is not idempotent - running it a second time on the already
#converted tensors would turn the "not liked" zeros into -1 and the "liked" ones into 0
for rating_table in (training_set,testing_set):
  rating_table[rating_table == 0] = -1
  rating_table[(rating_table == 1) | (rating_table == 2)] = 0
  rating_table[rating_table >= 3] = 1

In [None]:
#lets define the RBM model
class RBM():
  """Restricted Boltzmann machine with Bernoulli-sampled hidden and visible units.

  Parameters are plain tensors that ``train`` updates in place:
    W: weights, shape (nh, nv)
    a: hidden-unit bias, shape (1, nh)
    b: visible-unit bias, shape (1, nv)
  """

  def __init__(self, nv, nh):
    """Draw W, a and b (in that order) from a standard normal distribution.

    Args:
      nv: number of visible units.
      nh: number of hidden units.
    """
    self.W = torch.randn(nh, nv)
    self.a = torch.randn(1, nh)
    self.b = torch.randn(1, nv)

  @staticmethod
  def _probabilities_and_sample(pre_activation):
    """Return (sigmoid(pre_activation), one Bernoulli draw from those probabilities)."""
    probabilities = torch.sigmoid(pre_activation)
    return probabilities, torch.bernoulli(probabilities)

  def sample_h(self, x):
    """Sample the hidden units given visible values.

    Args:
      x: visible values, shape (batch, nv).

    Returns:
      Tuple ``(p_h_given_v, h)``, both of shape (batch, nh): the activation
      probability of each hidden unit and a 0/1 sample drawn from it.
    """
    # x @ W^T plus the hidden bias, which broadcasts over the batch dimension
    pre_activation = torch.mm(x, self.W.t()) + self.a
    return self._probabilities_and_sample(pre_activation)

  def sample_v(self, y):
    """Sample the visible units given hidden values.

    Args:
      y: hidden values, shape (batch, nh).

    Returns:
      Tuple ``(p_v_given_h, v)``, both of shape (batch, nv): the activation
      probability of each visible unit and a 0/1 sample drawn from it.
    """
    # y @ W plus the visible bias, which broadcasts over the batch dimension
    pre_activation = torch.mm(y, self.W) + self.b
    return self._probabilities_and_sample(pre_activation)

  def train(self, v0, vk, ph0, phk):
    """Apply one parameter update, in place, from a batch.

    The update is the raw difference between the statistics of the input
    (v0, ph0) and of the reconstruction (vk, phk), summed over the batch;
    no learning rate is applied.

    Args:
      v0: input visible values, shape (batch, nv).
      vk: visible values after Gibbs sampling, shape (batch, nv).
      ph0: hidden probabilities given v0, shape (batch, nh).
      phk: hidden probabilities given vk, shape (batch, nh).
    """
    input_stats = torch.mm(v0.t(), ph0)
    reconstruction_stats = torch.mm(vk.t(), phk)
    # the products are (nv, nh); W is stored as (nh, nv), hence the transpose
    self.W.add_((input_stats - reconstruction_stats).t())
    self.b.add_(torch.sum(v0 - vk, 0))
    self.a.add_(torch.sum(ph0 - phk, 0))







In [None]:
#model and batching configuration
#one visible unit per movie (the length of a single user's rating vector),
#100 hidden units, and 100 users per training batch
nv,nh,batch_size=len(training_set[0]),100,100
rbm=RBM(nv=nv,nh=nh)

In [None]:
#train the RBM: for every batch of users, run a chain of Gibbs sampling
#steps starting from the real ratings and update the parameters from the
#difference between the real batch and the sampled reconstruction
nb_epoch=10
nb_gibbs_steps=10  #number of Gibbs sampling steps run per batch
for epoch in range(1,nb_epoch + 1):
  train_loss=0
  s=0.
  #iterate over ALL users: slicing clamps the last batch to the users that
  #remain, so a short final batch is still trained on (stopping the range at
  #nb_users-batch_size skipped the trailing users, and a whole batch whenever
  #nb_users is an exact multiple of batch_size)
  for id_user in range(0,nb_users,batch_size):
    #vk is the state of the Gibbs chain; it starts at the real ratings
    vk=training_set[id_user:id_user+batch_size]
    #v0 is the target: the real ratings, never modified
    v0=training_set[id_user:id_user+batch_size]
    #hidden activation probabilities given the real ratings
    ph0,_ =rbm.sample_h(v0)
    for k in range(nb_gibbs_steps):
      #sample the hidden units from the current visible state
      _,hk =rbm.sample_h(vk)
      #sample a new visible state from those hidden units
      #(this rebinds vk to a fresh tensor, so training_set is not modified below)
      _,vk=rbm.sample_v(hk)
      #keep unrated entries (-1) frozen so they are never reconstructed
      vk[v0<0] = v0[v0<0]
    #hidden activation probabilities given the final sampled visible state
    phk,_ =rbm.sample_h(vk)
    #update weights and biases in place
    rbm.train(v0,vk,ph0,phk)
    #mean absolute error on the entries that carry a real rating
    train_loss+=torch.mean(torch.abs(v0[v0>=0]-vk[v0>=0]))
    s+=1.

  #report the loss averaged over the number of batches in this epoch
  print(f'epoch: {epoch} loss: {train_loss/s}')




epoch: 1 loss: 0.3574027419090271
epoch: 2 loss: 0.2478228658437729
epoch: 3 loss: 0.2501685619354248
epoch: 4 loss: 0.24915796518325806
epoch: 5 loss: 0.2499580681324005
epoch: 6 loss: 0.2475331872701645
epoch: 7 loss: 0.24922235310077667
epoch: 8 loss: 0.24917981028556824
epoch: 9 loss: 0.24557822942733765
epoch: 10 loss: 0.24687665700912476


In [None]:
#evaluate the RBM: one user at a time, activate the hidden units from the
#user's TRAINING ratings, reconstruct the visible layer once, and compare the
#reconstruction with the user's TEST ratings
test_loss=0
s=0.
for user_index in range(nb_users):
  #model input: what the user rated in the training split
  visible_input=training_set[user_index:user_index+1]
  #ground truth: what the user rated in the test split
  target=testing_set[user_index:user_index+1]
  rated=target>=0
  #users without any test rating contribute nothing to the average
  if not rated.any():
    continue
  _,hidden=rbm.sample_h(visible_input)
  _,reconstruction=rbm.sample_v(hidden)
  #mean absolute error over the entries that have a test rating
  test_loss += torch.mean(torch.abs(target[rated]-reconstruction[rated]))
  s += 1.
#average over the users that had at least one test rating
print(f'test loss: {(test_loss/s)}')


test loss: 0.2466803789138794
