# Boltzmann Machine

## Downloading the dataset

### ML-100K

In [None]:
!wget "http://files.grouplens.org/datasets/movielens/ml-100k.zip"
!unzip ml-100k.zip
!ls

--2020-06-08 15:18:04--  http://files.grouplens.org/datasets/movielens/ml-100k.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4924029 (4.7M) [application/zip]
Saving to: ‘ml-100k.zip’


2020-06-08 15:18:04 (15.6 MB/s) - ‘ml-100k.zip’ saved [4924029/4924029]

Archive:  ml-100k.zip
   creating: ml-100k/
  inflating: ml-100k/allbut.pl       
  inflating: ml-100k/mku.sh          
  inflating: ml-100k/README          
  inflating: ml-100k/u.data          
  inflating: ml-100k/u.genre         
  inflating: ml-100k/u.info          
  inflating: ml-100k/u.item          
  inflating: ml-100k/u.occupation    
  inflating: ml-100k/u.user          
  inflating: ml-100k/u1.base         
  inflating: ml-100k/u1.test         
  inflating: ml-100k/u2.base         
  inflating: ml-100k/u2.test         
  inflating: ml-100k/u3.base    

### ML-1M

In [None]:
!wget "http://files.grouplens.org/datasets/movielens/ml-1m.zip"
!unzip ml-1m.zip
!ls

--2020-06-08 15:18:16--  http://files.grouplens.org/datasets/movielens/ml-1m.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5917549 (5.6M) [application/zip]
Saving to: ‘ml-1m.zip’


2020-06-08 15:18:19 (2.44 MB/s) - ‘ml-1m.zip’ saved [5917549/5917549]

Archive:  ml-1m.zip
   creating: ml-1m/
  inflating: ml-1m/movies.dat        
  inflating: ml-1m/ratings.dat       
  inflating: ml-1m/README            
  inflating: ml-1m/users.dat         
ml-100k  ml-100k.zip  ml-1m  ml-1m.zip	sample_data


## Importing the libraries

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

## Importing the dataset


In [None]:
# The ML-1M files are parsed here for reference only; we won't be using this dataset.
# All three files are read with the same options: '::' separator, no header row, latin-1 text.
movies, users, ratings = (
    pd.read_csv(path, sep = '::', header = None, engine = 'python', encoding = 'latin-1')
    for path in ('ml-1m/movies.dat', 'ml-1m/users.dat', 'ml-1m/ratings.dat')
)

## Preparing the training set and the test set


In [None]:
# The u1.base / u1.test splits are tab-separated and carry no header row, so
# header=None is required: without it pandas uses the first rating of each file
# as column names and that rating is silently dropped from the data.
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t', header = None)
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t', header = None)
test_set = np.array(test_set, dtype = 'int')

## Getting the number of users and movies


In [None]:
# Largest user id (column 0) and movie id (column 1) found in either split.
nb_users = int(max(training_set[:, 0].max(), test_set[:, 0].max()))
nb_movies = int(max(training_set[:, 1].max(), test_set[:, 1].max()))

## Converting the data into an array with users in lines and movies in columns


In [None]:
def convert(data, n_users = None, n_movies = None):
  """Pivot rating rows into a dense user x movie matrix.

  Parameters
  ----------
  data : 2-D integer numpy array whose column 0 holds the 1-based user id,
    column 1 the 1-based movie id and column 2 the rating.
  n_users, n_movies : optional matrix dimensions. When omitted they fall back
    to the notebook-level nb_users / nb_movies, which keeps the original
    convert(data) call working unchanged.

  Returns
  -------
  A list of n_users lists, each holding n_movies floats: the rating a user
  gave to each movie, or 0 where `data` has no row for that (user, movie) pair.
  """
  if n_users is None:
    n_users = nb_users
  if n_movies is None:
    n_movies = nb_movies
  new_data = []
  for id_user in range(1, n_users + 1):
    rows_of_user = data[:, 0] == id_user  # computed once, reused for movies and ratings
    ratings = np.zeros(n_movies)
    # Movie ids are 1-based, so shift by one to get the column index.
    ratings[data[rows_of_user, 1] - 1] = data[rows_of_user, 2]
    new_data.append(list(ratings))
  return new_data
# Replace both splits by their user x movie rating matrices.
training_set, test_set = convert(training_set), convert(test_set)

## Converting the data into Torch tensors


In [None]:
# Turn each list of per-user rating lists into a 2-D float tensor.
training_set, test_set = (torch.FloatTensor(split) for split in (training_set, test_set))

## Converting the ratings into binary ratings 1 (Liked) or 0 (Not Liked)


In [None]:
# Recode the ratings in place, identically for both splits:
#   0 (no rating) -> -1,  1 or 2 -> 0 (not liked),  3 and above -> 1 (liked).
# Order matters: the zeros meaning "no rating" must become -1 before the low
# ratings are mapped to 0, otherwise the two cases would be merged.
for split in (training_set, test_set):
  split[split == 0] = -1
  split[split == 1] = 0
  split[split == 2] = 0
  split[split >= 3] = 1

## Creating the architecture of the Neural Network


In [None]:
class RBM():
  """Restricted Boltzmann Machine with sigmoid units and Bernoulli sampling.

  Attributes
  ----------
  W : (nh, nv) weight tensor.
  a : (1, nh) bias of the hidden nodes, used when computing p(h = 1 | v).
  b : (1, nv) bias of the visible nodes, used when computing p(v = 1 | h).
  """

  def __init__(self, nv, nh):
    """Initialise weights and both biases from a standard normal distribution.

    nv -- number of visible nodes
    nh -- number of hidden nodes
    """
    self.W = torch.randn(nh, nv)
    self.a = torch.randn(1, nh)
    self.b = torch.randn(1, nv)

  def _sample(self, inputs, weights, bias):
    """Return (sigmoid(inputs x weights + bias), a Bernoulli draw from those probabilities)."""
    pre_activation = torch.mm(inputs, weights)
    # The bias has a single row; expand it so it is added to every row of the mini-batch.
    probabilities = torch.sigmoid(pre_activation + bias.expand_as(pre_activation))
    return probabilities, torch.bernoulli(probabilities)

  def sample_h(self, x):
    """Given a batch of visible nodes x, return (p(h = 1 | v), sampled hidden nodes)."""
    return self._sample(x, self.W.t(), self.a)

  def sample_v(self, y):
    """Given a batch of hidden nodes y, return (p(v = 1 | h), sampled visible nodes)."""
    return self._sample(y, self.W, self.b)

  def train(self, v0, vk, ph0, phk):
    """Update W, b and a in place from one batch.

    v0  -- input batch (the users' ratings)
    vk  -- visible nodes obtained after k sampling steps
    ph0 -- probabilities that the hidden nodes equal 1 given v0
    phk -- probabilities that the hidden nodes equal 1 given vk
    """
    data_term = torch.mm(v0.t(), ph0)
    sample_term = torch.mm(vk.t(), phk)
    # Both products are (nv, nh); transpose the difference to match W's (nh, nv) layout.
    self.W += (data_term - sample_term).t()
    # Biases move by the batch-summed differences: b is the visible bias, a the hidden bias.
    self.b += torch.sum((v0 - vk), 0)
    self.a += torch.sum((ph0 - phk), 0)
# Hyper-parameters: 100 hidden nodes, batches of 100 users, and one visible
# node per column of the training matrix.
nh, batch_size = 100, 100
nv = training_set.shape[1]
rbm = RBM(nv, nh) # the parameters are drawn randomly on construction

## Training the RBM


In [None]:
nb_epoch = 10 # number of passes over the training users
for epoch in range(1, nb_epoch + 1):
  train_loss = 0
  s = 0. # number of batches processed, used to average the loss
  # Step through ALL users in mini-batches. The stop value is nb_users (not
  # nb_users - batch_size) so the trailing users are trained on as well; when
  # nb_users is not a multiple of batch_size the last slice is simply shorter.
  for id_user in range(0, nb_users, batch_size):
    v0 = training_set[id_user : id_user + batch_size] # target: the original ratings, never modified
    vk = training_set[id_user : id_user + batch_size] # start of the Gibbs chain; rebound to fresh samples below
    unrated = v0 < 0   # entries holding -1 (movie not rated by the user)
    rated = v0 >= 0    # entries holding an actual 0/1 rating
    ph0,_ = rbm.sample_h(v0) # probabilities of the hidden nodes given the original ratings
    for k in range(10): # k steps of contrastive divergence
      _,hk = rbm.sample_h(vk) # sample the hidden nodes from the current visible nodes
      _,vk = rbm.sample_v(hk) # sample new visible nodes from those hidden nodes
      vk[unrated] = v0[unrated] # keep unrated entries at -1 so they are not learned
    phk,_ = rbm.sample_h(vk) # hidden probabilities given the last visible sample
    rbm.train(v0, vk, ph0, phk) # update the weights and biases with this batch
    # Mean absolute distance between target and reconstruction, over rated entries only.
    train_loss += torch.mean(torch.abs(v0[rated] - vk[rated]))
    s += 1.
  print('epoch: '+str(epoch)+' loss: '+str(train_loss/s)) # average batch loss of this epoch


epoch: 1 loss: tensor(0.3446)
epoch: 2 loss: tensor(0.2168)
epoch: 3 loss: tensor(0.2445)
epoch: 4 loss: tensor(0.2458)
epoch: 5 loss: tensor(0.2490)
epoch: 6 loss: tensor(0.2498)
epoch: 7 loss: tensor(0.2468)
epoch: 8 loss: tensor(0.2488)
epoch: 9 loss: tensor(0.2497)
epoch: 10 loss: tensor(0.2462)


## Testing the RBM


In [None]:
test_loss = 0
s = 0. # number of users that contributed to the loss
for id_user in range(nb_users):
    v = training_set[id_user:id_user+1] # input: the user's training ratings
    vt = test_set[id_user:id_user+1]    # target: the user's ratings in the test set
    rated = vt >= 0
    # Skip users that have no rating at all in the test set.
    if len(vt[rated]) == 0:
        continue
    # One hidden / visible sampling round trip gives the prediction.
    _,h = rbm.sample_h(v)
    _,v = rbm.sample_v(h)
    # Compare only on the movies the user actually rated in the test set.
    test_loss += torch.mean(torch.abs(vt[rated] - v[rated]))
    s += 1.
print('test loss: '+str(test_loss/s))

test loss: tensor(0.2359)
