In [115]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [116]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.parallel
import torch.utils.data
from torch.autograd import variable

In [117]:
movies = pd.read_csv('/content/drive/MyDrive/Deep learning/P16-Boltzmann-Machines/Boltzmann_Machines/ml-1m/movies.dat', sep='::', engine='python', encoding='latin-1')
users = pd.read_csv('/content/drive/MyDrive/Deep learning/P16-Boltzmann-Machines/Boltzmann_Machines/ml-1m/users.dat', sep='::',engine='python', encoding='latin-1')
ratings = pd.read_csv('/content/drive/MyDrive/Deep learning/P16-Boltzmann-Machines/Boltzmann_Machines/ml-1m/ratings.dat', sep='::',engine='python', encoding='latin-1')

In [118]:
training_set = pd.read_csv('/content/drive/MyDrive/Deep learning/P16-Boltzmann-Machines/Boltzmann_Machines/ml-100k/u1.base', delimiter='\t')
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv('/content/drive/MyDrive/Deep learning/P16-Boltzmann-Machines/Boltzmann_Machines/ml-100k/u1.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')

In [119]:
nb_users = int(max(max(training_set[:, 0]), max(test_set[:, 0])))
nb_movies = int(max(max(training_set[:, 1]), max(test_set[:, 1])))
print(f"The no of users are {nb_users}")
print(f"The number of movies are {nb_movies}")

The no of users are 943
The number of movies are 1682


Convert the data into list of lists..every row contains user and every column has movies --

In [120]:
def convert(data):
    new_data = []
    for id_users in range(1, nb_users + 1):
        id_movies = data[:,1][data[:,0] == id_users]
        id_ratings = data[:,2][data[:,0] == id_users]
        ratings = np.zeros(nb_movies)
        ratings[id_movies - 1] = id_ratings
        new_data.append(list(ratings))
    return new_data


  

In [121]:
training_set = convert(training_set)
test_set = convert(test_set)

In [122]:
print(training_set[0])
print(test_set[0])

# 0 means users has not rated this movie

[0.0, 3.0, 4.0, 3.0, 3.0, 0.0, 4.0, 1.0, 5.0, 0.0, 2.0, 0.0, 5.0, 0.0, 5.0, 5.0, 0.0, 4.0, 5.0, 0.0, 1.0, 4.0, 0.0, 0.0, 4.0, 3.0, 0.0, 4.0, 1.0, 3.0, 0.0, 5.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 0.0, 3.0, 2.0, 5.0, 4.0, 0.0, 5.0, 4.0, 0.0, 5.0, 0.0, 5.0, 0.0, 4.0, 0.0, 0.0, 5.0, 0.0, 5.0, 4.0, 5.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 0.0, 4.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 4.0, 0.0, 4.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 5.0, 4.0, 5.0, 0.0, 0.0, 0.0, 5.0, 2.0, 4.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 4.0, 0.0, 0.0, 5.0, 1.0, 5.0, 0.0, 0.0, 0.0, 5.0, 3.0, 0.0, 0.0, 5.0, 0.0, 0.0, 3.0, 4.0, 5.0, 0.0, 2.0, 5.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 0.0, 4.0, 3.0, 5.0, 1.0, 3.0, 0.0, 3.0, 2.0, 0.0, 4.0, 0.0, 4.0, 3.0, 0.0, 2.0, 0.0, 0.0, 5.0, 3.0, 0.0, 0.0, 4.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 5.0, 5.0, 2.0, 5.0, 5.0, 0.0, 0.0, 5.0, 5.0, 0.0, 0.0, 5.0, 0.0, 5.0, 3.0, 0.0, 5.0, 4.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 5.0, 4.0, 0.0, 4.0, 5.0, 0.0, 5.0, 5.0, 4.0, 0.0,

Convert the data into torch tensors using FloatTensors

In [123]:
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

Convert the ranking into binary - 0 (not liked) and 1 (liked)

In [124]:
training_set[training_set == 0] = -1
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
training_set[training_set >= 3] = 1
test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1
print(training_set[0])
print(test_set[0])
training_set.size()

tensor([-1.,  1.,  1.,  ..., -1., -1., -1.])
tensor([-1., -1., -1.,  ..., -1., -1., -1.])


torch.Size([943, 1682])

create an architecture for RBM

In [174]:
class RBM():
  def __init__(self, nv, nh):
      self.W = torch.randn(nh,nv)  # rand normal distributed weights - mean = 0  var =1
      self.a = torch.randn(1, nh) # bias for hidden nodes , one bias per node..we need 2D input, 1st dim = batch
      self.b = torch.randn(1,nv) # bias for visible nodes

  def sample_h(self,x):
    wx = torch.mm(x, self.W.t())   # mm is used for product of two tensors - x is visible nodes, t() is transpose
    activation = wx + self.a.expand_as(wx)  # becasue a is in batch, we need expand_as() , it os actually wx + bias
    p_h_given_v = torch.sigmoid(activation) # hidden node is activated given visible node
    return p_h_given_v, torch.bernoulli(p_h_given_v) 

  def sample_v(self, y):
    wy = torch.mm(y, self.W)
    activation = wy + self.b.expand_as(wy)
    p_v_given_h = torch.sigmoid(activation)
    return p_v_given_h, torch.bernoulli(p_v_given_h)   

  def train(self, v0, vk, ph0, phk):
    self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
    self.b += torch.sum((v0- vk), 0)
    self.a += torch.sum((ph0 - phk), 0)





In [173]:
W = torch.randn(nh,nv)
g=torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)
g.size()


torch.Size([1682, 100])

create first RBM object 

In [175]:
nv = len(training_set[0])
nh = 100
batch_size = 100
rbm = RBM(nv,nh)


Train the rbm object created

In [180]:
nb_epoch = 10
for epoch in range(1, nb_epoch+1):
    train_loss = 0
    s = 0.
    for id_user in range(0, nb_users -  batch_size, 100):
        vk = training_set[id_user: id_user+batch_size]
        v0 = training_set[id_user: id_user+batch_size]
        ph0,_ = rbm.sample_h(v0)
        for k in range(10):
            _, hk = rbm.sample_h(vk)
            _, vk = rbm.sample_v(hk)
            vk[v0<0] = v0[v0<0]
        phk, _ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        train_loss += torch.mean(torch.abs(v0[v0>0]-vk[v0>0]))
        s += 1
    print('epoch: '+str(epoch)+'loss: '+str(train_loss/s))   

 

epoch: 1loss: tensor(0.1504)
epoch: 2loss: tensor(0.1487)
epoch: 3loss: tensor(0.1439)
epoch: 4loss: tensor(0.1497)
epoch: 5loss: tensor(0.1467)
epoch: 6loss: tensor(0.1492)
epoch: 7loss: tensor(0.1428)
epoch: 8loss: tensor(0.1459)
epoch: 9loss: tensor(0.1488)
epoch: 10loss: tensor(0.1458)


In [182]:
test_loss = 0
s = 0.
for id_user in range(0, nb_users):
    v = training_set[id_user: id_user+1]
    vt = test_set[id_user: id_user+1]
    if len(vt[vt>=0])>0:
        _, h = rbm.sample_h(v_input)
        _, v_input = rbm.sample_v(h)
        test_loss += torch.mean(torch.abs(v_target[v_target>0]-
                                          v_input[v_target>0])) 
        s += 1
print('test loss: ' +str(test_loss/s))

test loss: tensor(0.0004)
