Downloading the dataset


In [34]:

!unzip ml-100k.zip
!ls

Archive:  ml-100k.zip
replace ml-100k/allbut.pl? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace ml-100k/mku.sh? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace ml-100k/README? [y]es, [n]o, [A]ll, [N]one, [r]ename: N
ml-100k  ml-100k.zip  sample_data


Libraries

In [35]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

Training set and the Test set

In [36]:
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t')
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')

Total no of users and movies

In [37]:
nb_users = int(max(max(training_set[:, 0], ), max(test_set[:, 0])))
nb_movies = int(max(max(training_set[:, 1], ), max(test_set[:, 1])))

converting data into list of lists, users as rows and movies as columns and ratings will be the cell values

In [38]:
def convert(data):
  new_data = []
  for id_users in range(1, nb_users + 1):
    id_movies = data[:, 1] [data[:, 0] == id_users]
    id_ratings = data[:, 2] [data[:, 0] == id_users]
    ratings = np.zeros(nb_movies)
    ratings[id_movies - 1] = id_ratings
    new_data.append(list(ratings))
  return new_data
training_set = convert(training_set)
test_set = convert(test_set)

converting training_set and test_set into torch tensors

In [39]:
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

Converting ratings into binary values ie. 1==liked, 0==disliked

In [40]:
training_set[training_set == 0] = -1
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
training_set[training_set >= 3] = 1
test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1

creating the class for RBM

In [41]:
class RBM():
  def __init__(self, nv, nh):
    self.W = torch.randn(nh, nv)
    self.a = torch.randn(1, nh)
    self.b = torch.randn(1, nv)
  def sample_h(self, x):
    wx = torch.mm(x, self.W.t())
    activation = wx + self.a.expand_as(wx)
    p_h_given_v = torch.sigmoid(activation)
    return p_h_given_v, torch.bernoulli(p_h_given_v)
  def sample_v(self, y):
    wy = torch.mm(y, self.W)
    activation = wy + self.b.expand_as(wy)
    p_v_given_h = torch.sigmoid(activation)
    return p_v_given_h, torch.bernoulli(p_v_given_h)
  def train(self, v0, vk, ph0, phk):
    self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
    self.b += torch.sum((v0 - vk), 0)
    self.a += torch.sum((ph0 - phk), 0)

creating the object of class

In [42]:
nv = len(training_set[0])
nh = 100
batch_size = 100
rbm = RBM(nv, nh)

Training the RBM

In [43]:
nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
  train_loss = 0
  s = 0.
  for id_user in range(0, nb_users - batch_size, batch_size):
    vk = training_set[id_user : id_user + batch_size]
    v0 = training_set[id_user : id_user + batch_size]
    ph0,_ = rbm.sample_h(v0)
    for k in range(10):
      _,hk = rbm.sample_h(vk)
      _,vk = rbm.sample_v(hk)
      vk[v0<0] = v0[v0<0]
    phk,_ = rbm.sample_h(vk)
    rbm.train(v0, vk, ph0, phk)
    train_loss += torch.mean(torch.abs(v0[v0 >= 0] - vk[v0 >= 0]))
    s += 1.
  print('epoch: '+str(epoch)+' loss: '+str(train_loss/s))

epoch: 1 loss: tensor(0.3726)
epoch: 2 loss: tensor(0.2560)
epoch: 3 loss: tensor(0.2500)
epoch: 4 loss: tensor(0.2533)
epoch: 5 loss: tensor(0.2488)
epoch: 6 loss: tensor(0.2469)
epoch: 7 loss: tensor(0.2518)
epoch: 8 loss: tensor(0.2498)
epoch: 9 loss: tensor(0.2501)
epoch: 10 loss: tensor(0.2477)


Testing the RBM (how well it works)

In [44]:
test_loss = 0
s = 0.
for id_user in range(nb_users):
    v = training_set[id_user:id_user+1]
    vt = test_set[id_user:id_user+1]
    if len(vt[vt>=0]) > 0:
        _,h = rbm.sample_h(v)
        _,v = rbm.sample_v(h)
        test_loss += torch.mean(torch.abs(vt[vt>=0] - v[vt>=0]))
        s += 1.
print('test loss: '+str(test_loss/s))

test loss: tensor(0.2515)


Testing (predictions) for a particular user (1st user)

In [45]:
test_loss=0
v = training_set[1:2]
vt = test_set[1:2]
_,h = rbm.sample_h(v)
_,v = rbm.sample_v(h)


In [46]:
test_loss += torch.mean(torch.abs(vt[vt>=0] - v[vt>=0]))
print('test loss: '+str(test_loss))

test loss: tensor(0.3636)
