In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.parallel
import torch.utils.data
from torch.autograd import Variable

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
#Import Database

In [4]:
movies=pd.read_csv('movies.dat', sep='::',header= None, engine='python',encoding='latin-1')

In [5]:
users= pd.read_csv('users.dat', sep='::',header= None, engine='python',encoding='latin-1')

In [6]:
users

Unnamed: 0,0,1,2,3,4
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,02460
4,5,M,25,20,55455
...,...,...,...,...,...
6035,6036,F,25,15,32603
6036,6037,F,45,1,76006
6037,6038,F,56,1,14706
6038,6039,F,45,0,01060


In [7]:
ratings= pd.read_csv('ratings.dat', sep='::',header= None, engine='python',encoding='latin-1')

In [8]:
# Train Test Split

In [9]:
training_set= pd.read_csv('u1.base', delimiter='\t')

In [10]:
training_set= np.array(training_set, dtype='int')

In [11]:
training_set

array([[        1,         2,         3, 876893171],
       [        1,         3,         4, 878542960],
       [        1,         4,         3, 876893119],
       ...,
       [      943,      1188,         3, 888640250],
       [      943,      1228,         3, 888640275],
       [      943,      1330,         3, 888692465]])

In [12]:
test_set= pd.read_csv('u1.test', delimiter='\t')

In [13]:
test_set= np.array(test_set, dtype='int')

In [14]:
test_set

array([[        1,        10,         3, 875693118],
       [        1,        12,         5, 878542960],
       [        1,        14,         5, 874965706],
       ...,
       [      459,       934,         3, 879563639],
       [      460,        10,         3, 882912371],
       [      462,       682,         5, 886365231]])

In [15]:
# Getting the number of Users

In [16]:
nb_users= int(max(max(training_set[:,0]), max(test_set[:,0])))

In [17]:
nb_movies= int(max(max(training_set[:,1]), max(test_set[:,1])))

In [18]:
nb_users

943

In [19]:
nb_movies

1682

In [20]:
def convert(data):
  #list of lists
  new_data= []
  for id_users in range(1, nb_users+1):
    id_movies= data[:,1][data[:,0]== id_users]
    id_ratings= data[:,2][data[:,0]== id_users]
    ratings = np.zeros(nb_movies)
    ratings[id_movies - 1] = id_ratings
    new_data.append(list(ratings))
  
  return new_data 

In [21]:
training_set= convert(training_set)
test_set= convert(test_set)

In [22]:
# Convert into Tensors

In [23]:
training_set= torch.FloatTensor(training_set)

In [24]:
test_set= torch.FloatTensor(test_set)

In [25]:

training_set

tensor([[0., 3., 4.,  ..., 0., 0., 0.],
        [4., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [5., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 5., 0.,  ..., 0., 0., 0.]])

COnverting Training Set to Binary  

In [26]:
training_set[training_set==0]= -1
training_set[training_set==1]= 0
training_set[training_set==2]= 0
training_set[training_set>=3]= 1

In [27]:
test_set[test_set==0]= -1
test_set[test_set==1]= 0
test_set[test_set==2]= 0
test_set[test_set>=3]= 1

In [28]:
test_set

tensor([[-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        ...,
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.]])

In [29]:
# Architecture of NN

In [39]:
class RBM():
  def __init__(self, nv, nh):
    self.W = torch.randn(nh, nv)
    self.a = torch.randn(1, nh)
    self.b = torch.randn(1, nv)
  def sample_h(self, x):
    wx = torch.mm(x, self.W.t())
    activation = wx + self.a.expand_as(wx)
    p_h_given_v = torch.sigmoid(activation)
    return p_h_given_v, torch.bernoulli(p_h_given_v)
  def sample_v(self, y):
    wy = torch.mm(y, self.W)
    activation = wy + self.b.expand_as(wy)
    p_v_given_h = torch.sigmoid(activation)
    return p_v_given_h, torch.bernoulli(p_v_given_h)
  def train(self, v0, vk, ph0, phk):
    self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
    self.b += torch.sum((v0 - vk), 0)
    self.a += torch.sum((ph0 - phk), 0)
nv = len(training_set[0])
nh = 100
batch_size = 100
rbm = RBM(nv, nh)

In [40]:
batch_size

100

In [41]:
rbm= RBM(nv,nh)

In [42]:
#Training the Model

In [43]:
nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
  train_loss = 0
  s = 0.
  for id_user in range(0, nb_users - batch_size, batch_size):
    vk = training_set[id_user : id_user + batch_size]
    v0 = training_set[id_user : id_user + batch_size]
    ph0,_ = rbm.sample_h(v0)
    for k in range(10):
      _,hk = rbm.sample_h(vk)
      _,vk = rbm.sample_v(hk)
      vk[v0<0] = v0[v0<0]
    phk,_ = rbm.sample_h(vk)
    rbm.train(v0, vk, ph0, phk)
    train_loss += torch.mean(torch.abs(v0[v0 >= 0] - vk[v0 >= 0]))
    s += 1.
  print('epoch: '+str(epoch)+' loss: '+str(train_loss/s))

epoch: 1 loss: tensor(0.3448)
epoch: 2 loss: tensor(0.2331)
epoch: 3 loss: tensor(0.2529)
epoch: 4 loss: tensor(0.2499)
epoch: 5 loss: tensor(0.2498)
epoch: 6 loss: tensor(0.2503)
epoch: 7 loss: tensor(0.2465)
epoch: 8 loss: tensor(0.2499)
epoch: 9 loss: tensor(0.2448)
epoch: 10 loss: tensor(0.2473)


In [44]:
#Testing The BM 

In [45]:
test_loss = 0
s = 0.
for id_user in range(nb_users):
    v = training_set[id_user:id_user+1]
    vt = test_set[id_user:id_user+1]
    if len(vt[vt>=0]) > 0:
        _,h = rbm.sample_h(v)
        _,v = rbm.sample_v(h)
        test_loss += torch.mean(torch.abs(vt[vt>=0] - v[vt>=0]))
        s += 1.
print('test loss: '+str(test_loss/s))

test loss: tensor(0.2430)
