In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [2]:
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t')
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')

In [3]:
n_users=int(max(max(training_set[:,0]),max(test_set[:,0])))
n_movies=int(max(max(training_set[:,1]),max(test_set[:,1])))

In [4]:
def convert(data):
    new_data=[]
    for id_user in range(1,n_users+1):
        id_movies=data[:,1][data[:,0]==id_user]
        id_rating=data[:,2][data[:,0]==id_user]
        ratings=np.zeros(n_movies)
        ratings[id_movies-1] = id_rating
        new_data.append(list(ratings))
    return new_data

In [5]:
training_set = convert(training_set)
test_set = convert(test_set)

In [6]:
training_set=torch.FloatTensor(training_set)
test_set=torch.FloatTensor(test_set)

In [7]:
training_set


    0     3     4  ...      0     0     0
    4     0     0  ...      0     0     0
    0     0     0  ...      0     0     0
       ...          ⋱          ...       
    5     0     0  ...      0     0     0
    0     0     0  ...      0     0     0
    0     5     0  ...      0     0     0
[torch.FloatTensor of size 943x1682]

In [8]:
training_set[training_set==0]=-1
training_set[training_set==1]=0
training_set[training_set==2]=0
training_set[training_set >= 3] = 1
test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1

In [9]:
training_set


   -1     1     1  ...     -1    -1    -1
    1    -1    -1  ...     -1    -1    -1
   -1    -1    -1  ...     -1    -1    -1
       ...          ⋱          ...       
    1    -1    -1  ...     -1    -1    -1
   -1    -1    -1  ...     -1    -1    -1
   -1     1    -1  ...     -1    -1    -1
[torch.FloatTensor of size 943x1682]

In [10]:
class RBM():
    def __init__(self,nv,nh):
        self.W=torch.randn(nh,nv)
        self.a=torch.randn(1,nh)
        self.b=torch.randn(1,nv)
    def sample_h(self,x):
        wx=torch.mm(x,self.W.t())
        activation=wx+self.a.expand_as(wx)
        p_h_given_v=torch.sigmoid(activation)
        return p_h_given_v,torch.bernoulli(p_h_given_v)
    def sample_v(self,y):
        wy=torch.mm(y,self.W)
        activation=wy+self.b.expand_as(wy)
        p_v_given_h=torch.sigmoid(activation)
        return p_v_given_h,torch.bernoulli(p_v_given_h)
    def train(self,v0,vk,ph0,phk):
        self.W += torch.mm(v0.t(),ph0)-torch.mm(vk.t(),phk)
        self.b +=torch.sum((v0-vk),0)
        self.a +=torch.sum((ph0-phk),0)
        

In [11]:
nv=len(training_set[0])
nh=100
batch_size=100
rbm=RBM(nv,nh)

In [12]:
nb_epoch=20
for i in range(1,nb_epoch+1):
    train_loss=0
    s=0.
    for id_user in range(0,n_users-batch_size,batch_size):
        vk=training_set[id_user:id_user+batch_size]
        v0=training_set[id_user:id_user+batch_size]
        ph0,_=rbm.sample_h(v0)
        for k in range(10):
            _,hk=rbm.sample_h(vk)
            _,vk=rbm.sample_v(hk)
            vk[v0<0]=v0[v0<0]
        phk,_=rbm.sample_h(vk)
        rbm.train(v0,vk,ph0,phk)
        train_loss += torch.mean(torch.abs(v0[v0>=0]-vk[v0>=0]))
        s+=1.
    print('epoch: '+str(i)+' loss: '+str(train_loss/s))

  return self.add_(other)


epoch: 1 loss: 0.29363373390611586
epoch: 2 loss: 0.25265674841529395
epoch: 3 loss: 0.25227124937115764
epoch: 4 loss: 0.24794800862519312
epoch: 5 loss: 0.252418302806215
epoch: 6 loss: 0.2521652300475114
epoch: 7 loss: 0.25093897356170364
epoch: 8 loss: 0.25256888971621894
epoch: 9 loss: 0.25105839630014837
epoch: 10 loss: 0.24577594369241984
epoch: 11 loss: 0.2525581192093916
epoch: 12 loss: 0.24788126048751424
epoch: 13 loss: 0.25287338720174407
epoch: 14 loss: 0.24963747846366946
epoch: 15 loss: 0.2478598528378322
epoch: 16 loss: 0.2504462311523847
epoch: 17 loss: 0.2505656310228275
epoch: 18 loss: 0.24990466944953896
epoch: 19 loss: 0.25053833351682653
epoch: 20 loss: 0.2501061495909018


In [15]:
test_loss=0
s=0.
for id_user in range(n_users):
        v=training_set[id_user:id_user+1]
        vt=test_set[id_user:id_user+1]
        ph0,_=rbm.sample_h(v0)
        if len(vt[vt>=0]) > 0:
            _,h = rbm.sample_h(v)
            _,v = rbm.sample_v(h)
            test_loss += torch.mean(torch.abs(vt[vt>=0]-v[vt>=0]))
            s+=1.
print('loss: '+str(test_loss/s))

loss: 0.23663561979518508
