In [38]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable


In [39]:
movies=pd.read_csv("ml-1m/movies.dat",sep='::',header=None,engine='python',encoding='latin-1')
users=pd.read_csv("ml-1m/users.dat",sep='::',header=None,engine='python',encoding='latin-1')
ratings=pd.read_csv("ml-1m/ratings.dat",sep='::',header=None,engine='python',encoding='latin-1')


In [40]:
training_set = pd.read_csv('ml-100k/u1.base',delimiter='\t')
training_set=np.array(training_set,dtype='int')
test_set = pd.read_csv('ml-100k/u1.test',delimiter='\t')
test_set=np.array(test_set,dtype='int')


In [41]:
nb_users=int(max(max(training_set[:,0]), max(test_set[:,0])))
nb_movies=int(max(max(training_set[:,1]), max(test_set[:,1])))

In [42]:
#converting the data into an array with users in lines and movies in columns
def convert(data):
    new_data=[]
    for id_users in range(1,nb_users+1):
        id_movies=data[:,1][data[:,0]==id_users]
        id_ratings=data[:,2][data[:,0]==id_users]
        ratings = np.zeros(nb_movies)
        ratings[id_movies-1] = id_ratings
        new_data.append(list(ratings))
    return new_data


training_set=convert(training_set)
test_set=convert(test_set)

In [43]:
training_set=torch.FloatTensor(training_set)
test_set=torch.FloatTensor(test_set)

In [44]:
training_set[training_set==0]=-1
training_set[training_set==1 ]=0
training_set[training_set==2]=0
training_set[training_set>=3]=1



test_set[test_set==0]=-1
test_set[test_set==1]=0
test_set[test_set==2]=0
test_set[test_set>=3]=1



In [45]:
class RBM():
    def __init__(self,nv,nh):
        self.W=torch.randn(nv,nh) #weights
        self.a=torch.randn(1,nh) #biases for hidden nodes
        self.b=torch.rand(1,nv) #for visible nodes
    def sample_h(self,x): #x is the visible node and we apply to it to find out the hidden one value
        wx=torch.mm(x,self.W)
        activation=wx+self.a.expand_as(wx)
        p_h_given_v = torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)
    def sample_v(self,y): #y is the hidden node
        wy=torch.mm(y,self.W.t())
        activation=wy+self.b.expand_as(wy)
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)
    def train(self,v0,vk,ph0,phk):
        self.W+=torch.mm(v0.t(),ph0)-torch.mm(vk.t(),phk)
        self.b+=torch.sum((v0-vk),0)
        self.a+=torch.sum((ph0-phk),0)
        



    

In [46]:

nv=len(training_set[0])
print(nv)
nh=100
batch_size=100
rbm=RBM(nv,nh)

1682


In [None]:
nb_epoch=10
for epoch in range(1,nb_epoch+1):
    train_loss=0
    s=0.
    for id_user in range(0, nb_users-batch_size, batch_size):
        vk=training_set[id_user:id_user+batch_size]
        v0=training_set[id_user:id_user+batch_size]#intial nodes
        ph0,_=rbm.sample_h(v0)
        for k in range(10):
            _,hk=rbm.sample_h(vk)#k steps
            _,vk=rbm.sample_v(hk)
            vk[v0<0]=v0[v0<0]
        phk,_=rbm.sample_h(vk)
        rbm.train(v0,vk,ph0,phk)
        train_loss+=torch.mean(torch.abs(v0[v0>=0]-vk[v0>=0]))
        s+=1
    print("epoch: " + str(epoch)+' loss' +str(train_loss/s))
    




epoch: 1 losstensor(0.3537)
epoch: 2 losstensor(0.2474)
epoch: 3 losstensor(0.2484)
epoch: 4 losstensor(0.2483)
epoch: 5 losstensor(0.2466)
epoch: 6 losstensor(0.2437)
epoch: 7 losstensor(0.2473)
epoch: 8 losstensor(0.2487)
epoch: 9 losstensor(0.2464)
epoch: 10 losstensor(0.2457)


In [48]:
print("v0:", v0.shape)
print("ph0:", ph0.shape)
print("vk:", vk.shape)
print("phk:", phk.shape)


v0: torch.Size([100, 1682])
ph0: torch.Size([100, 100])
vk: torch.Size([100, 1682])
phk: torch.Size([100, 100])


In [50]:

test_loss=0
s=0.
for id_user in range( nb_users):
    v=training_set[id_user:id_user+1]
    vt=test_set[id_user:id_user+1]#intial nodes
    
    if len(vt[vt>=0])>0:
        _,h=rbm.sample_h(v)#1 step
        _,v=rbm.sample_v(h)
    
        test_loss+=torch.mean(torch.abs(vt[vt>=0]-v[vt>=0]))
        s+=1
print('test loss' +str(test_loss/s))
    




test losstensor(0.2570)


In [53]:
import numpy as np
u = np.random.choice([0,1], 100000)
v = np.random.choice([0,1], 100000)
u[:50000] = v[:50000]
print(sum(u==v)/float(len(u))) # -> you get 0.75
print(np.mean(np.abs(u-v))) # -> you get 0.25

0.74975
0.25025
