# MOVIE RECOMMENDATION SYSTEM USING USING BOLZMANN MACHINE

## MOVIE_LENS DATASYSTEM USING RESTRICTED BOLTZMANN MACHINE

### IMPORTING THE LIBRARIES

In [2]:
import numpy as np
import pandas as pd

### Importing the dataset

In [4]:
movies = pd.read_csv('ml-1m/movies.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
users = pd.read_csv('ml-1m/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')


### Preparing the training set and the test set

In [6]:
training_set=pd.read_csv('ml-100k/u1.base',delimiter='\t')

In [7]:
test_set=pd.read_csv('ml-100k/u1.test',delimiter='\t')

In [8]:
training_set=np.array(training_set)
test_set=np.array(test_set)

In [9]:
num_of_users=int(max(max(training_set[:,0]),max(test_set[:,0])))

In [10]:
num_of_movies=int(max(max(training_set[:,1]),max(test_set[:,1])))

In [11]:
num_of_movies

1682

In [12]:
num_of_users

943

### Converting the data into an array with users in lines and movies in columns

In [14]:
new_data=[]
def convert(data):
    for id_user in range(0,num_of_users+1):
        id_movies=data[:,1][data[:,0]==id_user]
        id_ratings=data[:,2][data[:,0]==id_user]
        ratings=np.zeros(num_of_movies)
        ratings[id_movies-1]=id_ratings
        new_data.append(list(ratings))
        
    return new_data
    
    

In [15]:
training_set=convert(training_set)
test_set=convert(test_set)

### Importing Pytorch Libraries

In [16]:
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

### Converting the data into Torch tensors

In [18]:
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)


In [19]:
training_set[training_set==0]=-1
training_set[training_set==1]=0
training_set[training_set==2]=0
training_set[training_set>=3]=1


In [20]:
test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1


### Creating the architecture of the Neural Network

In [22]:
class RBM():
    def __init__(self, nv, nh):
        self.W = torch.randn(nh, nv)
        self.a = torch.randn(1, nh)
        self.b = torch.randn(1, nv)
    def sample_h(self, x):
        wx=torch.mm(x,self.W.t())
        activation = wx + self.a.expand_as(wx)
        p_h_given_v = torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)
    def sample_v(self, y):
        wy = torch.mm(y, self.W)
        activation = wy + self.b.expand_as(wy)
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)
    def train(self, v0, vk, ph0, phk):
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
        self.b += torch.sum((v0 - vk), 0)
        self.a += torch.sum((ph0 - phk), 0)


In [23]:
nv=len(training_set[0])
nh=100

rbm=RBM(nv,nh)

### Training the RBM

In [25]:
nb_epochs=10
batch_size=100
for epoch in range(0,nb_epochs):
    train_loss=0
    s=0
    for i in range(0,num_of_users-batch_size,batch_size):
        input_=training_set[i:i+batch_size]
        v0=training_set[i:i+batch_size]
        ph0,_=rbm.sample_h(v0)
        for k in range(10):
            _,hk=rbm.sample_h(input_)
            _,vk=rbm.sample_v(hk)
            vk[v0<0]=v0[v0<0]
        phk,_=rbm.sample_h(vk)
       
        
        rbm.train(v0,vk,ph0,phk)
        train_loss+=torch.mean(torch.abs(vk[v0>0]-v0[v0>0]))
        s+=1
        
    print('epoch '+str(epoch)+' '+ 'train_loss '+str(train_loss/s))
        
        
            
            

epoch 0 train_loss tensor(0.2756)
epoch 1 train_loss tensor(0.1360)
epoch 2 train_loss tensor(0.1500)
epoch 3 train_loss tensor(0.1426)
epoch 4 train_loss tensor(0.1532)
epoch 5 train_loss tensor(0.1472)
epoch 6 train_loss tensor(0.1468)
epoch 7 train_loss tensor(0.1463)
epoch 8 train_loss tensor(0.1477)
epoch 9 train_loss tensor(0.1497)


### Testing the RBM

In [27]:
test_loss=0
s=0
for l in range(num_of_users):
    input_ = training_set[l:l+1]
    target = test_set[l:l+1]
    if len(target[target>=0])>0:
        
        _,h = rbm.sample_h(input_)
        _,v = rbm.sample_v(h)
        test_loss += torch.mean(torch.abs(target[target>=0]-v[target>=0]))
        s+=1
print('test loss: '+str(test_loss/s))
    


test loss: tensor(0.2589)
