**Dataset**: [MovieLens Dataset](http://grouplens.org/datasets/movielens/) from grouplens.org.

**Task**: To build a recommender system using [Restricted Boltzman Machines (RBMs)](http://cms.dm.uba.ar/academico/materias/1ercuat2018/probabilidades_y_estadistica_C/5a89b5075af5cbef5becaf419457cdd77cc9.pdf).

In [1]:
## package imports

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [2]:
## data import

train = pd.read_csv('ml-100k/u1.base', sep='\t', header=None)
test = pd.read_csv('ml-100k/u1.test', sep='\t', header=None)

In [3]:
train.head()

Unnamed: 0,0,1,2,3
0,1,1,5,874965758
1,1,2,3,876893171
2,1,3,4,878542960
3,1,4,3,876893119
4,1,5,3,889751712


In [4]:
test.head()

Unnamed: 0,0,1,2,3
0,1,6,5,887431973
1,1,10,3,875693118
2,1,12,5,878542960
3,1,14,5,874965706
4,1,17,3,875073198


**Important**: Columns '0', '1', '2', and '3', represent users, movies, ratings, and timestamps respectively.

In [5]:
## converting train and test sets into arrays

train = np.array(train, dtype=int)
test = np.array(test, dtype=int)

In [6]:
## getting number of users and movies

num_users = int(max(max(train[:,0]), max(test[:,0])))
num_movies = int(max(max(train[:,1]), max(test[:,1])))

In [7]:
num_users

943

In [8]:
num_movies

1682

In [9]:
## coverting datasets into a list of lists of dimension (num_users, num_movies) for pytorch tensor compatibility

def convert(data): ## function to convert
    
    final_list = list() ## initialization
    
    for user_id in range(1, num_users + 1):
        
        user_movies = data[:,1][data[:,0]==user_id] ## all movies rated by user
        user_ratings = data[:,2][data[:,0]==user_id] ## all ratings by user
        
        user_all_movies = np.zeros(num_movies) ## user ratings for each movie. 0 for urated morvies.
        user_all_movies[user_movies-1] = user_ratings
            
        final_list.append(user_all_movies)
    
    return final_list

train, test = convert(train), convert(test) ## data format conversion

In [10]:
train[0]

array([5., 3., 4., ..., 0., 0., 0.])

In [11]:
test[0]

array([0., 0., 0., ..., 0., 0., 0.])

In [12]:
## converting into pytorch tensors of dimensions (num_users, num_movies)

train = torch.FloatTensor(train)
test = torch.FloatTensor(test)

  This is separate from the ipykernel package so we can avoid doing imports until


In [13]:
train

tensor([[5., 3., 4.,  ..., 0., 0., 0.],
        [4., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [5., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 5., 0.,  ..., 0., 0., 0.]])

In [14]:
test

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [15]:
train.shape

torch.Size([943, 1682])

In [16]:
test.shape

torch.Size([943, 1682])

In [17]:
 ## converting into binary ratings

## train set

train[train == 0] = -1 ## -1 for unrated
train[train == 1] = 0
train[train == 2] = 0
train[train >= 3] = 1

## test set

test[test == 0] = -1 ## -1 for unrated
test[test == 1] = 0
test[test == 2] = 0
test[test >= 3] = 1

In [18]:
train

tensor([[ 1.,  1.,  1.,  ..., -1., -1., -1.],
        [ 1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        ...,
        [ 1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1.,  1., -1.,  ..., -1., -1., -1.]])

In [19]:
test

tensor([[-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        ...,
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.]])

In [20]:
## creating model architecture

class RBM():
    
    def __init__(self, nh, nv): ## nh-->no. of hidden nodes, nv-->no. of visible nodes 
        
        self.W = torch.randn((nh, nv)) ## W--> weights, parameteres for prob. of visible nodes given hidden nodes
        
        self.Bhv = torch.randn((1, nh)) ##Bhv--> bias for prob. of hidden nodes given visible node
        
        self.Bvh = torch.randn(1, nv) ##Bvh--> bias for prob. of visible nodes given hidden node
        
        ## initialized all weights and biases into tensors of reqd. dimensions based on normal dist. of mean=0 and variance=1
        
    ## sampling of hidden nodes given visible nodes
    
    def sample_h(self, x): ## x-->visible neurons
        
        wx = torch.mm(x, self.W.t()) ## matrix multiplication of visible nodes with transpose of weights
        
        activate = wx + self.Bhv.expand_as(wx) ## wx + bias(hidden given visible), .expand_as ensure bias is added to all values
        
        Phv = torch.sigmoid(activate) ## prob. that hidden node fires up for specified visible node for each node
        
        hv = torch.bernoulli(Phv) ## Phv-->binary, 1 for activate hidden neuron and 0 for don't activate hidden neuron
        
        return Phv, hv
    
    ## sampling for visible nodes given visible nodes
    
    def sample_v(self, y): ## y-->hidden neurons
        
        wy = torch.mm(y, self.W) ## matrix multiplication of hidden nodes with weights
        
        activate = wy + self.Bvh.expand_as(wy) ## wx + bias(hidden given visible), .expand_as ensure bias is added to all values
        
        Pvh = torch.sigmoid(activate) ## prob. that hidden node fires up for specified visible node for each node
        
        vh = torch.bernoulli(Pvh) ## Phv-->binary, 1 for activate hidden neuron and 0 for don't activate hidden neuron
        
        return Pvh, vh
    
    ## function to train model with Contrastive Divergence 1 (Gibbs Sampling)
    
    ## v0-->visible nodes initially, vk --> visible nodes after k steps, ph0, phk--> prob. of hidden nodes with v0 and vk resp.
    
    def train(self, v0, vk, ph0, phk): 
        
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t() ## weight update
        
        ## bias update
        
        self.Bvh += torch.sum((v0 - vk), 0)
        
        self.Bhv += torch.sum((ph0 - phk), 0)

In [21]:
## instantiating the RBM class

nv = len(train[0])
nh = 150 ## detect 50 features for a start
batch_size = 20 ## for faster training

rbm = RBM(nh=nh, nv=nv)

type(rbm)

__main__.RBM

In [22]:
num_epochs = 100

counter = 0.

for epoch in range(1, num_epochs+1):
    
    for user_id in range(0, num_users-batch_size, batch_size):
        
        train_loss = 0
        
        vk = train[user_id: user_id+100] ## changes
        
        v0 = train[user_id: user_id+100] ## never changes
        
        ph0,_ = rbm.sample_h(x=v0) ## prob. of hidden nodes being activated given visible nodes at 0
        
        ## k-steps constrastive divergence
        
        for k in range(10):
            
            _,hk = rbm.sample_h(x=vk)
            
            _,vk = rbm.sample_v(y=hk)
            
            vk[v0<0] = v0[v0<0] ## maintain all -1 ratings
            
        phk,_ = rbm.sample_h(x=vk) ## prob. of hidden nodes being activated given visible nodes at k
        
        rbm.train(v0=v0, vk=vk, ph0=ph0, phk=phk)
        
        train_loss += torch.mean(torch.abs(v0[v0 >= 0] - vk[v0 >= 0]))
        
    counter += 1.
        
    print(f"epoch: {counter:<{7}} train_loss: {train_loss}")

epoch: 1.0     train_loss: 0.16156382858753204
epoch: 2.0     train_loss: 0.16344794631004333
epoch: 3.0     train_loss: 0.1431935876607895
epoch: 4.0     train_loss: 0.16015073657035828
epoch: 5.0     train_loss: 0.16109278798103333
epoch: 6.0     train_loss: 0.15308526158332825
epoch: 7.0     train_loss: 0.1756947785615921
epoch: 8.0     train_loss: 0.1648610383272171
epoch: 9.0     train_loss: 0.16580310463905334
epoch: 10.0    train_loss: 0.17004239559173584
epoch: 11.0    train_loss: 0.16344794631004333
epoch: 12.0    train_loss: 0.16203485429286957
epoch: 13.0    train_loss: 0.16344794631004333
epoch: 14.0    train_loss: 0.15873762965202332
epoch: 15.0    train_loss: 0.16344794631004333
epoch: 16.0    train_loss: 0.17522373795509338
epoch: 17.0    train_loss: 0.1714554876089096
epoch: 18.0    train_loss: 0.17239755392074585
epoch: 19.0    train_loss: 0.15920867025852203
epoch: 20.0    train_loss: 0.1540273129940033
epoch: 21.0    train_loss: 0.17286857962608337
epoch: 22.0    tra

In [23]:
test_loss_list = list() ## list to collect loss

counter = 0.

for user_id in range(num_users):
        
        test_loss = 0
        
        v = train[user_id: user_id+1] ## VERY IMPORTANT --> use the weights to convert train data into value
        
        vt = test[user_id: user_id+1] ## real ratings for comparison
        
        ## k-steps constrastive divergence
        
        ## blind walk after training with random walk
        
        if len(vt[vt>=0]) > 0:
            
            _,h = rbm.sample_h(x=v)
            
            _,v = rbm.sample_v(y=h)
        
        test_loss = torch.mean(torch.abs(vt[vt >= 0] - v[vt >= 0]))
        
        
        test_loss_list.append(float(test_loss))
        
        counter += 1.

test_loss_list = [i for i in test_loss_list if i>= 0] ## dealing with NaN losses

print(f"test_loss: {np.mean(test_loss_list)}")

test_loss: 0.21222034768730985
