# Import the libs

In [12]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

# Import the dataset

In [13]:
# We won't be using this dataset.

In [14]:
# Some of the movies contains title and the columns seperated by ':' therefor sep='::'
# Header=None, there are no column names
# engine = python, to load the data correctly
# encoding = 'latin-1', some of the movie titles contain special characters
movies = pd.read_csv('data/ml-1m/movies.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
movies

Unnamed: 0,0,1,2
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
3878,3948,Meet the Parents (2000),Comedy
3879,3949,Requiem for a Dream (2000),Drama
3880,3950,Tigerland (2000),Drama
3881,3951,Two Family House (2000),Drama


In [15]:
#First column -> UserID
#Second column -> Gender
#Third column -> Age
#Fourth column -> Rated Movie Number
#Fifth column -> zipcode
users = pd.read_csv('data/ml-1m/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
users

Unnamed: 0,0,1,2,3,4
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,02460
4,5,M,25,20,55455
...,...,...,...,...,...
6035,6036,F,25,15,32603
6036,6037,F,45,1,76006
6037,6038,F,56,1,14706
6038,6039,F,45,0,01060


In [16]:
# First column -> User IDs
# Second column -> Movie IDs
# Third column -> Rating in 1-5 stars
# Fourth column -> Timestamps
ratings = pd.read_csv('data/ml-1m/ratings.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings

Unnamed: 0,0,1,2,3
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291
...,...,...,...,...
1000204,6040,1091,1,956716541
1000205,6040,1094,5,956704887
1000206,6040,562,5,956704746
1000207,6040,1096,4,956715648


# Prepare the training & test set

In [17]:
training_set = pd.read_csv('data/ml-100k/u1.base', delimiter = '\t')
# %80train - %20test
# First column -> Users
# Second column -> Movies
# Third column -> Ratings
# Fourth column -> Time stamps
training_set = np.array(training_set, dtype = 'int')
# We need arrays not dataframes

In [18]:
test_set = pd.read_csv('data/ml-100k/u1.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')

In [19]:
# Create two matrices
# Same number of movies, users, columns, lines
# Lines -> User IDs
# Columns -> Movie IDS
# Cells -> Ratings(if no rating, put zero)

# Get the number of users and movies

In [20]:
nb_users = int(max(max(training_set[:, 0], ), max(test_set[:, 0])))
nb_users

943

In [21]:
nb_movies = int(max(max(training_set[:, 1], ), max(test_set[:, 1])))
nb_movies

1682

# Converting data

In [22]:
def convert(data):
  new_data = []
  for id_users in range(1, nb_users + 1):
    id_movies = data[:, 1] [data[:, 0] == id_users]
    id_ratings = data[:, 2] [data[:, 0] == id_users]
    ratings = np.zeros(nb_movies)
    ratings[id_movies - 1] = id_ratings # id_movies -1 cause MovieIDs start from 2
    new_data.append(list(ratings))
  return new_data
training_set = convert(training_set)
test_set = convert(test_set)

# Conver data into torch tensors

In [23]:
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

In [24]:
training_set.shape

torch.Size([943, 1682])

In [25]:
test_set.shape

torch.Size([943, 1682])

# Ratings into binary ratings

In [26]:
training_set[training_set == 0] = -1
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
training_set[training_set >= 3] = 1
test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1

# Creating the architecture of NN

In [27]:
# First function to make is the __init__, it happens in every class
# We need to start with init
# Init function to define parameters of the object that will be created once the class is made
class RBM():
  # self -> correspond to the obj. that will be created afterwords
  # and variables of obj.
  # nv -> number of visible nodes
  # nh -> number of hidden nodes
  def __init__(self, nv, nh):
    # weights and biases will be optimized
    # nh -> a = 100
    # nv -> b = 1682
    self.W = torch.randn(nh, nv) # init a tensor size of nh and nv -> axb = 100x1682
    
    # we had 2 biases
    # first dimension is the batch, second dimension is the bias
    self.a = torch.randn(1, nh) # bias for hidden, 1xa = 1x100
    self.b = torch.randn(1, nv) # bias for visible 1xb = 1x1682
      
  # given visible nodes, compute probability of hidden nodes
  # after the probability sample the activations of the hidden nodes
  # example: if the prob is >= 0.7 , make it 1(Liked)
  # given visible nodes, x -> visible neurons
  def sample_h(self, x):
    # x -> axb = 100x1682
    # W -> axb = 100x1682
    # W.t() -> bxa = 1682x100
    # x*W.t() -> 100x100
    wx = torch.mm(x, self.W.t()) # -> axa = 100x100
    
    # add the bias for hidden and be ready for activation
    # make sure that the bias is applied to each line of the mini batch -> expand wx
    # expand_as -> expand the self.a(1x100) so that it becomes the same dimension with wx (100x100)
    activation = wx + self.a.expand_as(wx)
    p_h_given_v = torch.sigmoid(activation)
    
    # we're making bernoulli RBM -> yes or no / 1 or 0
    # return -> probability = p_h_given_v
            #-> bernoulli values = torch.bernoulli(p_h_given_v)
    return p_h_given_v, torch.bernoulli(p_h_given_v)
  def sample_v(self, y):
    # y -> axa = 100x100
    # W -> axb = 100x1682
    # we can direct multiply -> 100x100 * 100x1682 -> 100x1682
    # don't need transpose
    wy = torch.mm(y, self.W) # 100x1682
    activation = wy + self.b.expand_as(wy)
    p_v_given_h = torch.sigmoid(activation)
    return p_v_given_h, torch.bernoulli(p_v_given_h)
  
  # v0 is the input vector contains the ratings of all the movies by one user, later we'll loop all users
  # vk -> visible nodes after k samplings
  # ph0 -> P(h(0)|v(0))
  # phk -> P(h(k)|v(k))
  def train(self, v0, vk, ph0, phk):
    #ph0 and phk -> 100x100
    #v0 and vk -> 100x1682
    # 1682x100, at the beginning self.W was 100x1682, that's why we take .t()
    self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
    self.b += torch.sum((v0 - vk), 0)
    self.a += torch.sum((ph0 - phk), 0)

In [28]:
nv = len(training_set[0]) # 1682 visible nodes

#basically the number of features we want to detect
nh = 100 #correspond to some features that are going to be detected by the RBM model

# number of users by 1 iteration/epoch
# it can be optimized
batch_size = 100
rbm = RBM(nv, nh)

# Training the RBM

In [29]:
nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
  train_loss = 0
  s = 0.
  # As default, for loop has the step size 1, but we put batch_size
  # first iteration id_user = 0, second -> 100, third -> 200, ...
  for id_user in range(0, nb_users - batch_size, batch_size):
      #input-> ratings of all the movies for the specific id_user
      vk = training_set[id_user:id_user+batch_size] # 100x1682 fix
      v0 = training_set[id_user:id_user+batch_size] # 100x1682 fix
      
      # get init probabilities
      # to only get first element use x,_ =
      # p(h0) given v(0)
      ph0,_ = rbm.sample_h(v0) # 100x100
      for k in range(10):
          # update the visible and hidden nodes
          # v0 is our fixed/init variable, don't touch it
          _,hk = rbm.sample_h(vk) # 100x100
          _,vk = rbm.sample_v(hk) # 100x1682
          
          # we changed the non-rated movies too, therefore we need to put their place the value -1 again
          # put -1, where there is no rating, freeze these visible nodes
          # use untouched variable v0
          vk[v0<0] = v0[v0<0]
          
      # get the phk based on the trained vk,hk
      phk,_ = rbm.sample_h(vk) # 100x100
      
      # update the weights and biases
      rbm.train(v0, vk, ph0, phk) # 100x1682, 100x1682, 100x100, 100x100
      
      # update the train_loss through calculated weights
      train_loss += torch.mean(torch.abs(v0[v0>=0] - vk[v0>=0]))
      #train_loss += np.sqrt(torch.mean( (v0[v0>=0] - vk[v0>=0])**2 )) -> RMSE
      s += 1.
  print('epoch: ' + str(epoch) + ' loss: ' + str(train_loss/s))

epoch: 1 loss: tensor(0.3368)
epoch: 2 loss: tensor(0.2511)
epoch: 3 loss: tensor(0.2457)
epoch: 4 loss: tensor(0.2475)
epoch: 5 loss: tensor(0.2508)
epoch: 6 loss: tensor(0.2457)
epoch: 7 loss: tensor(0.2516)
epoch: 8 loss: tensor(0.2486)
epoch: 9 loss: tensor(0.2461)
epoch: 10 loss: tensor(0.2518)


In [30]:
vk.shape

torch.Size([100, 1682])

In [31]:
v0.shape

torch.Size([100, 1682])

In [32]:
hk.shape

torch.Size([100, 100])

In [33]:
phk.shape

torch.Size([100, 100])

In [34]:
ph0.shape

torch.Size([100, 100])

# Testing the RBM

In [35]:
test_loss = 0
s = 0.
for id_user in range(nb_users):
    v = training_set[id_user:id_user+1] #1x1682
    vt = test_set[id_user:id_user+1] # 1x1682
    if len(vt[vt>=0]) >0:
        _,h = rbm.sample_h(v) #1x100
        _,v = rbm.sample_v(h) # 1x1682
        test_loss += torch.mean(torch.abs(vt[vt>=0] - v[vt>=0]))
        #test_loss += np.sqrt(torch.mean( (vt[vt>=0] - v[vt>=0])**2 )) -> RMSE
        s += 1.
print(' test_loss: ' + str(test_loss/s))

 test_loss: tensor(0.2305)
