### **Downloading the Dataset**

#### ML-100K

In [1]:
  !wget "http://files.grouplens.org/datasets/movielens/ml-100k.zip"
  !unzip ml-100k.zip
  !ls

--2021-01-24 01:58:55--  http://files.grouplens.org/datasets/movielens/ml-100k.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4924029 (4.7M) [application/zip]
Saving to: ‘ml-100k.zip’


2021-01-24 01:58:56 (16.3 MB/s) - ‘ml-100k.zip’ saved [4924029/4924029]

Archive:  ml-100k.zip
   creating: ml-100k/
  inflating: ml-100k/allbut.pl       
  inflating: ml-100k/mku.sh          
  inflating: ml-100k/README          
  inflating: ml-100k/u.data          
  inflating: ml-100k/u.genre         
  inflating: ml-100k/u.info          
  inflating: ml-100k/u.item          
  inflating: ml-100k/u.occupation    
  inflating: ml-100k/u.user          
  inflating: ml-100k/u1.base         
  inflating: ml-100k/u1.test         
  inflating: ml-100k/u2.base         
  inflating: ml-100k/u2.test         
  inflating: ml-100k/u3.base    

#### ML-1M

In [2]:
!wget "http://files.grouplens.org/datasets/movielens/ml-1m.zip"
!unzip ml-1m.zip
!ls

--2021-01-24 01:59:32--  http://files.grouplens.org/datasets/movielens/ml-1m.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5917549 (5.6M) [application/zip]
Saving to: ‘ml-1m.zip’


2021-01-24 01:59:32 (19.1 MB/s) - ‘ml-1m.zip’ saved [5917549/5917549]

Archive:  ml-1m.zip
   creating: ml-1m/
  inflating: ml-1m/movies.dat        
  inflating: ml-1m/ratings.dat       
  inflating: ml-1m/README            
  inflating: ml-1m/users.dat         
__notebook_source__.ipynb  ml-100k  ml-100k.zip  ml-1m	ml-1m.zip


### **Importing Libraries**

In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

### Importing Dataset

In [4]:
# The MovieLens 1M tables are loaded for reference only; the rest of the
# notebook works on the 100K split files instead.
# Each file is read with '::' as the field separator, no header row and
# Latin-1 decoding, using pandas' Python parsing engine.
movies = pd.read_csv(
    'ml-1m/movies.dat',
    sep='::',
    header=None,
    engine='python',
    encoding='latin-1',
)
users = pd.read_csv(
    'ml-1m/users.dat',
    sep='::',
    header=None,
    engine='python',
    encoding='latin-1',
)
ratings = pd.read_csv(
    'ml-1m/ratings.dat',
    sep='::',
    header=None,
    engine='python',
    encoding='latin-1',
)

### Preparing the training set and test set

In [5]:
# The ml-100k folder ships several predefined train/test pairs for k-fold
# cross-validation (uN.base = training part, uN.test = test part); only the
# first pair is used here.
# The files are tab-separated and have NO header row, so header=None is
# required: without it pandas consumes the first rating of each file as
# column names and that record is silently lost.

training_set = pd.read_csv('ml-100k/u1.base', delimiter='\t', header=None)
# Plain integer array, one row per rating; columns 0, 1 and 2 are read later
# as user id, movie id and rating.
training_set = np.array(training_set, dtype='int')

test_set = pd.read_csv('ml-100k/u1.test', delimiter='\t', header=None)
test_set = np.array(test_set, dtype='int')

### Getting the number of users and movies

In [8]:
# The highest user id / movie id may occur in either split, so the maximum is
# taken over both arrays.

nb_users = int(max(training_set[:, 0].max(), test_set[:, 0].max()))   # column 0 = user id
nb_movies = int(max(training_set[:, 1].max(), test_set[:, 1].max()))  # column 1 = movie id

### Converting the data into an array with "users in lines" and "movies in columns"

In [11]:
# For a recommender system the data has to be structured as a grid:
# one row per user (row 0 = user 1, row 1 = user 2, ...) and one column per movie.

# The result is a list of lists, which torch.FloatTensor accepts directly.
def convert(data, n_users=None, n_movies=None):
    """Pivot flat rating records into a dense user-by-movie list of lists.

    Parameters
    ----------
    data : 2-D numpy integer array
        One row per rating. Column 0 is the 1-based user id, column 1 the
        1-based movie id and column 2 the rating; further columns are ignored.
    n_users : int, optional
        Number of rows to produce (user ids 1..n_users). Defaults to the
        notebook-level ``nb_users``.
    n_movies : int, optional
        Number of columns to produce (movie ids 1..n_movies). Defaults to the
        notebook-level ``nb_movies``.

    Returns
    -------
    list of list of float
        ``result[u - 1][m - 1]`` is the rating user ``u`` gave movie ``m``;
        0.0 means no rating is present in ``data``.
    """
    # Fall back to the notebook globals so existing convert(data) calls keep working.
    if n_users is None:
        n_users = nb_users
    if n_movies is None:
        n_movies = nb_movies

    new_data = []
    for id_users in range(1, n_users + 1):
        user_rows = data[:, 0] == id_users  # mask computed once, reused for both columns
        id_movies = data[user_rows, 1]      # every movie id this user rated
        id_ratings = data[user_rows, 2]     # the matching ratings, same order
        ratings = np.zeros(n_movies)
        ratings[id_movies - 1] = id_ratings  # ids are 1-based, columns 0-based
        new_data.append(list(ratings))
    return new_data

# Rebind both names from flat rating records to user x movie lists of lists.
# NOTE: not idempotent - convert() indexes its input as a 2-D array, so the
# loading cell must be re-run before this cell is executed a second time.
training_set = convert(training_set)
test_set = convert(test_set)

### Converting the data into Torch Tensors

In [12]:
# Wrap the nested rating lists in float tensors so the RBM below can apply
# torch operations (matrix products, sigmoid, Bernoulli sampling) to them.

training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

### Converting the ratings into binary ratings: 1 (liked) or 0 (not liked)

In [15]:
# Recode both rating matrices in place:
#   0 (no rating) -> -1,   1 or 2 -> 0 (not liked),   >= 3 -> 1 (liked).
# Order matters: the "no rating" zeros must be moved to -1 before low ratings
# are rewritten to 0, otherwise the two groups would become indistinguishable.
for rating_matrix in (training_set, test_set):
    rating_matrix[rating_matrix == 0] = -1
    rating_matrix[(rating_matrix == 1) | (rating_matrix == 2)] = 0
    rating_matrix[rating_matrix >= 3] = 1

### Creating Architecture of NN

In [16]:
class RBM:
    """Restricted Boltzmann machine with binary units, updated by contrastive divergence.

    Attributes
    ----------
    W : tensor of shape (nh, nv) - weights between hidden and visible units.
    a : tensor of shape (1, nh)  - bias of the hidden units.
    b : tensor of shape (1, nv)  - bias of the visible units.
    """

    def __init__(self, nv, nh):
        """Initialise all parameters from a standard normal distribution.

        nv: number of visible units; nh: number of hidden units.
        """
        self.W = torch.randn(nh, nv)
        # The leading 1 keeps each bias two-dimensional (a row vector).
        self.a = torch.randn(1, nh)
        self.b = torch.randn(1, nv)

    def sample_h(self, x):
        """Sample the hidden layer given a batch of visible rows ``x``.

        Returns ``(p_h_given_v, sample)`` where ``p_h_given_v`` is
        sigmoid(x W^T + a) and ``sample`` is a Bernoulli draw (0/1) with those
        probabilities.
        """
        pre_activation = x.mm(self.W.t())
        # The (1, nh) bias broadcasts over every row of the mini-batch.
        p_h_given_v = torch.sigmoid(pre_activation + self.a)
        hidden_sample = torch.bernoulli(p_h_given_v)
        return p_h_given_v, hidden_sample

    def sample_v(self, y):
        """Sample the visible layer given a batch of hidden rows ``y``.

        Returns ``(p_v_given_h, sample)`` where ``p_v_given_h`` is
        sigmoid(y W + b) and ``sample`` is a Bernoulli draw (0/1) with those
        probabilities.
        """
        pre_activation = y.mm(self.W)
        p_v_given_h = torch.sigmoid(pre_activation + self.b)
        visible_sample = torch.bernoulli(p_v_given_h)
        return p_v_given_h, visible_sample

    def train(self, v0, vk, ph0, phk):
        """Apply one contrastive-divergence update in place.

        v0 / vk are the visible batches at the start and end of the Gibbs
        chain; ph0 / phk are the hidden probabilities computed from them.
        """
        positive_phase = v0.t().mm(ph0)
        negative_phase = vk.t().mm(phk)
        self.W += (positive_phase - negative_phase).t()
        self.b += (v0 - vk).sum(0)
        self.a += (ph0 - phk).sum(0)

In [17]:
# Hyper-parameters chosen by hand.
nh = 100          # hidden units
batch_size = 100  # users per mini-batch

# One visible unit per column of the rating matrix, i.e. one per movie.
nv = len(training_set[0])

rbm = RBM(nv=nv, nh=nh)

### Training RBM

In [18]:
nb_epoch = 10

for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.  # number of mini-batches processed in this epoch
    # Iterate up to nb_users (not nb_users - batch_size) so the final batch is
    # trained on as well. Slicing past the end is safe, so when nb_users is not
    # a multiple of batch_size the last batch is simply smaller.
    for id_user in range(0, nb_users, batch_size):  # id_user = first row of the batch
        v0 = training_set[id_user : id_user + batch_size]  # observed ratings, kept fixed
        vk = v0  # start of the Gibbs chain; rebound to a fresh tensor by sample_v below
        unrated = v0 < 0   # -1 entries: no rating available
        rated = v0 >= 0
        ph0, _ = rbm.sample_h(v0)  # only the probabilities are needed here
        for k in range(10):  # k steps of contrastive divergence (Gibbs sampling)
            _, hk = rbm.sample_h(vk)
            _, vk = rbm.sample_v(hk)
            # Pin unrated entries back to -1 so they never count as a
            # difference between v0 and vk in the update or in the loss.
            vk[unrated] = v0[unrated]

        phk, _ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)

        # Mean absolute difference between actual and reconstructed ratings,
        # restricted to entries that actually have a rating.
        train_loss += torch.mean(torch.abs(v0[rated] - vk[rated]))
        s += 1.
    print('epoch: '+str(epoch)+' loss: '+str(train_loss/s))


epoch: 1 loss: tensor(0.3509)
epoch: 2 loss: tensor(0.2391)
epoch: 3 loss: tensor(0.2506)
epoch: 4 loss: tensor(0.2485)
epoch: 5 loss: tensor(0.2479)
epoch: 6 loss: tensor(0.2495)
epoch: 7 loss: tensor(0.2485)
epoch: 8 loss: tensor(0.2486)
epoch: 9 loss: tensor(0.2500)
epoch: 10 loss: tensor(0.2476)


In [20]:
# ############# Using RMSE as metric for training ############

# nb_epoch = 10
# for epoch in range(1, nb_epoch + 1):
#     train_loss = 0
#     s = 0.
#     for id_user in range(0, nb_users - batch_size, batch_size):
#         vk = training_set[id_user:id_user+batch_size]
#         v0 = training_set[id_user:id_user+batch_size]
#         ph0,_ = rbm.sample_h(v0)
#         for k in range(10):
#             _,hk = rbm.sample_h(vk)
#             _,vk = rbm.sample_v(hk)
#             vk[v0<0] = v0[v0<0]
#         phk,_ = rbm.sample_h(vk)
#         rbm.train(v0, vk, ph0, phk)
#         train_loss += np.sqrt(torch.mean((v0[v0>=0] - vk[v0>=0])**2)) # RMSE here
#         s += 1.
#     print('epoch: '+str(epoch)+' loss: '+str(train_loss/s))

### Testing RBM

In [19]:
# Evaluate one user at a time: feed the user's training ratings through a
# single hidden/visible sampling step and compare the reconstruction with the
# ratings held out in the test set.
test_loss = 0
s = 0.  # number of users that have at least one test rating
for id_user in range(nb_users):
    target = test_set[id_user:id_user + 1]
    known = target >= 0
    if len(target[known]) == 0:
        continue  # nothing to score for this user
    visible = training_set[id_user:id_user + 1]
    _, hidden = rbm.sample_h(visible)
    _, reconstruction = rbm.sample_v(hidden)
    test_loss += torch.mean(torch.abs(target[known] - reconstruction[known]))
    s += 1.
print('test loss: '+str(test_loss/s))

test loss: tensor(0.2434)


In [21]:
# ####### Using RMSE for evaluating on the test set #########
# test_loss = 0
# s = 0.
# for id_user in range(nb_users):
#     v = training_set[id_user:id_user+1]
#     vt = test_set[id_user:id_user+1]
#     if len(vt[vt>=0]) > 0:
#         _,h = rbm.sample_h(v)
#         _,v = rbm.sample_v(h)
#         test_loss += np.sqrt(torch.mean((vt[vt>=0] - v[vt>=0])**2)) # RMSE here
#         s += 1.
# print('test loss: '+str(test_loss/s))