In [1]:
#Importing libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [2]:
#Importing Dataset
movies = pd.read_csv('ml-1m/movies.dat', sep= '::', engine= 'python', encoding= 'latin-1')
users = pd.read_csv('ml-1m/users.dat', sep= '::', engine= 'python', encoding= 'latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep= '::', engine= 'python', encoding= 'latin-1')

In [3]:
#Preparing the trainning set and test set
training_set = pd.read_csv('ml-100k/u1.base', delimiter= '\t')
training_set = np.array(training_set, dtype= 'int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter= '\t')
test_set = np.array(test_set, dtype= 'int')

In [4]:
#Getting the number of users and movies
nb_users = int(max(max(training_set[:, 0], ), max(test_set[:, 0])))
nb_movies = int(max(max(training_set[:, 1], ), max(test_set[:, 1])))

In [5]:
#Converting data into an array with users in lines and movies in columns
#This function, convert, is designed to transform a dataset of user ratings into a matrix where each row represents a user and each column represents a movie.
def convert(data): #This defines a function named convert that takes a single parameter data.
    #nitializes an empty list new_data which will eventually hold the rows of the new matrix, with each row representing the ratings of a single user.
    new_data = []
    for id_users in range(1, nb_users + 1):#nb_users - 1 is to include all the user id.
        #getting all the movies id.
        id_movies = data[:, 1] [data[:, 0] == id_users]#data[:, 1] is to select the movie id column && [data[:, 0] == id_user] to get all the movies id when user id is 1, otherwise user 2 also rated for the movies that will include in the list.
        #getting all the movies id
        id_ratings = data[:, 2] [data[:, 0] == id_users]#data[:, 1] is to select the ratings id column && [data[:, 0] == id_user] to get all the ratings id when user id is 1, otherwise rating of  user 2 also  will include in the list.
        #set all the ratings to zero.
        ratings = np.zeros(nb_movies)
        #replace 0 with movies rating && will keep 0 for the movie that did not get a rating.
        ratings[id_movies - 1] = id_ratings#ratings[id_movies - 1] to include 0 number id of the movie.
        #append all the raings into the main data list.
        new_data.append(list(ratings))
    return new_data

#Apply convert() function to trainning set && test set
training_set = convert(training_set)
test_set = convert(test_set)

In [6]:
# Print the ratings for the first user
print(training_set[0])

# Print the ratings for the second user
print(training_set[1])

# Print the ratings for the last user
print(training_set[-1])

[0.0, 3.0, 4.0, 3.0, 3.0, 0.0, 4.0, 1.0, 5.0, 0.0, 2.0, 0.0, 5.0, 0.0, 5.0, 5.0, 0.0, 4.0, 5.0, 0.0, 1.0, 4.0, 0.0, 0.0, 4.0, 3.0, 0.0, 4.0, 1.0, 3.0, 0.0, 5.0, 0.0, 2.0, 1.0, 0.0, 2.0, 3.0, 0.0, 3.0, 2.0, 5.0, 4.0, 0.0, 5.0, 4.0, 0.0, 5.0, 0.0, 5.0, 0.0, 4.0, 0.0, 0.0, 5.0, 0.0, 5.0, 4.0, 5.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 0.0, 4.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 4.0, 0.0, 4.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 5.0, 4.0, 5.0, 0.0, 0.0, 0.0, 5.0, 2.0, 4.0, 0.0, 0.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 0.0, 2.0, 4.0, 0.0, 0.0, 5.0, 1.0, 5.0, 0.0, 0.0, 0.0, 5.0, 3.0, 0.0, 0.0, 5.0, 0.0, 0.0, 3.0, 4.0, 5.0, 0.0, 2.0, 5.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 0.0, 4.0, 3.0, 5.0, 1.0, 3.0, 0.0, 3.0, 2.0, 0.0, 4.0, 0.0, 4.0, 3.0, 0.0, 2.0, 0.0, 0.0, 5.0, 3.0, 0.0, 0.0, 4.0, 0.0, 3.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 5.0, 5.0, 2.0, 5.0, 5.0, 0.0, 0.0, 5.0, 5.0, 0.0, 0.0, 5.0, 0.0, 5.0, 3.0, 0.0, 5.0, 4.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 5.0, 4.0, 0.0, 4.0, 5.0, 0.0, 5.0, 5.0, 4.0, 0.0,

In [7]:
#Converting the data into torch tensors
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

In [8]:
#convert the ratings into binary ratings 1 (liked) & 0 (not liked)
training_set[training_set == 0] = -1
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
training_set[training_set >= 3] = 1

test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1

In [9]:
#Creating the architecture of the neural network.
class RBM():
    def __inti__(self, nv, nh):
        self.w = torch.randn(nv, nh)
        self.a = torch.randn(1, nv)
        self.b = torch.randn(1, nh)

    def sanple_h(self, x):
        wx = torch.mm(x, self.w.t())
        activation = wx + self.a.expand_as(wx)
        p_h_given_v = torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)
    
    def sample_h(self, y):
        wy = torch.mm(y, self.w)
        activation = wy + self.b.expand_as(wy)
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)

    def train(self, v0, vk, ph0, phk):
        self.w += torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)
        self.b += torch.sum((v0 - vk), 0)
        self.a += torch.sum((ph0 - phk), 0)

In [None]:
nv = len(training_set[0])
nh = 100
batch_size = 100
rbm = RBM(nv, nh)