In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn.parallel
import torch.utils.data


In [2]:
movies = pd.read_csv('movies.dat', sep='::', header=None, engine='python', encoding='latin-1')
users = pd.read_csv('users.dat', sep='::', header=None, engine='python', encoding='latin-1')
ratings = pd.read_csv('ratings.dat', sep='::', header=None, engine='python', encoding='latin-1')

In [3]:
training_set = pd.read_csv('u1.base', delimiter='\t', header=None)
training_set = np.array(training_set, dtype='int')
test_set = pd.read_csv('u1.test', delimiter='\t', header=None)
test_set = np.array(test_set, dtype='int')

In [4]:
nb_users = int(max(max(training_set[:, 0], ), max(test_set[:, 0])))
nb_movies = int(max(max(training_set[:, 1], ), max(test_set[:, 1])))
test_set_movie_link = []

In [5]:
def convert(data, set_type):
  new_data = []
  for id_users in range(1, nb_users + 1):
    id_movies = data[:, 1] [data[:, 0] == id_users]
    id_ratings = data[:, 2] [data[:, 0] == id_users]
    ratings = np.zeros(nb_movies)
    ratings[id_movies - 1] = id_ratings
    new_data.append(list(ratings))
    if(set_type == 'test'):
      test_set_movie_link.append(id_movies)
  return new_data

In [6]:
def encode(dataset):
    dataset[dataset == 0] = -1
    dataset[dataset == 1] = 0
    dataset[dataset == 2] = 0
    dataset[dataset >= 3] = 1
    return dataset

In [7]:
training_set = convert(training_set, 'train')
test_set = convert(test_set, 'test')

In [8]:
training_set = torch.FloatTensor(np.array(training_set))
test_set = torch.FloatTensor(np.array(test_set))

In [9]:
training_set =  encode(training_set)
test_set =  encode(test_set)

In [10]:
class RBM():
    def __init__(self, num_vis, num_hid):
        self.W = torch.randn(num_hid, num_vis)
        self.a = torch.randn(1, num_hid)
        self.b = torch.randn(1, num_vis)
    def sample_h(self, x):
        weight_x = torch.mm(x, self.W.t())
        activation = weight_x + self.a.expand_as(weight_x)
        prob_hid_given_vis = torch.sigmoid(activation)
        return prob_hid_given_vis, torch.bernoulli(prob_hid_given_vis)
    def sample_v(self, y):
        weight_y = torch.mm(y, self.W)
        activation = weight_y + self.b.expand_as(weight_y)
        prob_vis_given_hid = torch.sigmoid(activation)
        return prob_vis_given_hid, torch.bernoulli(prob_vis_given_hid)
    def train(self, vis, vis_k, prob_hid, prob_hid_k):
        self.W += (torch.mm(vis.t(), prob_hid) - torch.mm(vis_k.t(), prob_hid_k)).t()
        self.b += torch.sum((vis-vis_k), 0)
        self.a += torch.sum((prob_hid-prob_hid_k), 0)

In [11]:
num_vis = len(training_set[0])
num_hid = 100
batch_size = 100
rbm = RBM(num_vis, num_hid)
num_epoch = 10

for epoch in range(1, num_epoch+1):
    train_loss = 0
    s = 0.0
    for id_user in range(0, nb_users - batch_size, batch_size):
        vis_k = training_set[id_user: id_user+batch_size]
        vis = training_set[id_user: id_user+batch_size]
        prob_hid,_ = rbm.sample_h(vis)
        for k in range(10):
            _,hid_k = rbm.sample_h(vis_k)
            _,vis_k = rbm.sample_v(hid_k)
            vis_k[vis<0] = vis[vis<0]
        prob_hid_k,_ = rbm.sample_h(vis_k)
        rbm.train(vis, vis_k, prob_hid, prob_hid_k)
        train_loss += torch.mean(torch.abs(vis[vis > 0] - vis_k[vis > 0]))
        s += 1.0
    print('epoch: ' +str(epoch)+ ' loss: ' + str(train_loss/s))

epoch: 1 loss: tensor(0.3078)
epoch: 2 loss: tensor(0.1265)
epoch: 3 loss: tensor(0.1451)
epoch: 4 loss: tensor(0.1485)
epoch: 5 loss: tensor(0.1491)
epoch: 6 loss: tensor(0.1478)
epoch: 7 loss: tensor(0.1475)
epoch: 8 loss: tensor(0.1490)
epoch: 9 loss: tensor(0.1439)
epoch: 10 loss: tensor(0.1515)


In [12]:
test_loss = 0
s = 0.0
test_set_movie_rates = []
for id_user in range(nb_users):
    vis = training_set[id_user: id_user+1]
    vis_t = test_set[id_user: id_user+1]
    if len(vis_t[vis_t>=0]) > 0:
        _,hid = rbm.sample_h(vis)
        _,vis = rbm.sample_v(hid)
        test_loss += torch.mean(torch.abs(vis_t[vis_t > 0] - vis[vis_t > 0]))
        test_set_movie_rates.append(vis)
        s += 1.0
print('test loss: ' + str(test_loss/s))

test loss: tensor(0.1638)


In [13]:
ratings = ratings.rename(columns={0: 'user_id', 1: 'movie_id', 2: 'movie_rating', 3: 'time_stamp'})
movies = movies.rename(columns={0: 'movie_id_id', 1: 'movie_name', 2: 'movie_genre'})
users = users.rename(columns={0: 'user_id', 1: 'user_gender', 2: 'user_age', 3: 'user_job_code', 4: 'user_zip_code'})

In [14]:
def existing_user_movie_recommend(id_user):

    user_rates = test_set_movie_rates[id_user].flatten()
    movies_list = movies['movie_name']
    user_gender = users['user_gender']
    user_gender = user_gender[id_user-1]

    bad_recommendation = []
    good_recommendation = []


    if(user_gender == 'F'):
        identity = "she"
    else:
        identity = "he"

    for id_movie in test_set_movie_link[id_user-1]:
        movie = movies_list[id_movie]
        result = "User " + str(id_user) + " haven't watched yet the movie '" + movie + "', and " + identity
        if(user_rates[id_movie-1] == 1):
            good_recommendation.append(result+ " might like it")
        else:
            bad_recommendation.append(result+ " might not like it")
            
    return np.array(good_recommendation), np.array(bad_recommendation)

In [15]:
def new_user_rating_populate(liked_movies, disliked_movies):
    ratings_temp = np.full((nb_movies), -1.0)
    for r in liked_movies:
        ratings_temp[r] = 1.0
    for d in disliked_movies:
        ratings_temp[d] = 0.0
    return ratings_temp

In [16]:
def nonexisting_user_movie_recommend(liked_movies, disliked_movies):
    movies_list = movies['movie_name']
    bad_recommendation = []
    good_recommendation = []

    ratingslist = new_user_rating_populate(liked_movies, disliked_movies)
    ratingslist = torch.FloatTensor(np.array(ratingslist))
    ratingslist = torch.unsqueeze(ratingslist, 0)
    _,hid = rbm.sample_h(ratingslist)
    _,res = rbm.sample_v(hid)

    for id_movie in range(nb_movies):
        movie = movies_list[id_movie]
        if ratingslist[0][id_movie] < 0.0:
            result = "New User haven't watched yet the movie '" + movie
            if res[0][id_movie] == 1:
                good_recommendation.append(result+"', and he/she might like it")
            else:
                bad_recommendation.append(result+"', and he/she might not like it")

    return np.array(good_recommendation), np.array(bad_recommendation)

In [17]:
def init():
    response = input("Existing User: Type Y or N:")
    if response == 'Y':
        user_id = input("Enter User ID:")
        good_recommends, bad_recommends = existing_user_movie_recommend(int(user_id))
    else:
        liked_movies = []
        disliked_movies = []
        num_liked_movies= input("Enter number of movies you like:")
        for i in range(int(num_liked_movies)):
            id_movie = input("Enter the IDs of the movies you liked:")
            liked_movies.append(int(id_movie))
        num_disliked_movies= input("Enter number of movies you dislike:")
        for i in range(int(num_disliked_movies)):
            id_movie = input("Enter the IDs of the movies you disliked:")
            disliked_movies.append(int(id_movie))
        good_recommends, bad_recommends = nonexisting_user_movie_recommend(liked_movies, disliked_movies)
            
    return good_recommends, bad_recommends
        


In [18]:
thumbs_up, thumbs_down = init()

In [19]:
thumbs_up

array(["User 13 haven't watched yet the movie 'Dracula: Dead and Loving It (1995)', and he might like it",
       "User 13 haven't watched yet the movie 'Cutthroat Island (1995)', and he might like it",
       "User 13 haven't watched yet the movie 'Leaving Las Vegas (1995)', and he might like it",
       "User 13 haven't watched yet the movie 'Othello (1995)', and he might like it",
       "User 13 haven't watched yet the movie 'Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)', and he might like it",
       "User 13 haven't watched yet the movie 'Richard III (1995)', and he might like it",
       "User 13 haven't watched yet the movie 'Restoration (1995)', and he might like it",
       "User 13 haven't watched yet the movie 'How to Make an American Quilt (1995)', and he might like it",
       "User 13 haven't watched yet the movie 'When Night Is Falling (1995)', and he might like it",
       "User 13 haven't watched yet the movie 'Home for the Holidays (1995)', and he might like 

In [20]:
thumbs_down

array(["User 13 haven't watched yet the movie 'Grumpier Old Men (1995)', and he might not like it",
       "User 13 haven't watched yet the movie 'Copycat (1995)', and he might not like it",
       "User 13 haven't watched yet the movie 'Persuasion (1995)', and he might not like it",
       "User 13 haven't watched yet the movie 'Kicking and Screaming (1995)', and he might not like it",
       "User 13 haven't watched yet the movie 'Mary Reilly (1996)', and he might not like it",
       "User 13 haven't watched yet the movie 'In the Bleak Midwinter (1995)', and he might not like it",
       "User 13 haven't watched yet the movie 'Cure, The (1995)', and he might not like it",
       "User 13 haven't watched yet the movie 'Circle of Friends (1995)', and he might not like it",
       "User 13 haven't watched yet the movie 'Fluke (1995)', and he might not like it",
       "User 13 haven't watched yet the movie 'Hoop Dreams (1994)', and he might not like it",
       "User 13 haven't watched