In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

if tf.__version__.startswith('2'):
    tf.compat.v1.disable_eager_execution()

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from scipy.sparse import lil_matrix, csr_matrix, save_npz, load_npz

In [202]:
class RBM(object):
    """Restricted Boltzmann Machine with sigmoid units, run in TF1 graph mode.

    The parameters live on the instance as NumPy arrays (``w``, ``hb``, ``vb``)
    and are fed into the TensorFlow graph through placeholders/constants, so no
    ``tf.Variable`` is ever created. Every call to :meth:`train` or
    :meth:`rbm_output` builds its ops in a private ``tf.Graph`` so repeated
    calls do not keep growing the process-wide default graph.
    """

    def __init__(self, input_size, output_size,
                 learning_rate, epochs, batchsize):
        """Store the hyperparameters and allocate zero-valued parameters.

        Args:
            input_size: number of visible units (columns of the input matrix).
            output_size: number of hidden units.
            learning_rate: step size applied to every parameter update.
            epochs: number of passes over the data made by ``train``.
            batchsize: number of rows fed per update step.
        """
        # Define hyperparameters
        self._input_size = input_size
        self._output_size = output_size
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batchsize = batchsize

        # Initialize weights and biases using zero matrices
        self.w = np.zeros([input_size, output_size], dtype=np.float32)
        self.hb = np.zeros([output_size], dtype=np.float32)
        self.vb = np.zeros([input_size], dtype=np.float32)

    @staticmethod
    def _batch_bounds(n_samples, batchsize):
        """Return ``(start, end)`` slice bounds that cover all ``n_samples`` rows.

        The last pair is shorter than ``batchsize`` when ``n_samples`` is not a
        multiple of it.

        Raises:
            ValueError: if ``batchsize`` is smaller than 1.
        """
        if batchsize < 1:
            raise ValueError("batchsize must be a positive integer")
        return [(start, min(start + batchsize, n_samples))
                for start in range(0, n_samples, batchsize)]

    def prob_h_given_v(self, visible, w, hb):
        """Hidden activation probabilities: sigmoid(visible @ w + hb)."""
        return tf.nn.sigmoid(tf.matmul(visible, w) + hb)

    def prob_v_given_h(self, hidden, w, vb):
        """Visible activation probabilities: sigmoid(hidden @ w.T + vb)."""
        return tf.nn.sigmoid(tf.matmul(hidden, tf.transpose(w)) + vb)

    def sample_prob(self, probs):
        """Draw 0/1 samples: 1 where ``probs`` exceeds a fresh uniform draw."""
        return tf.nn.relu(tf.sign(probs - tf.compat.v1.random_uniform(tf.shape(probs))))

    def train(self, X):
        """Fit the parameters on ``X`` and return the per-epoch reconstruction errors.

        Each step runs one Gibbs pass (v0 -> h0 -> v1 -> h1) and moves the
        parameters along the difference between the data-driven and the
        reconstruction-driven statistics. Training always restarts from
        all-zero parameters; the values currently stored on the instance are
        not used as a starting point.

        Args:
            X: 2-D float32 array with ``input_size`` columns.

        Returns:
            A list with one mean-squared reconstruction error per epoch,
            measured on the whole of ``X``.

        Raises:
            ValueError: if ``self.batchsize`` is smaller than 1.
        """
        # Cover every row, including the final (possibly shorter) batch.
        batches = self._batch_bounds(len(X), self.batchsize)

        cur_w = np.zeros([self._input_size, self._output_size], dtype=np.float32)
        cur_hb = np.zeros([self._output_size], dtype=np.float32)
        cur_vb = np.zeros([self._input_size], dtype=np.float32)

        error_list = []

        with tf.Graph().as_default():
            _w = tf.compat.v1.placeholder(tf.float32, [self._input_size, self._output_size])
            _hb = tf.compat.v1.placeholder(tf.float32, [self._output_size])
            _vb = tf.compat.v1.placeholder(tf.float32, [self._input_size])

            v0 = tf.compat.v1.placeholder(tf.float32, [None, self._input_size])
            h0 = self.sample_prob(self.prob_h_given_v(v0, _w, _hb))
            v1 = self.sample_prob(self.prob_v_given_h(h0, _w, _vb))
            h1 = self.prob_h_given_v(v1, _w, _hb)

            positive_grad = tf.matmul(tf.transpose(v0), h0)
            negative_grad = tf.matmul(tf.transpose(v1), h1)

            # The batch length is read from the tensor so short batches are averaged correctly.
            batch_len = tf.cast(tf.shape(v0)[0], tf.float32)
            update_w = _w + self.learning_rate * (positive_grad - negative_grad) / batch_len
            update_vb = _vb + self.learning_rate * tf.reduce_mean(v0 - v1, 0)
            update_hb = _hb + self.learning_rate * tf.reduce_mean(h0 - h1, 0)

            err = tf.reduce_mean(tf.square(v0 - v1))

            with tf.compat.v1.Session() as sess:
                for epoch in range(self.epochs):
                    for start, end in batches:
                        # One run() call: all three updates must come from the
                        # same random draw of h0 / v1. Separate calls would
                        # re-sample and combine unrelated Gibbs chains.
                        cur_w, cur_hb, cur_vb = sess.run(
                            [update_w, update_hb, update_vb],
                            feed_dict={v0: X[start:end], _w: cur_w, _hb: cur_hb, _vb: cur_vb})
                    error = sess.run(err, feed_dict={v0: X, _w: cur_w, _vb: cur_vb, _hb: cur_hb})
                    print('Epoch: %d' % epoch, 'reconstruction error: %f' % error)
                    error_list.append(error)

        self.w = cur_w
        self.hb = cur_hb
        self.vb = cur_vb
        return error_list

    def rbm_output(self, X):
        """Propagate ``X`` through the stored parameters.

        Args:
            X: 2-D array with ``input_size`` columns; its dtype must be
                compatible with the float32 parameters in ``tf.matmul``.

        Returns:
            A tuple ``(out, visibleGen, hiddenGen)``: the hidden activation
            probabilities, a sampled visible reconstruction and the sampled
            hidden states that produced that reconstruction.
        """
        with tf.Graph().as_default():
            input_X = tf.constant(X)
            _w = tf.constant(self.w)
            _hb = tf.constant(self.hb)
            _vb = tf.constant(self.vb)
            out = self.prob_h_given_v(input_X, _w, _hb)
            hiddenGen = self.sample_prob(out)
            visibleGen = self.sample_prob(self.prob_v_given_h(hiddenGen, _w, _vb))
            with tf.compat.v1.Session() as sess:
                # Fetch everything in one run() so the returned hidden sample
                # is the one the returned reconstruction was generated from.
                out_val, visible_val, hidden_val = sess.run([out, visibleGen, hiddenGen])
        return out_val, visible_val, hidden_val

In [203]:
# Directory holding the sparse train/test matrices saved as .npz files.
start_path = '../../../data/functional/sparse/'

# Both splits are densified; only the training matrix is cast to float32.
A = np.float32(load_npz(start_path + "ratings_sparse_train.npz").toarray())
A_test = load_npz(start_path + "ratings_sparse_test.npz").toarray()

In [277]:
# 1999 visible and 1999 hidden units; the visible size has to equal the
# number of columns of the matrix later passed to train().
rbm = RBM(
    input_size=1999,
    output_size=1999,
    learning_rate=0.2,
    epochs=10,
    batchsize=200,
)

In [278]:
# Fit on the dense training matrix; `err` receives the list of per-epoch
# reconstruction errors returned by train().
err = rbm.train(X=A)

Epoch: 0 reconstruction error: 0.658407
Epoch: 1 reconstruction error: 0.640131
Epoch: 2 reconstruction error: 0.624979
Epoch: 3 reconstruction error: 0.606198
Epoch: 4 reconstruction error: 0.588669
Epoch: 5 reconstruction error: 0.571860
Epoch: 6 reconstruction error: 0.555588
Epoch: 7 reconstruction error: 0.543512
Epoch: 8 reconstruction error: 0.531365
Epoch: 9 reconstruction error: 0.519754


In [279]:
# Push the training matrix through the trained RBM. The three results are the
# hidden activation probabilities, a sampled visible reconstruction and the
# sampled hidden states.
outputX, reconstructedX, hiddenX = rbm.rbm_output(X=A)

outputX

array([[1.93236023e-01, 1.60529464e-01, 3.01662534e-01, ...,
        7.77973533e-01, 1.13093391e-01, 2.28641927e-01],
       [1.38775416e-04, 8.39076412e-04, 2.78046617e-04, ...,
        1.28848548e-03, 1.24078412e-02, 1.97609863e-03],
       [1.52425021e-02, 3.66334617e-02, 7.40028679e-01, ...,
        2.14049174e-03, 3.65217711e-05, 4.11403412e-03],
       ...,
       [1.36151211e-03, 3.89877846e-03, 2.61700875e-03, ...,
        1.02746524e-02, 3.29008438e-02, 2.33393861e-04],
       [8.08033869e-02, 2.45803166e-02, 4.23069857e-02, ...,
        4.37809452e-02, 4.27013589e-03, 5.04100993e-02],
       [7.83712938e-02, 1.23798795e-01, 1.84712425e-01, ...,
        6.20571077e-02, 2.56455958e-01, 1.16352700e-01]], dtype=float32)

In [280]:
# Mean squared error between the sampled reconstruction and the stored values,
# restricted to the non-zero entries of A.
# NOTE(review): this scores the matrix the model was trained on (A), not A_test,
# so despite the "*_validation" names it is not a held-out estimate. Confirm intent.
# NOTE(review): reconstructedX comes from RBM.sample_prob and is therefore 0/1
# valued, while A presumably holds raw ratings on a larger scale; the error is
# likely dominated by that scale mismatch rather than by model quality.
predictionsArray = reconstructedX

# Compute the index tuple once; indexing a 2-D array with it already yields 1-D arrays.
rated_idx = A.nonzero()
pred_validation = predictionsArray[rated_idx]
actual_validation = A[rated_idx]

# scikit-learn's signature is mean_squared_error(y_true, y_pred).
rbm_prediction = mean_squared_error(actual_validation, pred_validation)
print('Mean squared error using RBM prediction:', rbm_prediction)

Mean squared error using RBM prediction: 59.583378


In [281]:
# Show the reconstructed values that were selected at the non-zero positions of A.
print(pred_validation)

[1. 0. 1. ... 0. 0. 0.]


In [282]:
import pickle

# CAUTION: the file carries a .json extension but is read as a pickle.
# pickle.load can execute arbitrary code, so only open files you trust.
with open('../../../data/shrink/ratings-book-translation.json', 'rb') as translation_file:
    book_translations_old_uu = pickle.load(translation_file)

In [283]:
# Reverse lookup of the translation table: its values become the keys.
book_translations_uu_old = dict(
    (col_idx, book_id) for book_id, col_idx in book_translations_old_uu.items()
)

book_translations_uu_old

{0: 16388,
 1: 221189,
 2: 147462,
 3: 172045,
 4: 13,
 5: 90141,
 6: 114722,
 7: 213032,
 8: 213033,
 9: 139307,
 10: 73777,
 11: 41021,
 12: 237631,
 13: 16450,
 14: 237637,
 15: 147528,
 16: 196680,
 17: 213073,
 18: 16467,
 19: 122965,
 20: 114777,
 21: 172123,
 22: 65630,
 23: 204898,
 24: 221282,
 25: 237667,
 26: 90213,
 27: 114813,
 28: 126,
 29: 106623,
 30: 123005,
 31: 196737,
 32: 8322,
 33: 8330,
 34: 141,
 35: 139405,
 36: 73873,
 37: 163988,
 38: 49304,
 39: 139419,
 40: 147630,
 41: 57519,
 42: 147637,
 43: 123064,
 44: 32954,
 45: 164027,
 46: 65724,
 47: 221373,
 48: 114883,
 49: 73933,
 50: 147665,
 51: 213204,
 52: 82137,
 53: 164058,
 54: 49380,
 55: 123121,
 56: 49399,
 57: 131323,
 58: 131326,
 59: 49422,
 60: 123153,
 61: 82195,
 62: 16665,
 63: 8473,
 64: 283,
 65: 237853,
 66: 98601,
 67: 24874,
 68: 41262,
 69: 90415,
 70: 123185,
 71: 123190,
 72: 82231,
 73: 180543,
 74: 8515,
 75: 326,
 76: 24908,
 77: 164174,
 78: 16718,
 79: 155987,
 80: 164183,
 81: 106

In [334]:
# Hand-built profile: (identifier, {translated index: rating}). The three
# bookIds are mapped through book_translations_old_uu to matrix column indices.
# 300000 is presumably an unused user id (TODO confirm).
user = (
    300000,
    dict(
        (book_translations_old_uu[book_id], rating)
        for book_id, rating in ((136033, 10), (232521, 9), (136639, 8))
    ),
)

In [335]:

# Single all-zero row with one slot per visible unit (1999 columns).
np_user = np.zeros(shape=(1, 1999), dtype=np.float64)

In [336]:
# Write each rating of the hand-built profile into its column of the row vector.
for col_idx, rating in user[1].items():
    np_user[0, col_idx] = rating

In [337]:
# Cast to float32 before handing the row to rbm_output.
np_user = np_user.astype(np.float32)

# NOTE: this rebinds outputX / reconstructedX / hiddenX, replacing the values
# computed earlier for the full training matrix.
outputX, reconstructedX, hiddenX = rbm.rbm_output(X=np_user)

In [338]:
# Hidden-unit activation probabilities for the hand-built user (first value
# returned by rbm_output).
print(outputX)

[[1.1099967e-03 1.9241832e-01 1.9139105e-02 ... 9.3436422e-04
  9.9999994e-01 2.0346234e-03]]


In [339]:
# Pair every position of the single output row with the bookId stored for that
# index in book_translations_uu_old.
# NOTE(review): outputX is the hidden-unit activation returned by rbm_output,
# not a visible-layer reconstruction. Treating hidden unit i as book column i
# only lines up because the RBM was built with as many hidden as visible units.
# Confirm this is the intended score.
books_old_output = {
    book_translations_uu_old[idx]: score
    for idx, score in enumerate(outputX[0])
}

In [340]:
# Same mapping, re-inserted in descending score order (dicts keep insertion order).
sorted_books = dict(
    sorted(books_old_output.items(), key=lambda pair: pair[1], reverse=True)
)

In [341]:
# Book metadata table. pandas is already imported as `pd` in the notebook's
# first cell, so it is not re-imported here.
df_info = pd.read_csv("../../../data/edited/books-info-edited.csv")

In [342]:
# bookIds in the same (descending score) order as sorted_books.
n_books = list(sorted_books)

In [343]:
# Attach the score of each book by looking its bookId up in sorted_books;
# ids that are not in the mapping end up as NaN.
df_info["new_rating"] = df_info["bookId"].map(sorted_books)

In [346]:
# Keep only the metadata rows whose bookId received a score.
has_score = df_info["bookId"].isin(n_books)
df_info_books = df_info.loc[has_score]

print(sorted_books)

{41021: 1.0, 237631: 1.0, 16450: 1.0, 114777: 1.0, 123005: 1.0, 49304: 1.0, 123121: 1.0, 8473: 1.0, 90415: 1.0, 123190: 1.0, 164183: 1.0, 33114: 1.0, 16840: 1.0, 8673: 1.0, 8689: 1.0, 188948: 1.0, 156216: 1.0, 98961: 1.0, 205568: 1.0, 222061: 1.0, 222073: 1.0, 197502: 1.0, 148403: 1.0, 189368: 1.0, 213961: 1.0, 99297: 1.0, 181283: 1.0, 82993: 1.0, 197734: 1.0, 33908: 1.0, 230695: 1.0, 238914: 1.0, 66910: 1.0, 107987: 1.0, 157224: 1.0, 91744: 1.0, 231043: 1.0, 108172: 1.0, 42681: 1.0, 59298: 1.0, 157621: 1.0, 124889: 1.0, 116709: 1.0, 165863: 1.0, 26600: 1.0, 182278: 1.0, 116751: 1.0, 231470: 1.0, 174147: 1.0, 34887: 1.0, 182345: 1.0, 51284: 1.0, 182380: 1.0, 116858: 1.0, 59717: 1.0, 67935: 1.0, 223652: 1.0, 76212: 1.0, 158202: 1.0, 27144: 1.0, 76386: 1.0, 43683: 1.0, 84654: 1.0, 35518: 1.0, 84708: 1.0, 109287: 1.0, 43779: 1.0, 43821: 1.0, 174907: 1.0, 215953: 1.0, 215982: 1.0, 84976: 1.0, 183303: 1.0, 109596: 1.0, 183391: 1.0, 191588: 1.0, 52392: 1.0, 199937: 1.0, 126214: 1.0, 36122: 1

In [345]:
# Scored books whose title contains "Potter".
df_info_books.loc[df_info_books["title"].str.contains("Potter")]

Unnamed: 0,isbn,title,author,year,publisher,image_s,image_m,image_l,bookId,new_rating
2088,059035342X,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,1999,Arthur A. Levine Books,http://images.amazon.com/images/P/059035342X.0...,http://images.amazon.com/images/P/059035342X.0...,http://images.amazon.com/images/P/059035342X.0...,219655,0.002094
2735,0590353403,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,1998,Scholastic,http://images.amazon.com/images/P/0590353403.0...,http://images.amazon.com/images/P/0590353403.0...,http://images.amazon.com/images/P/0590353403.0...,237853,0.999997
3363,0439064872,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2000,Scholastic,http://images.amazon.com/images/P/0439064872.0...,http://images.amazon.com/images/P/0439064872.0...,http://images.amazon.com/images/P/0439064872.0...,31697,0.094717
3720,0439136350,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,1999,Scholastic,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,136033,0.986067
5256,0439139597,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000,Scholastic,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...,214312,0.001367
5326,043935806X,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,Scholastic,http://images.amazon.com/images/P/043935806X.0...,http://images.amazon.com/images/P/043935806X.0...,http://images.amazon.com/images/P/043935806X.0...,232521,0.037377
5595,0425154092,From Potter's Field,Patricia Daniels Cornwell,1996,Berkley Publishing Group,http://images.amazon.com/images/P/0425154092.0...,http://images.amazon.com/images/P/0425154092.0...,http://images.amazon.com/images/P/0425154092.0...,146121,0.000532
72548,0439425220,Harry Potter and the Chamber of Secrets Postca...,J. K. Rowling,2002,Scholastic,http://images.amazon.com/images/P/0439425220.0...,http://images.amazon.com/images/P/0439425220.0...,http://images.amazon.com/images/P/0439425220.0...,94815,0.004252
