# 6-layer stacked denoising autoencoder (SDAE) for text embedding
Reference: Zhang, Fuzheng, et al. "Collaborative knowledge base embedding for recommender systems." Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. ACM, 2016.

In [None]:
from datetime import datetime
import numpy as np
import os
import pickle
from time import time

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.python.framework import ops

from BatchGenerator import BatchGenerator
import param

In [None]:
# Load the pickled bag-of-words matrix that the autoencoder is trained on.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open('Amazon/bow_mtx.pkl', mode='rb') as bow_file:
    bow_mtx = pickle.load(bow_file)

In [None]:
# Start from an empty default graph so that re-running the notebook does not
# pile up duplicate ops/variables, then open the session used by every
# later cell.
tf.reset_default_graph()
sess = tf.Session()

In [None]:
# Width of the autoencoder input/output: the number of columns of bow_mtx.
input_dim = bow_mtx.shape[1]
# Hard-coded alternatives, left disabled:
# input_dim = 43735
# input_dim = 3000

In [None]:
# Batch size (also fixes the first dimension of the placeholder and of the
# sampled tensors) and the `iter_time` setting, both read from `param`.
batch_size, iter_time = param.batch_size, param.iter_time

In [None]:
# Hyperparameters read from the shared `param` module:
#   lambda_W / lambda_b - 1/lambda is the init stddev of weights / biases, and
#                         lambda is the weight of their squared-mean penalty
#   lambda_X            - 1/lambda_X is the stddev of the per-layer samples
#   hidden_dim          - width of the intermediate encoder/decoder layers
#   dim                 - width of the middle layer (output of e_layer_3)
lambda_W, lambda_b, lambda_X = param.lambda_W, param.lambda_b, param.lambda_X
hidden_dim, dim = param.hidden_dim, param.dim

In [None]:
# Step size handed to the Adadelta optimizer further down.
learning_rate = param.learning_rate

In [None]:
# Training matrix: the first input_dim columns of bow_mtx. With
# input_dim == bow_mtx.shape[1] this keeps every column; it only truncates
# when input_dim is overridden with a smaller value.
sample_mtx = bow_mtx[:, :input_dim]

---

In [None]:
# Placeholder for one mini-batch. The batch dimension is fixed to batch_size,
# so every fed batch must have exactly that many rows.
x_data = tf.placeholder(dtype=tf.float32, shape = [batch_size, input_dim])

In [None]:
def denoise(x_data):
    """Corrupt the input batch with additive standard-normal noise.

    Args:
        x_data: input tensor, expected to be [batch_size, input_dim] (the
            shape of the noise tensor created here).

    Returns:
        The tensor ``noise + x_data``.
    """
    # The noise is a corruption, not a model parameter, so it is marked
    # non-trainable: the optimizer must not be able to adapt it to the data.
    # It deliberately stays a tf.Variable so that it is still the first entry
    # of tf.global_variables(); it no longer appears in
    # tf.trainable_variables().
    noise = tf.Variable(
        tf.random_normal(shape=[batch_size, input_dim]),
        trainable=False,
        name="noise",
    )
    # NOTE(review): as a Variable the noise is drawn once at initialization
    # and reused for every batch; confirm whether per-step resampling is
    # wanted.
    return tf.add(noise, x_data)

In [None]:
# Build the corruption step under the "Denoise" name scope; X0 is the noisy
# version of the input batch.
with tf.name_scope("Denoise") as scope:
    X0 = denoise(x_data)

In [None]:
def e_layer_1(X0):
    """First encoder layer: input_dim -> hidden_dim.

    Args:
        X0: tensor of shape [batch_size, input_dim] (the corrupted input).

    Returns:
        (output1, X1): the sigmoid activation of the layer and a Gaussian
        sample centred on it with stddev 1/lambda_X, both
        [batch_size, hidden_dim].
    """
    # NOTE(review): 1/lambda is used as the *stddev* of the initializer; the
    # reference model states the prior in terms of precision - confirm.
    W1 = tf.Variable(
        tf.random_normal(shape=[input_dim, hidden_dim], mean=0., stddev=1/lambda_W),
        name='W1',
    )
    # One bias per hidden unit, broadcast over the batch by tf.add. A
    # [batch_size, hidden_dim] bias would tie the offset to a row's position
    # inside the batch and make the layer unusable with any other batch size.
    b1 = tf.Variable(
        tf.random_normal(shape=[hidden_dim], mean=0., stddev=1/lambda_b),
        name='b1',
    )
    output1 = tf.nn.sigmoid(tf.add(tf.matmul(X0, W1), b1), name='output1')
    X1 = tf.random_normal(
        shape=[batch_size, hidden_dim], mean=output1, stddev=1/lambda_X, name='X1',
    )
    return output1, X1

In [None]:
# Instantiate the first encoder layer on the corrupted input.
with tf.name_scope("e_layer_1") as scope:
    output1, X1 = e_layer_1(X0)

In [None]:
def e_layer_2(X1):
    """Second encoder layer: hidden_dim -> hidden_dim.

    Args:
        X1: tensor of shape [batch_size, hidden_dim].

    Returns:
        (output2, X2): the sigmoid activation of the layer and a Gaussian
        sample centred on it with stddev 1/lambda_X, both
        [batch_size, hidden_dim].
    """
    W2 = tf.Variable(
        tf.random_normal(shape=[hidden_dim, hidden_dim], mean=0., stddev=1/lambda_W),
        name='W2',
    )
    # One bias per hidden unit, broadcast over the batch by tf.add. A
    # [batch_size, hidden_dim] bias would tie the offset to a row's position
    # inside the batch and make the layer unusable with any other batch size.
    b2 = tf.Variable(
        tf.random_normal(shape=[hidden_dim], mean=0., stddev=1/lambda_b),
        name='b2',
    )
    output2 = tf.nn.sigmoid(tf.add(tf.matmul(X1, W2), b2), name='output2')
    X2 = tf.random_normal(
        shape=[batch_size, hidden_dim], mean=output2, stddev=1/lambda_X, name='X2',
    )
    return output2, X2

In [None]:
# Instantiate the second encoder layer on the sample from the first one.
with tf.name_scope("e_layer_2") as scope:
    output2, X2 = e_layer_2(X1)

In [None]:
def e_layer_3(X2):
    """Third encoder layer (the middle of the network): hidden_dim -> dim.

    Args:
        X2: tensor of shape [batch_size, hidden_dim].

    Returns:
        (output3, X3_): the sigmoid activation of the layer and a Gaussian
        sample centred on it with stddev 1/lambda_X, both [batch_size, dim].
    """
    W3 = tf.Variable(
        tf.random_normal(shape=[hidden_dim, dim], mean=0., stddev=1/lambda_W),
        name='W3',
    )
    # One bias per unit, broadcast over the batch by tf.add. A
    # [batch_size, dim] bias would tie the offset to a row's position inside
    # the batch and make the layer unusable with any other batch size.
    b3 = tf.Variable(
        tf.random_normal(shape=[dim], mean=0., stddev=1/lambda_b),
        name='b3',
    )
    output3 = tf.nn.sigmoid(tf.add(tf.matmul(X2, W3), b3), name='output3')
    X3_ = tf.random_normal(
        shape=[batch_size, dim], mean=output3, stddev=1/lambda_X, name='X3_',
    )
    return output3, X3_

In [None]:
# Instantiate the third encoder layer; output3 / X3_ are dim-wide.
with tf.name_scope("e_layer_3") as scope:
    output3, X3_ = e_layer_3(X2)

In [None]:
def d_layer_1(X3_):
    """First decoder layer: dim -> hidden_dim.

    Args:
        X3_: tensor of shape [batch_size, dim].

    Returns:
        (output4, X4): the sigmoid activation of the layer and a Gaussian
        sample centred on it with stddev 1/lambda_X, both
        [batch_size, hidden_dim].
    """
    W4 = tf.Variable(
        tf.random_normal(shape=[dim, hidden_dim], mean=0., stddev=1/lambda_W),
        name='W4',
    )
    # One bias per hidden unit, broadcast over the batch by tf.add. A
    # [batch_size, hidden_dim] bias would tie the offset to a row's position
    # inside the batch and make the layer unusable with any other batch size.
    b4 = tf.Variable(
        tf.random_normal(shape=[hidden_dim], mean=0., stddev=1/lambda_b),
        name='b4',
    )
    output4 = tf.nn.sigmoid(tf.add(tf.matmul(X3_, W4), b4), name='output4')
    X4 = tf.random_normal(
        shape=[batch_size, hidden_dim], mean=output4, stddev=1/lambda_X, name='X4',
    )
    return output4, X4

In [None]:
# Instantiate the first decoder layer on the sample from the middle layer.
with tf.name_scope("d_layer_1") as scope:
    output4, X4 = d_layer_1(X3_)

In [None]:
def d_layer_2(X4):
    """Second decoder layer: hidden_dim -> hidden_dim.

    Args:
        X4: tensor of shape [batch_size, hidden_dim].

    Returns:
        (output5, X5): the sigmoid activation of the layer and a Gaussian
        sample centred on it with stddev 1/lambda_X, both
        [batch_size, hidden_dim].
    """
    W5 = tf.Variable(
        tf.random_normal(shape=[hidden_dim, hidden_dim], mean=0., stddev=1/lambda_W),
        name='W5',
    )
    # One bias per hidden unit, broadcast over the batch by tf.add. A
    # [batch_size, hidden_dim] bias would tie the offset to a row's position
    # inside the batch and make the layer unusable with any other batch size.
    b5 = tf.Variable(
        tf.random_normal(shape=[hidden_dim], mean=0., stddev=1/lambda_b),
        name='b5',
    )
    output5 = tf.nn.sigmoid(tf.add(tf.matmul(X4, W5), b5), name='output5')
    X5 = tf.random_normal(
        shape=[batch_size, hidden_dim], mean=output5, stddev=1/lambda_X, name='X5',
    )
    return output5, X5

In [None]:
# Instantiate the second decoder layer.
with tf.name_scope('d_layer_2') as scope:
    output5, X5 = d_layer_2(X4)

In [None]:
def d_layer_3(X5):
    """Last decoder layer: hidden_dim -> input_dim (the reconstruction).

    Args:
        X5: tensor of shape [batch_size, hidden_dim].

    Returns:
        (output6, X6): the sigmoid activation of the layer and a Gaussian
        sample centred on it with stddev 1/lambda_X, both
        [batch_size, input_dim].
    """
    W6 = tf.Variable(
        tf.random_normal(shape=[hidden_dim, input_dim], mean=0., stddev=1/lambda_W),
        name='W6',
    )
    # One bias per output unit, broadcast over the batch by tf.add. A
    # [batch_size, input_dim] bias would tie the offset to a row's position
    # inside the batch and make the layer unusable with any other batch size.
    b6 = tf.Variable(
        tf.random_normal(shape=[input_dim], mean=0., stddev=1/lambda_b),
        name='b6',
    )
    output6 = tf.nn.sigmoid(tf.add(tf.matmul(X5, W6), b6), name='output6')
    X6 = tf.random_normal(
        shape=[batch_size, input_dim], mean=output6, stddev=1/lambda_X, name='X6',
    )
    return output6, X6

In [None]:
# Instantiate the last decoder layer; output6 has the width of the input.
with tf.name_scope('d_layer_3') as scope:
    output6, X6 = d_layer_3(X5)

In [None]:
# Bind the graph's variables to Python names for use in the loss below.
# This is positional: it relies on tf.global_variables() listing exactly
# these 13 variables in creation order (noise, then W/b of each layer). Any
# extra variable in the graph (e.g. after re-running a cell, or once the
# optimizer has been built) makes this unpacking fail.
[noise, W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6] = tf.global_variables()

In [None]:
# Layer-wise terms: mean squared distance between each layer's activation and
# the sample drawn around it.
layer_pairs = [
    (output1, X1), (output2, X2), (output3, X3_),
    (output4, X4), (output5, X5), (output6, X6),
]
layer_sq_err = tf.add_n([
    tf.reduce_mean(tf.square(tf.subtract(activation, sample)))
    for activation, sample in layer_pairs
])

# Reconstruction term: mean squared distance between the network output and
# the clean input. Without it the objective never references x_data. Each X_i
# is built as output_i plus scaled standard-normal noise, so output_i - X_i
# does not depend on the weights and the layer-wise terms alone give the
# optimizer nothing but the weight penalty to minimize.
# NOTE(review): lambda_X is reused as the weight of this term; introduce a
# dedicated setting in `param` if a separate weight is wanted.
# NOTE(review): output6 is a sigmoid, so this assumes the bag-of-words values
# are scaled to [0, 1] - confirm.
reconstruction_sq_err = tf.reduce_mean(tf.square(tf.subtract(output6, x_data)))

# Penalty on the mean squared value of every weight matrix and bias.
layer_params = [(W1, b1), (W2, b2), (W3, b3), (W4, b4), (W5, b5), (W6, b6)]
weight_penalty = tf.add_n([
    tf.add(tf.multiply(lambda_W, tf.reduce_mean(tf.square(weights))),
           tf.multiply(lambda_b, tf.reduce_mean(tf.square(biases))))
    for weights, biases in layer_params
])

log_likelihood = (
    -tf.multiply(lambda_X, tf.add(layer_sq_err, reconstruction_sq_err)) / 2
    - weight_penalty
)

# The optimizer minimizes, so train on the negated log-likelihood.
loss = -log_likelihood

In [None]:
# Adadelta optimizer and the training op that minimizes `loss`.
opt = tf.train.AdadeltaOptimizer(learning_rate=learning_rate)
train = opt.minimize(loss)

In [None]:
# Initialize every variable in the graph; this runs after the optimizer is
# created so the initializer covers all variables that exist at this point.
init = tf.global_variables_initializer()
sess.run(init)

In [None]:
# Identifier for this run: the current local time, formatted as
# YYYY-MM-DD_HH:MM:SS. It is used as the output directory name below.
run_started_at = datetime.fromtimestamp(time())
model_id = run_started_at.strftime('%Y-%m-%d_%H:%M:%S')

In [None]:
# Output locations for this run: ./SDAE/<model_id>/ with one sub-directory
# for summaries and one for the exported variables.
save_dir = './SDAE/%s' %(model_id)
save_summary_path = os.path.join(save_dir, 'model_summary')
save_variable_path = os.path.join(save_dir, 'model_variables')

In [None]:
# Create the run directory and its two sub-directories. exist_ok=True makes
# this idempotent and avoids the gap between an os.path.exists() check and
# the subsequent creation.
for directory in (save_dir, save_summary_path, save_variable_path):
    os.makedirs(directory, exist_ok=True)

In [None]:
# Collect all summary ops registered under the 'summaries' key and open a
# writer on the summary directory.
# NOTE(review): no tf.summary.* ops are created in the visible cells, in
# which case merge_all() returns None; `merged` and `summary_writer` are not
# used in the visible cells either - confirm whether summaries are still
# wanted.
merged = tf.summary.merge_all(key = 'summaries')
summary_writer = tf.summary.FileWriter(save_summary_path)

In [None]:
# Batch source over the training matrix; the same matrix is passed twice.
# NOTE(review): presumably BatchGenerator takes (inputs, targets, batch_size)
# and next_batch() returns an (inputs, targets) pair - verify against
# BatchGenerator. Only the first element is used here.
generator = BatchGenerator(sample_mtx, sample_mtx, batch_size)
X, _ = generator.next_batch()

In [None]:
# Training loop: one optimizer step per mini-batch. Every 10th step the loss
# is evaluated again on the batch that was just trained on and printed; the
# next batch is fetched at the end of each step.
# NOTE(review): the step count is hard-coded; the `iter_time` setting read
# from `param` is not used here.
for step in range(1, 50001):
    feed = {x_data: X}
    sess.run(train, feed_dict=feed)
    if step % 10 == 0:
        temp_loss = sess.run(loss, feed_dict=feed)
        print(temp_loss)
    X, _ = generator.next_batch()

In [None]:
# Export every trainable variable as a CSV file in save_variable_path. The
# file name is the variable name with '/' replaced by '-', plus '.csv'; each
# variable's name and shape are printed on one line as it is written.
for var in tf.trainable_variables():
    print(var.name, var.get_shape())
    csv_name = '{}.csv'.format(var.name.replace('/', '-'))
    csv_path = os.path.join(save_variable_path, csv_name)
    np.savetxt(csv_path, sess.run(var), delimiter=',')