In [1]:
import numpy as np
import os
import pickle
import re

import tensorflow as tf
from tensorflow.contrib import rnn
import tensorflow.contrib.slim as slim
from tqdm import tqdm

In [2]:
# --- Model dimensions (shared by the placeholders and the network builders) ---

# Sequence axis: juung() reshapes the per-row features to
# [-1, time_window, feature_dim] before the LSTM.
time_window = 10

# Character-matrix input: axes 1, 2 and 3 of the `char_mtx` placeholder.
max_len_review = 288
char_mtx_row = 1000
_len_alphabet = 70

# Width of the `meta` placeholder.
_meta_dim = 1038

# Width of the label tensor `y` and of the final prediction layer.
item_dim = 70000

In [3]:
# Graph inputs. The leading (batch) dimension of every placeholder is left open.
char_mtx = tf.placeholder(tf.float32, [None, max_len_review, char_mtx_row, _len_alphabet])
img_mtx = tf.placeholder(tf.float32, [None, 32, 32, 3])  # input image shape: 32x32, 3 channels
meta = tf.placeholder(tf.float32, [None, _meta_dim])
y = tf.placeholder(tf.float32, [None, item_dim])  # labels; argmax over axis 1 is taken later as the target index

In [4]:
def review_CNN(char_mtx, filter_sizes = [2, 3, 7, 11], filter_nums = [100, 100, 100, 100], stride=[1,1,1,1],
               mlp_units=[200,50], reuse=False, is_training=True):
    """Character-level CNN + MLP that turns a review character tensor into one feature row.

    The input is flattened to ``[-1, char_mtx_row, _len_alphabet]`` and given a
    trailing channel axis. For every ``(filter_sizes[i], filter_nums[i])`` pair a
    convolution whose kernel spans the full ``_len_alphabet`` width is applied
    (VALID padding), followed by bias, ReLU and a max over axis 1. The pooled
    outputs are concatenated, passed through two fully connected layers, reshaped
    to ``[-1, max_len_review, mlp_units[1]]`` and summed over axis 1.

    Args:
        char_mtx: tensor reshapeable to ``[-1, char_mtx_row, _len_alphabet]``
            (the notebook feeds ``[batch, max_len_review, char_mtx_row, _len_alphabet]``).
        filter_sizes: kernel heights, one per convolution branch.
        filter_nums: number of output channels, one per convolution branch.
        stride: strides passed to ``tf.nn.conv2d``.
        mlp_units: output sizes of the two fully connected layers.
        reuse: forwarded to the variable scopes; when true, the convolution
            filters/biases and the MLP weights of a previous call are shared.
        is_training: accepted for signature compatibility; not used by this function.

    Returns:
        Tensor of shape ``[-1, mlp_units[1]]``.

    Raises:
        ValueError: if ``filter_sizes`` and ``filter_nums`` differ in length.
    """
    if len(filter_sizes) != len(filter_nums):
        raise ValueError('filter_sizes and filter_nums must have the same length')

    ## CNN for review aspect, sentiment extraction
    with tf.variable_scope('review-charCNN', reuse=reuse):
        input_ = tf.reshape(char_mtx, shape=[-1, char_mtx_row, _len_alphabet])
        input_ = tf.expand_dims(input_, axis=3)

        pooled = []
        for idx, (size, num) in enumerate(zip(filter_sizes, filter_nums), start=1):
            # variable_scope + get_variable (instead of name_scope + tf.Variable)
            # so that `reuse` actually shares the convolution weights. The
            # resulting variable names are the same as before.
            with tf.variable_scope('conv-filter-%d' % idx):
                W = tf.get_variable('filter-%d' % idx,
                                    shape=[size, _len_alphabet, 1, num],
                                    initializer=tf.truncated_normal_initializer())
                b = tf.get_variable('bias-%d' % idx,
                                    shape=[num],
                                    initializer=tf.random_uniform_initializer(minval=0.0, maxval=1.0))
                conv = tf.nn.conv2d(input_, W, strides=stride, padding='VALID', name='conv-%d' % idx)
                out = tf.nn.relu(tf.nn.bias_add(conv, b))
                # The kernel covers the whole width, so after the max over
                # axis 1 the tensor is [N, 1, num].
                pooled.append(tf.reduce_max(out, axis=1))

        ## concat -> [N, sum(filter_nums)]
        max_concat = tf.squeeze(tf.concat(pooled, axis=2), axis=1)

    ## MLP for feature reduction
    with tf.variable_scope('review-MLP', reuse=reuse):
        fc1 = slim.fully_connected(max_concat, mlp_units[0])
        fc2 = slim.fully_connected(fc1, mlp_units[1])

    # Regroup the flattened rows into blocks of max_len_review and sum each block.
    result = tf.reshape(fc2, shape=[-1, max_len_review, mlp_units[1]])
    return tf.reduce_sum(result, axis=1)

In [5]:
def image_CNN(img_mtx, mlp_units=[200,50], reuse=False, is_training=True):
    """Small conv/batch-norm stack followed by a two-layer MLP for image features.

    Two 3x3 convolutions (6 then 12 output channels, stride 2, SAME padding,
    Xavier-initialized, no activation of their own) are each followed by a
    batch-norm layer with ReLU. The result is flattened and passed through two
    fully connected layers.

    Args:
        img_mtx: image tensor fed to the first convolution.
        mlp_units: output sizes of the two fully connected layers.
        reuse: forwarded to the 'image-CNN' and nested 'MLP' variable scopes.
        is_training: forwarded to the batch-norm layers.

    Returns:
        Output of the second fully connected layer (last dim ``mlp_units[1]``).
    """
    with tf.variable_scope('image-CNN', reuse=reuse):
        # Shared defaults for every conv / batch-norm layer below.
        conv_defaults = dict(padding='SAME', activation_fn=None, stride=2,
                             weights_initializer=tf.contrib.layers.xavier_initializer())
        bn_defaults = dict(decay=0.95, center=True, scale=True, updates_collections=None,
                           activation_fn=tf.nn.relu, is_training=is_training)

        with slim.arg_scope([slim.conv2d], **conv_defaults), \
                slim.arg_scope([slim.batch_norm], **bn_defaults):
            net = slim.conv2d(img_mtx, 6, [3, 3], scope='conv-1')
            net = slim.batch_norm(net, scope='bn-1')
            net = slim.conv2d(net, 12, [3, 3], scope='conv-2')
            net = slim.batch_norm(net, scope='bn-2')

        with tf.variable_scope('MLP', reuse=reuse):
            flat = slim.flatten(net)
            hidden_layer = slim.fully_connected(flat, mlp_units[0])
            embedding = slim.fully_connected(hidden_layer, mlp_units[1])

    return embedding

In [6]:
def userLSTM(lstm_input, hidden=128, reuse=False):
    """Run a single-layer LSTM over axis 1 of ``lstm_input`` and return the last output.

    Args:
        lstm_input: tensor whose axis 1 is unstacked into the list of time steps.
        hidden: number of units of the ``BasicLSTMCell``.
        reuse: forwarded to the ``BasicLSTMCell`` constructor.

    Returns:
        The output of the final time step.
    """
    lstm_cell = rnn.BasicLSTMCell(hidden, reuse=reuse)
    # static_rnn expects a Python list with one tensor per time step.
    time_steps = tf.unstack(lstm_input, axis=1)
    step_outputs, _final_state = rnn.static_rnn(
        lstm_cell, time_steps, dtype=tf.float32, scope='juungLSTM')
    last_output = step_outputs[-1]
    return last_output

In [7]:
def fc_pred(lstm_result, reuse=False):
    """Map the LSTM output to one unnormalized score (logit) per item.

    Args:
        lstm_result: 2-D feature tensor (the notebook passes the last LSTM output).
        reuse: forwarded to both fully connected layers so that their
            'fc-1' / 'fc-2' variables can be shared across calls.

    Returns:
        Tensor with ``item_dim`` columns holding raw logits.
    """
    fc1 = slim.fully_connected(lstm_result, 1024, scope='fc-1', reuse=reuse)
    # slim.fully_connected applies ReLU by default. The output of this layer is
    # consumed as logits by softmax_cross_entropy_with_logits / in_top_k, so it
    # must stay linear; otherwise every negative logit is clamped to 0.
    fc2 = slim.fully_connected(fc1, item_dim, activation_fn=None, scope='fc-2', reuse=reuse)
    return fc2

In [8]:
def juung(char_mtx, img_mtx, meta, hidden=128, reuse=False, is_training=True):
    """Build the full model: review CNN + image CNN + meta features -> LSTM -> item logits.

    Per-row review and image features are concatenated with ``meta`` along
    axis 1, regrouped into sequences of ``time_window`` consecutive rows, run
    through the LSTM, and the last LSTM output is projected by ``fc_pred``.

    Args:
        char_mtx: review character tensor passed to ``review_CNN``.
        img_mtx: image tensor passed to ``image_CNN``.
        meta: 2-D meta-feature tensor concatenated with the CNN outputs.
        hidden: number of LSTM units.
        reuse: forwarded to every sub-network so a second call can share
            variables with the first one.
        is_training: forwarded to ``review_CNN`` and ``image_CNN`` (the latter
            uses it for batch normalization).

    Returns:
        Output of ``fc_pred``: one row per sequence, ``item_dim`` columns.
    """
    review_result = review_CNN(char_mtx, reuse=reuse, is_training=is_training)
    img_result = image_CNN(img_mtx, reuse=reuse, is_training=is_training)
    concat_result = tf.concat([review_result, img_result, meta], axis=1)

    # Use the statically known width of the concatenated features instead of
    # a hard-coded 1138, so changes to the sub-network output sizes or to the
    # meta width do not silently break the reshape.
    feature_dim = concat_result.get_shape().as_list()[-1]

    # TODO: verify this reshape. It groups every `time_window` consecutive rows
    # into one sequence, so the number of input rows must be a multiple of
    # time_window.
    lstm_input = tf.reshape(concat_result, shape=[-1, time_window, feature_dim])
    lstm_result = userLSTM(lstm_input, hidden=hidden, reuse=reuse)
    pred = fc_pred(lstm_result, reuse=reuse)
    return pred

In [9]:
# Build the model graph on the input placeholders (default hidden size, no variable reuse).
pred = juung(char_mtx=char_mtx, img_mtx=img_mtx, meta=meta)

In [10]:
# Softmax cross-entropy between the labels `y` and the model output, averaged over rows.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)
)

In [12]:
# Index of the largest entry in each label row, as int32.
y_idx = tf.cast(
    tf.argmax(y, axis=1),
    dtype=tf.int32,
)

In [19]:
# Number of rows (a count, not a rate) whose target index is among the 10 highest-scoring predictions.
top_k = tf.reduce_sum(
    tf.cast(
        tf.nn.in_top_k(pred, y_idx, k=10, name='top-k'),
        dtype=tf.float32,
    )
)