In [1]:
import tensorflow as tf
import numpy as np
import json as js
import random

In [2]:
data = {}
with open('data.json') as file:
    data = js.load(file)
random.shuffle(data)
print(data[0])

data = [x for x in data if x['d_time'] >= 0]

{'user': 'user277', 'weekday': 4, 'time': 64, 'mention': 79, 'message': 677, 'mention_all': 6, 'reaction': 48, 'd_time': -55444.43258333333}


In [3]:
X, T = np.array([[x[y] for y in x if y !='user' and y != 'd_time'] for x in data]), np.array([[x['d_time']] for x in data])
print(X[0])
print(T)
T = np.array([[float(x[0] < 1), float(x[0] < 12.0), float(x[0] < 144.0)] for x in T])
print(T)

[   4   81  560 2597   17  165]
[[1.23745000e+00]
 [4.26578500e+00]
 [7.31966667e-02]
 ...
 [8.50144450e+01]
 [7.78833333e-03]
 [2.12266667e-01]]
[[0. 1. 1.]
 [0. 1. 1.]
 [1. 1. 1.]
 ...
 [0. 0. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [4]:
X_train, X_test = np.vsplit(X, [int(X.shape[0] * 0.9)])
T_train, T_test = np.vsplit(T, [int(T.shape[0] * 0.9)])

In [5]:
N_train = X_train.shape[0]
N_test = X_test.shape[0]
N_input = X_train.shape[1]
N_output = 3

In [6]:
def MLP(x):
    init = tf.variance_scaling_initializer()
    layer_1 = tf.layers.dense(x, N_input * 2, activation=tf.nn.relu, kernel_initializer=init)
    layer_2 = tf.layers.dense(layer_1, N_input * 2, activation=tf.nn.relu, kernel_initializer=init)
    out = tf.layers.dense(layer_2, N_output, kernel_initializer=init)
    return out

In [7]:
tf.reset_default_graph()
alpha = 0.0001
n_epoch = 500
batchsize = 256
x = tf.placeholder(tf.float32, [None, N_input])
t = tf.placeholder(tf.float32, [None, N_output])

y = MLP(x)

# 目的関数:softmax cross entropy
# 入力：labels->正解ラベル， logits：predictionの結果
# 出力：softmax cross entropyで計算された誤差
# xentropy = tf.nn.softmax_cross_entropy_with_logits(labels=t, logits=y)
# cost = tf.reduce_mean(xentropy)
cost = tf.reduce_mean(tf.square(y - t))

# SGD(Stochastic Gradient Descent : 確率的勾配降下法)で目的関数を最小化する
optimizer = tf.train.AdamOptimizer(alpha).minimize(cost)

# test用
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.greater(y, [0.5, 0.5, 0.5]), tf.greater(t, [0.5, 0.5, 0.5])), tf.float32))

train_summary_loss = tf.summary.scalar('train_loss', cost)
train_summary_accuracy = tf.summary.scalar('train_accuracy', accuracy)
test_summary_loss = tf.summary.scalar('test_loss', cost)
test_summary_accuracy = tf.summary.scalar('test_accuracy', accuracy)

# optimizer = tf.train.AdamOptimizer(alpha).minimize(cost)

# accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.greater(y * t_std + t_mean, [0.5]), tf.greater(t * t_std + t_mean, [0.5])), tf.float32))

# train_summary_loss = tf.summary.scalar('train_loss', cost)
# train_summary_accuracy = tf.summary.scalar('train_accuracy', accuracy)
# test_summary_loss = tf.summary.scalar('test_loss', cost)
# test_summary_accuracy = tf.summary.scalar('test_accuracy', accuracy)

In [8]:
import os
model_path = './model/'
if not os.path.exists(model_path):
    os.mkdir(model_path)
    
import shutil
logs_path = './log/'
if os.path.exists(logs_path):
    shutil.rmtree(logs_path)
os.mkdir(logs_path)
saver = tf.train.Saver()
summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

In [9]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    max_accuracy = 0
    for epoch in range(n_epoch):
        print('epoch %d | ' % epoch, end='')
        sum_loss = 0
        perm = np.random.permutation(N_train)
        for i in range(0, N_train, batchsize):
            X_batch = X_train[perm[i:i+batchsize]]
            T_batch = T_train[perm[i:i+batchsize]]
            
            _, loss, train_loss, train_accuracy = sess.run([optimizer, cost, train_summary_loss, train_summary_accuracy], feed_dict={x:X_batch, t:T_batch})
            sum_loss += np.mean(loss) * X_batch.shape[0]
            summary_writer.add_summary(train_loss, epoch * N_train + len(perm[:i+batchsize]))
            summary_writer.add_summary(train_accuracy, epoch * N_train + len(perm[:i+batchsize]))
            
        loss = sum_loss / N_train
        accuracy_, test_loss, test_accuracy = sess.run([accuracy, test_summary_loss, test_summary_accuracy], feed_dict={x:X_test, t:T_test})
        summary_writer.add_summary(test_loss, epoch)
        summary_writer.add_summary(test_accuracy, epoch)
        print('Train loss %.4f | Test accuracy: %.4f' %(loss, accuracy_), end='')
        if max_accuracy < accuracy_:
            max_accuracy = accuracy_
            saver.save(sess, model_path)
            print(' #', end='')
        print('')

epoch 0 | Train loss 46474.5223 | Test accuracy: 0.6291 #
epoch 1 | Train loss 811.2584 | Test accuracy: 0.4779
epoch 2 | Train loss 351.0734 | Test accuracy: 0.4974
epoch 3 | Train loss 159.8666 | Test accuracy: 0.5426
epoch 4 | Train loss 48.3937 | Test accuracy: 0.5806
epoch 5 | Train loss 30.4016 | Test accuracy: 0.5977
epoch 6 | Train loss 20.0081 | Test accuracy: 0.5982
epoch 7 | Train loss 12.4879 | Test accuracy: 0.6080
epoch 8 | Train loss 7.4670 | Test accuracy: 0.6088
epoch 9 | Train loss 4.4203 | Test accuracy: 0.5910
epoch 10 | Train loss 2.6242 | Test accuracy: 0.5990
epoch 11 | Train loss 1.6074 | Test accuracy: 0.5941
epoch 12 | Train loss 1.1131 | Test accuracy: 0.6122
epoch 13 | Train loss 0.8570 | Test accuracy: 0.6165
epoch 14 | Train loss 0.6982 | Test accuracy: 0.6123
epoch 15 | Train loss 0.5792 | Test accuracy: 0.6270
epoch 16 | Train loss 0.4905 | Test accuracy: 0.6432 #
epoch 17 | Train loss 0.4232 | Test accuracy: 0.6677 #
epoch 18 | Train loss 0.3710 | Test 

epoch 153 | Train loss 0.1506 | Test accuracy: 0.7819
epoch 154 | Train loss 0.1497 | Test accuracy: 0.7674
epoch 155 | Train loss 0.1498 | Test accuracy: 0.7801
epoch 156 | Train loss 0.1502 | Test accuracy: 0.7798
epoch 157 | Train loss 0.1499 | Test accuracy: 0.7823
epoch 158 | Train loss 0.1507 | Test accuracy: 0.7813
epoch 159 | Train loss 0.1493 | Test accuracy: 0.7650
epoch 160 | Train loss 0.1510 | Test accuracy: 0.7819
epoch 161 | Train loss 0.1502 | Test accuracy: 0.7819
epoch 162 | Train loss 0.1494 | Test accuracy: 0.7807
epoch 163 | Train loss 0.1510 | Test accuracy: 0.7666
epoch 164 | Train loss 0.1501 | Test accuracy: 0.7526
epoch 165 | Train loss 0.1495 | Test accuracy: 0.7825
epoch 166 | Train loss 0.1496 | Test accuracy: 0.7641
epoch 167 | Train loss 0.1492 | Test accuracy: 0.7828 #
epoch 168 | Train loss 0.1511 | Test accuracy: 0.7766
epoch 169 | Train loss 0.1493 | Test accuracy: 0.7767
epoch 170 | Train loss 0.1498 | Test accuracy: 0.7683
epoch 171 | Train loss 0.1

epoch 305 | Train loss 0.1489 | Test accuracy: 0.7803
epoch 306 | Train loss 0.1490 | Test accuracy: 0.7818
epoch 307 | Train loss 0.1494 | Test accuracy: 0.7797
epoch 308 | Train loss 0.1494 | Test accuracy: 0.7800
epoch 309 | Train loss 0.1484 | Test accuracy: 0.7647
epoch 310 | Train loss 0.1499 | Test accuracy: 0.7745
epoch 311 | Train loss 0.1489 | Test accuracy: 0.7816
epoch 312 | Train loss 0.1491 | Test accuracy: 0.7789
epoch 313 | Train loss 0.1499 | Test accuracy: 0.7796
epoch 314 | Train loss 0.1494 | Test accuracy: 0.7807
epoch 315 | Train loss 0.1496 | Test accuracy: 0.7742
epoch 316 | Train loss 0.1499 | Test accuracy: 0.7808
epoch 317 | Train loss 0.1494 | Test accuracy: 0.7808
epoch 318 | Train loss 0.1498 | Test accuracy: 0.7822
epoch 319 | Train loss 0.1490 | Test accuracy: 0.7791
epoch 320 | Train loss 0.1510 | Test accuracy: 0.7780
epoch 321 | Train loss 0.1488 | Test accuracy: 0.7825
epoch 322 | Train loss 0.1486 | Test accuracy: 0.7797
epoch 323 | Train loss 0.150

epoch 457 | Train loss 0.1487 | Test accuracy: 0.7809
epoch 458 | Train loss 0.1496 | Test accuracy: 0.7609
epoch 459 | Train loss 0.1489 | Test accuracy: 0.7643
epoch 460 | Train loss 0.1504 | Test accuracy: 0.7639
epoch 461 | Train loss 0.1501 | Test accuracy: 0.7741
epoch 462 | Train loss 0.1486 | Test accuracy: 0.7552
epoch 463 | Train loss 0.1486 | Test accuracy: 0.7778
epoch 464 | Train loss 0.1496 | Test accuracy: 0.7806
epoch 465 | Train loss 0.1485 | Test accuracy: 0.7821
epoch 466 | Train loss 0.1497 | Test accuracy: 0.7783
epoch 467 | Train loss 0.1490 | Test accuracy: 0.7733
epoch 468 | Train loss 0.1487 | Test accuracy: 0.7825
epoch 469 | Train loss 0.1502 | Test accuracy: 0.7818
epoch 470 | Train loss 0.1487 | Test accuracy: 0.7819
epoch 471 | Train loss 0.1494 | Test accuracy: 0.7830
epoch 472 | Train loss 0.1493 | Test accuracy: 0.7817
epoch 473 | Train loss 0.1491 | Test accuracy: 0.7823
epoch 474 | Train loss 0.1496 | Test accuracy: 0.7823
epoch 475 | Train loss 0.148

\`tensorboard --logdir ./tf/log/`

"localhost:6006"。

## モデルの利用

1. 計算グラフの初期化
1. パスからモデルを取得
1. `x_sample`：予測したい入力
1. `pre_x`：正規化された計算グラフ用入力データ
1. `pre_y`：正規化された計算グラフ用出出力データ
1. 予測値の出力

In [14]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, model_path)
    x_sample = [3, 31, 0, 0, 7.75, 1, 1, 0]
    pre_x = (np.array([x_sample]) - x_mean) / x_std 
    pre_y = tf.reduce_mean(y * t_std + t_mean)
    print('result: %.2f' %pre_y.eval(feed_dict={x: pre_x}, session=sess))

INFO:tensorflow:Restoring parameters from ./model/
result: 0.16
