In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import data_processing
%matplotlib inline

  from ._conv import register_converters as _register_converters


In [2]:
data = data_processing.load_data(download=False)
new_data = data_processing.convertToOnehot(data)

# prepare training data
new_data = new_data.values.astype(np.float32) # change to numpy array and float32 [pandas to numpy(float 32)]
np.random.shuffle(new_data)
sep = int(0.7*len(new_data))
sep2 = int(0.9*len(new_data))
train_data = new_data[:sep]
validate_data = new_data[sep:sep2]
test_data = new_data[sep2:]

In [3]:
# build network
tf_input = tf.placeholder(tf.float32, [None, 25], "input")
tfx = tf_input[:, :21]
tfy = tf_input[:, 21:]

tf_is_training = tf.placeholder(tf.bool, None)  # to control dropout when training and testing
l1 = tf.layers.dense(tfx, 128, tf.nn.relu, name="l1")
d1 = tf.layers.dropout(l1, rate=0.5, training=tf_is_training)   # drop out 50% of inputs
l2 = tf.layers.dense(l1, 128, tf.nn.relu, name="l2")
d2 = tf.layers.dropout(l2, rate=0.5, training=tf_is_training)   # drop out 50% of inputs

out = tf.layers.dense(l2, 4, name="l3")
prediction = tf.nn.softmax(out, name="pred")

# logits是没有经过softmax的prediction，tf会自动给你过一层softmax
loss = tf.losses.softmax_cross_entropy(onehot_labels=tfy, logits=out)

# argmax是为了找出tfy中每行数据对应的汽车状态和out中最有可能的汽车状态
# tf.metrics.accuracy(...)[0]和[1]分别是什么？ [https://www.tensorflow.org/api_docs/python/tf/metrics/accuracy]
# ==> accuracy: A Tensor representing the accuracy, the value of total divided by count.
#     update_op: An operation that increments the total and count variables appropriately and whose value matches accuracy.
accuracy = tf.metrics.accuracy(          # return (acc, update_op), and create 2 local variables
    labels=tf.argmax(tfy, axis=1), predictions=tf.argmax(out, axis=1),)[1]

opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)
# opt = tf.train.AdamOptimizer(learning_rate=0.1) # 准确率反而降低了
train_op = opt.minimize(loss)

sess = tf.Session()
sess.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))

In [4]:
for t in range(4000):
    # training
    batch_index = np.random.randint(len(train_data), size=32)
    sess.run(train_op, {tf_input: train_data[batch_index]})

    if t % 50 == 0:
        # testing
        acc_, pred_, loss_ = sess.run([accuracy, prediction, loss], {tf_input: validate_data})
        acc2_ = sess.run([accuracy], {tf_input: test_data})
        print("Step: %i" % t,"| Accurate on validate set: %.6f" % acc_,"| Loss: %.2f" % loss_,)
        print("acc on test set:",acc2_[0])


Step: 0 | Accurate on validate set: 0.633911 | Loss: 1.25
acc on test set: 0.6416185
Step: 50 | Accurate on validate set: 0.726672 | Loss: 0.45
acc on test set: 0.74349713
Step: 100 | Accurate on validate set: 0.782449 | Loss: 0.31
acc on test set: 0.7909441
Step: 150 | Accurate on validate set: 0.804239 | Loss: 0.30
acc on test set: 0.80780345
Step: 200 | Accurate on validate set: 0.825373 | Loss: 0.23
acc on test set: 0.8289017
Step: 250 | Accurate on validate set: 0.842925 | Loss: 0.19
acc on test set: 0.84609824
Step: 300 | Accurate on validate set: 0.855491 | Loss: 0.18
acc on test set: 0.85776216
Step: 350 | Accurate on validate set: 0.867425 | Loss: 0.15
acc on test set: 0.8699422
Step: 400 | Accurate on validate set: 0.877952 | Loss: 0.13
acc on test set: 0.8805395
Step: 450 | Accurate on validate set: 0.887506 | Loss: 0.10
acc on test set: 0.8898844
Step: 500 | Accurate on validate set: 0.895013 | Loss: 0.11
acc on test set: 0.8966106
Step: 550 | Accurate on validate set: 0.90