In [4]:
import tensorflow as tf
import numpy as np
import pandas as pd
import time
from datetime import datetime
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

# load iris
iris = load_iris()
X = iris["data"][:,[2,3]]
y = (iris["target"] == 0).astype(np.int)

# train perceptron
per_clf = Perceptron(random_state=42)
per_clf.fit(X, y)
print(accuracy_score(y, per_clf.predict(X)))

# load mnist
scaler = StandardScaler()
df_train = pd.read_csv("../datasets/mnist_train.csv")
X_train = scaler.fit_transform(df_train.iloc[:].drop("5", axis=1).values)
y_train = df_train.iloc[:]["5"].values.reshape(-1,1)
df_test = pd.read_csv("../datasets/mnist_test.csv")
X_test = scaler.transform(df_test.drop("7", axis=1).values)
y_test = df_test["7"].values.reshape(-1,1)
"""
# train MLP with tensorflow (high-level API)
start_time = time.time()
feature_cols = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)
dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[300,100], 
                                         n_classes=10, feature_columns=feature_cols)
dnn_clf = tf.contrib.learn.SKCompat(dnn_clf)
dnn_clf.fit(X_train, y_train, batch_size=50, steps=40000)
y_pred = dnn_clf.predict(X_test)
print("--- %s seconds ---" % (time.time() - start_time))
print(accuracy_score(y_test, y_pred["classes"])) # 0.9795 within 122.53914904594421 seconds

# train MLP with sklearn
start_time = time.time()
dnn_clf = MLPClassifier(hidden_layer_sizes=[300,100], batch_size=50, max_iter=40000)
dnn_clf.fit(X_train, y_train)
y_pred = dnn_clf.predict(X_test)
print("--- %s seconds ---" % (time.time() - start_time))
print(accuracy_score(y_test, y_pred)) # 0.9738 within 86.09499192237854 seconds
"""
# train MLP with tensorflow (low-level API)
"""
with tf.variable_scope("param", reuse=tf.AUTO_REUSE):
    n_inputs = tf.get_variable("n_inputs", shape=(), 
                                 initializer=tf.constant_initializer(28*28))
    n_hidden1 = tf.get_variable("n_hidden1", shape=(), 
                               initializer=tf.constant_initializer(300))
    n_hidden2 = tf.get_variable("n_hidden2", shape=(), 
                               initializer=tf.constant_initializer(100))
    n_outputs = tf.get_variable("n_outputs", shape=(), 
                               initializer=tf.constant_initializer(10))
"""
# my MLP: initialize
tf.reset_default_graph()
n_inputs = 28*28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10
learning_rate = 0.01
n_epochs = 10
batch_size = 50
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

# my MLP: neuron layer
def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        #with tf.variable_scope("param", reuse=True):
        n_inputs = int(X.get_shape()[1])
        stddev = 2. / np.sqrt(n_inputs + n_neurons)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="kernel")
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        else:
            return Z

# my MLP: DNN
with tf.name_scope("dnn"):
    """
    with tf.variable_scope("param", reuse=True):
        n_hidden1 = tf.get_variable("n_hidden1")
        n_hidden2 = tf.get_variable("n_hidden2")
        n_outputs = tf.get_variable("n_outputs")
    """
    hidden1 = neuron_layer(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
    hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu)
    logits = neuron_layer(hidden2, n_outputs, name="outputs")
    
# my MLP: loss function
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.cast(tf.reshape(y,[-1]), tf.int32), logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
# my MLP: train
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
# my MLP: evaluation
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, tf.cast(tf.reshape(y,[-1]), tf.int32), 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# my MLP: save graph
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)
file_writer = tf.summary.FileWriter(logdir, graph=tf.get_default_graph())
accuracy_summary = tf.summary.scalar("accuracy", accuracy)
    
# my MLP: run session
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        shuffled_index = np.random.permutation(len(X_train))
        X_train, y_train = X_train[shuffled_index], y_train[shuffled_index]
        for i in range(len(X_train) // batch_size):
            X_batch = X_train[batch_size * i : batch_size * (i+1)]
            y_batch = y_train[batch_size * i : batch_size * (i+1)]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        file_writer.add_summary(accuracy_summary.eval(feed_dict={X: X_test, y: y_test}), epoch)
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
    Z = logits.eval(feed_dict={X: X_train[100:120]})
    y_pred = np.argmax(Z, axis=1)
    print(y_pred, y_train[100:120].ravel())
file_writer.flush()
        



1.0




0 Train accuracy: 0.96 Test accuracy: 0.928293
1 Train accuracy: 0.92 Test accuracy: 0.944995
2 Train accuracy: 0.96 Test accuracy: 0.952795
3 Train accuracy: 0.94 Test accuracy: 0.956496
4 Train accuracy: 0.98 Test accuracy: 0.959796
5 Train accuracy: 0.98 Test accuracy: 0.962696
6 Train accuracy: 1.0 Test accuracy: 0.964696
7 Train accuracy: 0.98 Test accuracy: 0.964997
8 Train accuracy: 0.98 Test accuracy: 0.965997
9 Train accuracy: 1.0 Test accuracy: 0.967397
[3 9 0 9 2 9 8 2 4 7 1 2 9 2 0 6 1 5 4 6] [[3]
 [9]
 [0]
 [9]
 [2]
 [9]
 [8]
 [2]
 [4]
 [7]
 [1]
 [2]
 [9]
 [2]
 [0]
 [6]
 [1]
 [5]
 [4]
 [6]]
