In [5]:
from make_dataset_helper import prepare_mnist, prepare_iris, prepare_digits
import tensorflow as tf
import numpy as np
import logging
import matplotlib.pyplot as plt
% matplotlib inline


def create_placeholder(X, y):
    """Create float32 placeholders for the inputs and the labels.

    Only the first dimension of each array is baked into the graph; the
    second dimension is left as ``None`` so it can vary between feeds.
    NOTE(review): presumably rows are features/classes and columns are
    examples -- confirm against the data loader.

    Args:
        X: array-like with a ``shape`` attribute; ``X.shape[0]`` fixes the
            first dimension of the "X" placeholder.
        y: array-like with a ``shape`` attribute; ``y.shape[0]`` fixes the
            first dimension of the "y" placeholder.

    Returns:
        Tuple ``(X_placeholder, y_placeholder)``.
    """
    n_input_rows = X.shape[0]
    n_label_rows = y.shape[0]

    inputs_ph = tf.placeholder(dtype=tf.float32, shape=(n_input_rows, None), name="X")
    labels_ph = tf.placeholder(dtype=tf.float32, shape=(n_label_rows, None), name="y")
    return inputs_ph, labels_ph


def initialize_parameter(n_dims):
    """Create the weight and bias variables for every layer.

    For layer ``i`` (1-based) a weight ``W<i>`` of shape
    ``(n_dims[i], n_dims[i-1])`` is created with the Xavier initializer and
    a bias ``b<i>`` of shape ``(n_dims[i], 1)`` is created with zeros.

    Args:
        n_dims: sequence of layer sizes, input layer first.

    Returns:
        Dict mapping ``"W1", "b1", "W2", "b2", ...`` to the TF variables.
    """
    parameters = {}
    # Walk consecutive (previous size, current size) pairs, numbering from 1.
    layer_pairs = zip(n_dims[:-1], n_dims[1:])
    for layer, (n_in, n_out) in enumerate(layer_pairs, start=1):
        w_name = "W" + str(layer)
        b_name = "b" + str(layer)
        parameters[w_name] = tf.get_variable(
            name=w_name,
            shape=(n_out, n_in),
            initializer=tf.contrib.layers.xavier_initializer(),
        )
        parameters[b_name] = tf.get_variable(
            name=b_name,
            shape=(n_out, 1),
            initializer=tf.zeros_initializer(),
        )
    return parameters


def forward_function(_logger, parameters, X, n_dims, activation):
    """Build the forward pass and return the output layer's pre-activation.

    Each layer computes ``Z = W @ A_prev + b``. Every layer except the last
    applies the chosen activation to ``Z``; the last layer's ``Z`` is
    returned as-is (no activation) so the caller can feed it to a loss that
    expects logits.

    Args:
        _logger: logger used to report the chosen activation.
        parameters: dict holding ``"W<i>"`` and ``"b<i>"`` tensors for
            ``i = 1 .. len(n_dims) - 1``.
        X: input tensor, used as the activation of layer 0.
        n_dims: sequence of layer sizes, input layer first. Only its length
            is used here.
        activation: ``"relu"``, ``"sigmoid"`` or ``"tanh"``; applied to the
            hidden layers.

    Returns:
        The ``Z`` tensor of the last layer.

    Raises:
        ValueError: if ``n_dims`` describes no layer at all, or if hidden
            layers exist and ``activation`` is not one of the supported
            names. (Previously these cases failed with an unbound-variable
            error part-way through graph construction.)
    """
    _logger.info("Activation function is {}".format(activation))

    n_layers = len(n_dims) - 1
    if n_layers < 1:
        raise ValueError(
            "n_dims must contain at least an input and an output size, got {!r}".format(n_dims)
        )

    supported = ("relu", "sigmoid", "tanh")
    # The activation is only applied to hidden layers, so it only has to be
    # valid when at least one hidden layer exists.
    if n_layers > 1 and activation not in supported:
        raise ValueError(
            "Unsupported activation {!r}; expected one of {}".format(activation, supported)
        )

    A_prev = X
    Z = None
    for i in range(1, n_layers + 1):
        Z = tf.add(tf.matmul(parameters["W" + str(i)], A_prev), parameters["b" + str(i)])

        # The output layer stays linear: do not build an activation op that
        # would never be used.
        if i == n_layers:
            break

        if activation == "relu":
            A_prev = tf.nn.relu(Z)
        elif activation == "sigmoid":
            A_prev = tf.nn.sigmoid(Z)
        else:  # "tanh" -- guaranteed by the validation above
            A_prev = tf.nn.tanh(Z)
    return Z
        

def compute_cost(ZL, y):
    """Return the mean softmax cross-entropy between ``ZL`` and ``y``.

    Both tensors are transposed before being handed to
    ``tf.nn.softmax_cross_entropy_with_logits``.
    NOTE(review): this presumably converts a (classes, examples) layout into
    the (examples, classes) layout the TF op expects -- confirm.

    Args:
        ZL: output-layer pre-activations (logits).
        y: label tensor with the same layout as ``ZL``.

    Returns:
        Scalar tensor: the loss averaged over all entries returned by the
        cross-entropy op.
    """
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        logits=tf.transpose(ZL),
        labels=tf.transpose(y),
    )
    return tf.reduce_mean(per_example_loss)
        

def main(_logger, X_train, X_test, y_train, y_test,  n_labels, n_dims, learning_rate=0.01, iteration_num=100, cost_print=True, activation="relu"):
    """Build, train and evaluate a fully connected softmax classifier.

    The whole training set is fed in a single batch on every epoch and the
    cost is minimised with Adam. After training, the cost curve is plotted
    and the train/test accuracies are logged.

    Args:
        _logger: logger used for progress messages.
        X_train: training inputs; ``X_train.shape[0]`` fixes the first
            dimension of the input placeholder.
        X_test: test inputs, fed through the same placeholder.
        y_train: training labels; ``y_train.shape[0]`` fixes the first
            dimension of the label placeholder.
        y_test: test labels.
        n_labels: not used by this function; kept so existing callers keep
            working.
        n_dims: sequence of layer sizes, input layer first.
        learning_rate: learning rate passed to the Adam optimizer.
        iteration_num: number of full-batch training steps.
        cost_print: when truthy, print the cost every 10th epoch.
        activation: hidden-layer activation name forwarded to
            ``forward_function``.

    Returns:
        Dict with the trained values of every ``"W<i>"`` / ``"b<i>"``
        parameter, as fetched from the session after training.
    """
    _logger.info("initialize the computation graph")
    tf.reset_default_graph()

    _logger.info("create X, y placeholder")
    X, y = create_placeholder(X_train, y_train)

    _logger.info("intialize_parameter")
    parameters = initialize_parameter(n_dims)

    _logger.info("forward propagation")
    ZL = forward_function(_logger, parameters, X, n_dims, activation)

    _logger.info("compute cost function")
    cost = compute_cost(ZL, y)

    _logger.info("optimize with optimizer")
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Accuracy ops are part of the graph too, so build them together with
    # the rest of it. tf.argmax reduces over axis 0 by default, which is the
    # class axis here because each weight is (layer size, previous size) and
    # is multiplied from the left.
    correct_prediction = tf.equal(tf.argmax(ZL), tf.argmax(y))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    # initialize all variables
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        _logger.info("Begin optimizing cost")
        costs = []

        for epoch in range(1, iteration_num + 1):
            cost_val, _ = sess.run([cost, optimizer], feed_dict={X: X_train, y: y_train})
            costs.append(cost_val)
            if cost_print and epoch % 10 == 0:
                print("epoch" + str(epoch), cost_val)

        # Plot the cost. One value is recorded per epoch, so the x axis is
        # in plain iterations (not tens of iterations).
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # Fetch the trained parameter values so they outlive the session.
        trained_parameters = sess.run(parameters)
        _logger.info("Parameters have been trained!")

        train_accuracy = accuracy.eval({X: X_train, y: y_train})
        test_accuracy = accuracy.eval({X: X_test, y: y_test})

        _logger.info("Train Accuracy: {}".format(train_accuracy))
        _logger.info("Test Accuracy: {}".format(test_accuracy))

    return trained_parameters
        


if __name__ == "__main__":
    # Script entry point: configure logging, load MNIST and train the network.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s\t%(levelname)s\t%(name)s\t%(message)s",
    )
    _logger = logging.getLogger(__name__)

    X_train, X_test, y_train, y_test, n_labels = prepare_mnist()

    # Layer sizes: the input size, then that size halved three times, then a
    # 30-unit layer, then one output unit per label.
    n_dims = [X_train.shape[0] // divisor for divisor in (1, 2, 4, 8)] + [30, n_labels]

    main(
        _logger,
        X_train,
        X_test,
        y_train,
        y_test,
        n_labels,
        n_dims,
        learning_rate=0.01,
        iteration_num=1000,
    )

2018-10-01 14:30:18,330	INFO	__main__	initialize the computation graph
2018-10-01 14:30:18,331	INFO	__main__	create X, y placeholder
2018-10-01 14:30:18,336	INFO	__main__	intialize_parameter
2018-10-01 14:30:18,455	INFO	__main__	forward propagation
2018-10-01 14:30:18,457	INFO	__main__	Activation function is relu
2018-10-01 14:30:18,477	INFO	__main__	compute cost function
2018-10-01 14:30:18,553	INFO	__main__	optimize with optimizer
2018-10-01 14:30:19,012	INFO	__main__	Begin optimizing cost


epoch10 2.3031802
epoch20 2.3032067
epoch30 2.3028352
epoch40 2.3022916
epoch50 2.3018956
epoch60 2.3015714
epoch70 2.3014028
epoch80 2.3013058
epoch90 2.3013031
epoch100 2.3013465
epoch110 2.3012109
epoch120 2.3012118
epoch130 2.3012018
epoch140 2.3012
epoch150 2.3011897
epoch160 2.30121
epoch170 2.3011758
epoch180 2.3012106
epoch190 2.3012125
epoch200 2.3011904
epoch210 2.3011937
epoch220 2.3011854
epoch230 2.3012354
epoch240 2.3011768
epoch250 2.3012078
epoch260 2.3011193
epoch270 2.301159
epoch280 2.3011582
epoch290 2.301149
epoch300 2.3011591
epoch310 2.3011663
epoch320 2.3011577
epoch330 2.3011656
epoch340 2.3011656
epoch350 2.3011653
epoch360 2.3012064
epoch370 2.3012068
epoch380 2.30121
epoch390 2.3011978
epoch400 2.3011982
epoch410 2.3011982
epoch420 2.3011985
epoch430 2.3011985
epoch440 2.3011985
epoch450 2.3011982
epoch460 2.301209
epoch470 2.301209
epoch480 2.301209
epoch490 2.301209


KeyboardInterrupt: 