In [61]:
import pandas as pd
import numpy as np
import tensorflow as tf
import keras
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler

import re
import logging
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
import visualizations as vis


## 1. Load data

In [4]:
print(tf.__version__)


1.12.0


In [2]:
mnist_train = pd.read_csv("all/train.csv")
mnist_test = pd.read_csv("all/test.csv")

In [3]:
len(mnist_test)/(len(mnist_test) + len(mnist_train))

0.4

In [4]:
split = StratifiedShuffleSplit(n_splits=1, test_size=0.4)
for train_index, validate_index in split.split(mnist_train, mnist_train['label']):
    train_data = mnist_train.loc[train_index]
    validate_data = mnist_train.loc[validate_index]

In [5]:
#mnist_train.head()
train_label = train_data['label']
train_data.drop('label', axis=1, inplace = True)
train_label.reset_index(drop=True, inplace=True)
train_data.reset_index(drop=True, inplace=True)

validate_label = validate_data['label']
validate_data.drop('label', axis=1, inplace = True)
validate_label.reset_index(drop=True, inplace=True)
validate_data.reset_index(drop=True, inplace=True)

In [6]:
all_train = np.concatenate([train_data, validate_data])
all_label = np.concatenate([train_label, validate_label])

In [7]:
train_data.shape, train_label.shape, all_train.shape, all_label.shape

((25200, 784), (25200,), (42000, 784), (42000,))

In [25]:
r = np.eye(10)[train_label[:100]]
r.shape

(100, 10)

In [None]:
mnist_test.head()
mnist_train.head()

## 1. one-hot sample

In [86]:
fashion_mnist = keras.datasets.fashion_mnist

(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

train_images = train_images / 255.0

test_images = test_images / 255.0


model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation=tf.nn.relu),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

model.compile(optimizer=tf.train.AdamOptimizer(), 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

#model.fit(np.array(train_data), np.array(train_label), epochs=5)

In [87]:
model.fit(train_images, train_labels, epochs=5)

model.evaluate(test_images, test_labels)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.34023452818393707, 0.8787]

In [85]:
model.fit(np.array(all_train), np.array(all_label), epochs=5)
test_loss, test_acc = model.evaluate(validate_data, validate_label)

print('Test accuracy:', test_acc)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 0.09678571428571428


## 2. Model， 1-NN

In [71]:
learning_rate = 0.01
training_epochs = 25
batch_size = 100
display_step = 1

In [72]:
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

In [73]:
# WHY 10 DIMENSITON? -> 0-9 recognition
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

In [75]:
pred = tf.nn.softmax(tf.matmul(x, W) + b)
#tf.truncated_normal()

In [76]:
# lazy evaluation
#cross_entropy = -tf.reduce_sum(y*tf.log(pred))
cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))

#xentropy = tf.nn.softmax_cross_entropy_with_logits(labels = y, logits = pred)
#loss = tf.reduce_mean(xentropy, name='loss')

In [77]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [78]:
init = tf.global_variables_initializer()

In [81]:
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(training_epochs):
        avg_cost = 0
        total_batch  = int(len(train_data)/batch_size)

        for iteration in range(total_batch):
            x_batch = np.array(all_train[iteration * batch_size : min((iteration+1) * batch_size, len(train_data))])
            y_batch = np.array(all_label[iteration * batch_size : min((iteration+1) * batch_size, len(train_label))])
            
            _, c = sess.run([optimizer, cost], feed_dict = {x: x_batch, y: np.eye(10)[y_batch]})
            #print(c)
            avg_cost += c / total_batch
        
        # Display logs per epoch step
        if (epoch+1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
    
    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    print(correct_prediction)
    
    # Calculate accuracy for 3000 examples
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:", accuracy.eval({x: np.array(validate_data), y: np.eye(10)[validate_label]}))

Epoch: 0001 cost= nan
Epoch: 0002 cost= nan
Epoch: 0003 cost= nan
Epoch: 0004 cost= nan
Epoch: 0005 cost= nan
Epoch: 0006 cost= nan
Epoch: 0007 cost= nan
Epoch: 0008 cost= nan
Epoch: 0009 cost= nan
Epoch: 0010 cost= nan
Epoch: 0011 cost= nan
Epoch: 0012 cost= nan
Epoch: 0013 cost= nan
Epoch: 0014 cost= nan
Epoch: 0015 cost= nan
Epoch: 0016 cost= nan
Epoch: 0017 cost= nan
Epoch: 0018 cost= nan
Epoch: 0019 cost= nan
Epoch: 0020 cost= nan
Epoch: 0021 cost= nan
Epoch: 0022 cost= nan
Epoch: 0023 cost= nan
Epoch: 0024 cost= nan
Epoch: 0025 cost= nan
Optimization Finished!
Tensor("Equal_1:0", shape=(?,), dtype=bool)
Accuracy: 0.09839286


## Model2: 2-NN

In [None]:
def NN_model_on_train_set(n_neurons_1=300, n_neurons_2 = 100, learning_rate = 0.01, n_epochs = 30, batch_size = 50):
    # here we build a two layers NN model and test on validation set, you may improve it to a CV version
    # n_neurons_1 : number of neurons in the first layer
    # n_neurons_2  : number of neurons in the second layer
    # learning_rate : the learning rate of BGD
    # n_epochs : times of training the model
    # batch_size : since we adopted BGD, then we need to define the size of a size
    # initialize variables
    X = tf.placeholder(tf.float32, shape=(None, 28*28), name='X')
    y = tf.placeholder(tf.int64, shape=(None), name = 'y')

    # weights
    W1 = tf.Variable(tf.truncated_normal((28*28, n_neurons_1),stddev = 0.01), name = 'layer_1')
    W2 = tf.Variable(tf.truncated_normal((n_neurons_1, n_neurons_2),stddev = 0.01), name = 'layer_2')
    W3 = tf.Variable(tf.truncated_normal((n_neurons_2 , 10),stddev = 0.01), name = 'output_layer')

    # biases
    b1 = tf.Variable(tf.zeros([n_neurons_1]), name='b_1')
    b2 = tf.Variable(tf.zeros([n_neurons_2]), name='b_2')
    b3 = tf.Variable(tf.zeros([10]), name='b_3')

    # the output of each layer
    Z1 = tf.nn.relu(tf.matmul(X,W1) + b1)
    Z2 = tf.nn.relu(tf.matmul(Z1, W2) + b2)
    output = tf.matmul(Z2, W3) + b3

    # define loss function. Cross-entropy was adopted rather than MSE
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = output)
    loss = tf.reduce_mean(xentropy, name='loss')

    # optimizer
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

    # define accuracy
    correct = tf.nn.in_top_k(output, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    # run everything
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        init.run()
        for epoch in range(n_epochs):
            for iteration in range(len(train_data) // batch_size):
                # 因为要batchSize=50个50个的取，取到末尾时可能不够，所以用个if判定一下
                if (iteration + 1) * batch_size <= len(train_data):
                    X_batch = np.array(train_data[iteration * batch_size : iteration * batch_size + batch_size])
                    y_batch = np.array(train_label[iteration * batch_size : iteration * batch_size + batch_size])
                else:
                    X_batch = np.array(train_data[iteration * batch_size : ])
                    y_batch = np.array(train_label[iteration * batch_size : ])
                sess.run(training_op, feed_dict={X:X_batch, y:y_batch})
            # train error
            acc_train = accuracy.eval(feed_dict={X:X_batch, y:y_batch})
            # test error
            acc_test = accuracy.eval(feed_dict={X:np.array(validate_data),
                                               y:np.array(validate_label)})
            print(epoch, 'Train accuracy:', acc_train, 'Test accuracy:', acc_test)

In [10]:
def NN_model_to_predict(n_neurons_1=300, n_neurons_2 = 100, learning_rate = 0.01, n_epochs = 30, batch_size = 50):
    # here we build a two layers NN model and test on validation set, you may improve it to a CV version
    # n_neurons_1 : number of neurons in the first layer
    # n_neurons_2  : number of neurons in the second layer
    # learning_rate : the learning rate of BGD
    # n_epochs : times of training the model
    # batch_size : since we adopted BGD, then we need to define the size of a size
    # initialize variables
    X = tf.placeholder(tf.float32, shape=(None, 28*28), name='X')
    y = tf.placeholder(tf.int64, shape=(None), name = 'y')

    # weights
    W1 = tf.Variable(tf.truncated_normal((28*28, n_neurons_1),stddev = 0.01), name = 'layer_1')
    W2 = tf.Variable(tf.truncated_normal((n_neurons_1, n_neurons_2),stddev = 0.01), name = 'layer_2')
    W3 = tf.Variable(tf.truncated_normal((n_neurons_2 , 10),stddev = 0.01), name = 'output_layer')

    # biases
    b1 = tf.Variable(tf.zeros([n_neurons_1]), name='b_1')
    b2 = tf.Variable(tf.zeros([n_neurons_2]), name='b_2')
    b3 = tf.Variable(tf.zeros([10]), name='b_3')

    # the output of each layer
    Z1 = tf.nn.relu(tf.matmul(X,W1) + b1)
    Z2 = tf.nn.relu(tf.matmul(Z1, W2) + b2)
    output = tf.matmul(Z2, W3) + b3

    # define loss function. Cross-entropy was adopted rather than MSE
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = output)
    loss = tf.reduce_mean(xentropy, name='loss')

    # optimizer
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

    # define accuracy
    correct = tf.nn.in_top_k(output, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        init.run()
        for epoch in range(n_epochs):
            for iteration in range(len(train_data) // batch_size):
                X_batch = np.array(all_train[iteration * batch_size : min((iteration+1) * batch_size, len(train_data))])
                y_batch = np.array(all_label[iteration * batch_size : min((iteration+1) * batch_size, len(train_label))])
                '''if (iteration + 1) * batch_size <= len(train_data):
                    X_batch = np.array(train_data[iteration * batch_size : iteration * batch_size + batch_size])
                    y_batch = np.array(train_label[iteration * batch_size : iteration * batch_size + batch_size])
                else:
                    X_batch = np.array(train_data[iteration * batch_size : ])
                    y_batch = np.array(train_label[iteration * batch_size : ])'''
                sess.run(training_op, feed_dict={X:X_batch, y:y_batch})
            acc_train = accuracy.eval(feed_dict={X:X_batch, y:y_batch})
            # test error
            acc_test = accuracy.eval(feed_dict={X:np.array(validate_data),
                                               y:np.array(validate_label)})
            print(epoch, 'Train accuracy:', acc_train, 'Test accuracy:', acc_test)
        
        predict_output = sess.run(output,feed_dict={X:np.array(mnist_test)})
        return np.argmax(predict_output, axis= 1)

In [11]:
prediction = NN_model_to_predict(n_neurons_1=300, n_neurons_2 = 100, learning_rate = 0.01, n_epochs = 50, batch_size = 50)


0 Train accuracy: 0.98 Test accuracy: 0.945
1 Train accuracy: 1.0 Test accuracy: 0.95839286
2 Train accuracy: 1.0 Test accuracy: 0.96369046
3 Train accuracy: 1.0 Test accuracy: 0.9649405
4 Train accuracy: 1.0 Test accuracy: 0.96416664
5 Train accuracy: 1.0 Test accuracy: 0.9670238
6 Train accuracy: 1.0 Test accuracy: 0.9652381
7 Train accuracy: 1.0 Test accuracy: 0.9704762
8 Train accuracy: 1.0 Test accuracy: 0.9681548
9 Train accuracy: 1.0 Test accuracy: 0.96410716
10 Train accuracy: 1.0 Test accuracy: 0.97059524
11 Train accuracy: 1.0 Test accuracy: 0.97125
12 Train accuracy: 1.0 Test accuracy: 0.97369045
13 Train accuracy: 1.0 Test accuracy: 0.9751786
14 Train accuracy: 1.0 Test accuracy: 0.9755357
15 Train accuracy: 1.0 Test accuracy: 0.9751786
16 Train accuracy: 1.0 Test accuracy: 0.9750595
17 Train accuracy: 1.0 Test accuracy: 0.9751786
18 Train accuracy: 1.0 Test accuracy: 0.9753571
19 Train accuracy: 1.0 Test accuracy: 0.9753571
20 Train accuracy: 1.0 Test accuracy: 0.9755357
2

In [70]:
df = pd.DataFrame({'ImageId': [i for i in range(1,len(prediction)+1)],
                  'Label': prediction})
df.to_csv('./my_prediction.csv', index=None)

## Model 3: 5-NN

In [23]:
def five_NN_model_to_predict(learning_rate = 0.001, n_epochs = 2000, batch_size = 100):
    # here we build a two layers NN model and test on validation set, you may improve it to a CV version
    # n_neurons_1 : number of neurons in the first layer
    # n_neurons_2  : number of neurons in the second layer
    # learning_rate : the learning rate of BGD
    # n_epochs : times of training the model
    # batch_size : since we adopted BGD, then we need to define the size of a size
    # initialize variables
    X = tf.placeholder(tf.float32, shape=(None, 28*28), name='X')
    y = tf.placeholder(tf.int64, shape=(None), name = 'y')

    
    # layer size
    
    L1 = 300
    L2 = 120
    L3 = 60
    L4 = 30
    L5 = 10
    
    # weights
    W1 = tf.Variable(tf.truncated_normal((28*28, L1),stddev = 0.01), name = 'layer_1')
    W2 = tf.Variable(tf.truncated_normal((L1, L2),stddev = 0.01), name = 'layer_2')
    W3 = tf.Variable(tf.truncated_normal((L2, L3),stddev = 0.01), name = 'layer_3')
    W4 = tf.Variable(tf.truncated_normal((L3, L4),stddev = 0.01), name = 'layer_4')
    W5 = tf.Variable(tf.truncated_normal((L4 , L5),stddev = 0.01), name = 'output_layer')

    # biases
    b1 = tf.Variable(tf.zeros([L1]), name='b_1')
    b2 = tf.Variable(tf.zeros([L2]), name='b_2')
    b3 = tf.Variable(tf.zeros([L3]), name='b_3')
    b4 = tf.Variable(tf.zeros([L4]), name='b_4')
    b5 = tf.Variable(tf.zeros([L5]), name='b_3')

    # the output of each layer
    Z1 = tf.nn.relu(tf.matmul(X,W1) + b1)
    Z2 = tf.nn.relu(tf.matmul(Z1, W2) + b2)
    Z3 = tf.nn.relu(tf.matmul(Z2, W3) + b3)
    Z4 = tf.nn.relu(tf.matmul(Z3, W4) + b4)
    
    output = tf.matmul(Z4, W5) + b5

    # define loss function. Cross-entropy was adopted rather than MSE
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = output)
    loss = tf.reduce_mean(xentropy, name='loss')*100

    # optimizer
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    #training_op = optimizer.minimize(loss)
    training_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    
    # define accuracy
    correct = tf.nn.in_top_k(output, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        init.run()
        for epoch in range(n_epochs):
            for iteration in range(len(train_data) // batch_size):
                X_batch = np.array(all_train[iteration * batch_size : min((iteration+1) * batch_size, len(train_data))])
                y_batch = np.array(all_label[iteration * batch_size : min((iteration+1) * batch_size, len(train_label))])
                '''if (iteration + 1) * batch_size <= len(train_data):
                    X_batch = np.array(train_data[iteration * batch_size : iteration * batch_size + batch_size])
                    y_batch = np.array(train_label[iteration * batch_size : iteration * batch_size + batch_size])
                else:
                    X_batch = np.array(train_data[iteration * batch_size : ])
                    y_batch = np.array(train_label[iteration * batch_size : ])'''
                sess.run(training_op, feed_dict={X:X_batch, y:y_batch})
            acc_train = accuracy.eval(feed_dict={X:X_batch, y:y_batch})
            # test error
            acc_test = accuracy.eval(feed_dict={X:np.array(validate_data),
                                               y:np.array(validate_label)})
            print(epoch, 'Train accuracy:', acc_train, 'Test accuracy:', acc_test)
        
        predict_output = sess.run(output,feed_dict={X:np.array(mnist_test)})
        return np.argmax(predict_output, axis= 1)

In [None]:
prediction = five_NN_model_to_predict()
df = pd.DataFrame({'ImageId': [i for i in range(1,len(prediction)+1)],
                  'Label': prediction})
df.to_csv('./my_prediction.csv', index=None)

## Model 4. CNN

In [64]:
def cnn_model(learning_rate = 0.001, n_epochs = 50, batch_size = 100):
    # what args do we need ? - -|
    #NUM_ITERS=5000
    #DISPLAY_STEP=100
    #BATCH=100
    
    #
    # input layer               - X[batch, 28, 28]
    # 1 conv. layer             - W1[5, 5, 1, C1] + b1[C1]   pad = 2?
    #                             Y1[batch, 28, 28, C1]
    # 2 conv. layer             - W2[3, 3, C1, C2] + b2[C2]
    # 2.1 max pooling filter 2x2, stride 2 - down sample the input (rescale input by 2) 28x28-> 14x14
    #                             Y2[batch, 14,14,C2] 
    # 3 conv. layer             - W3[3, 3, C2, C3]  + b3[C3]
    # 3.1 max pooling filter 2x2, stride 2 - down sample the input (rescale input by 2) 14x14-> 7x7
    #                             Y3[batch, 7, 7, C3] 
    # 4 fully connecteed layer  - W4[7*7*C3, FC4]   + b4[FC4]
    #                             Y4[batch, FC4] 
    # 5 output layer            - W5[FC4, 10]   + b5[10]
    # One-hot encoded labels      Y5[batch, 10]
    
    # input 
    
    X = tf.placeholder(tf.float32, shape=(None, 28, 28, 1), name='X')
    y = tf.placeholder(tf.int64, shape=(None), name = 'y')
    
    # Probability of keeping a node during dropout = 1.0 at test time (no dropout) and 0.75 at training time
    pkeep = tf.placeholder(tf.float32)
    
    # layer size, for cnn is conv depth (the number of detector)
    
    C1 = 4
    C2 = 8
    C3 = 16
    
    FC4 = 256  # fully connected layer
    
    stride = 1
    k = 2
    # conv 1
    W1 = tf.Variable(tf.truncated_normal((5,5, 1, C1),stddev = 0.01), name = 'conv_1')
    b1 = tf.Variable(tf.truncated_normal([C1], stddev = 0.01))
    
    Y1 = tf.nn.relu(tf.nn.conv2d(X, W1, strides=[1, stride, stride, 1], padding="SAME") + b1)   
    # conv 2 + maxpooling
    W2 = tf.Variable(tf.truncated_normal((3,3, C1, C2),stddev = 0.01), name = 'conv_2')
    b2 = tf.Variable(tf.truncated_normal([C2], stddev = 0.01))
    
    Y2 = tf.nn.relu(tf.nn.conv2d(Y1, W2, strides=[1, stride, stride, 1], padding="SAME") + b2)   
    Y2 = tf.nn.max_pool(Y2, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding="SAME")
    
    # conv3 + maxpooling
    W3 = tf.Variable(tf.truncated_normal((3,3, C2, C3),stddev = 0.01), name = 'conv_3')
    b3 = tf.Variable(tf.truncated_normal([C3], stddev = 0.01))
    
    Y3 = tf.nn.relu(tf.nn.conv2d(Y2, W3, strides=[1, stride, stride, 1], padding="SAME") + b3)   
    Y3 = tf.nn.max_pool(Y3, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding="SAME")
    
    # full connected 
    YY = tf.reshape(Y3, shape=[-1, 7 * 7 * C3])
    
    W4 = tf.Variable(tf.truncated_normal([7*7*C3, FC4], stddev=0.01, name = "full_connected"))
    b4 = tf.Variable(tf.truncated_normal([FC4], stddev=0.01))
    
    Y4 = tf.nn.relu(tf.matmul(YY, W4) + b4)
    
    # calculate softmax mapping to 10 classification
    W5 = tf.Variable(tf.truncated_normal([FC4, 10], stddev=0.01))
    b5 = tf.Variable(tf.truncated_normal([10], stddev=0.01))
    
    Y5 = tf.nn.relu(tf.matmul(Y4, W5) + b5)
    
    Y = tf.nn.softmax(Y5)
    
    # loss function
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=Y, labels=y)
    loss = tf.reduce_mean(xentropy) * 100
    
    # optimizer
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    #training_op = optimizer.minimize(loss)
    training_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    
    # accuracy
    correct = tf.nn.in_top_k(Y, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
    #correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(y, 1))
    #accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    # matplotlib visualization
    allweights = tf.concat([tf.reshape(W1, [-1]), tf.reshape(W2, [-1]), tf.reshape(W3, [-1]), tf.reshape(W4, [-1]), tf.reshape(W5, [-1])], 0)
    allbiases  = tf.concat([tf.reshape(b1, [-1]), tf.reshape(b2, [-1]), tf.reshape(b3, [-1]), tf.reshape(b4, [-1]), tf.reshape(b5, [-1])], 0)

    
    # init 
    init = tf.global_variables_initializer()

    train_losses = list()
    train_acc = list()
    test_losses = list()
    test_acc = list()

    saver = tf.train.Saver()
    
    # run session
    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(n_epochs):
            for iteration in range(len(train_data) // batch_size):
                X_batch = np.array(all_train[iteration * batch_size : min((iteration+1) * batch_size, len(train_data))])
                y_batch = np.array(all_label[iteration * batch_size : min((iteration+1) * batch_size, len(train_label))])
                
                sess.run(training_op, feed_dict={X:np.reshape(X_batch, (len(X_batch), 28, 28, 1)), y:y_batch, pkeep: 0.85})
            
            acc_trn, loss_trn, w, b = sess.run([accuracy, loss, allweights, allbiases], feed_dict={X:np.reshape(X_batch, (len(X_batch), 28, 28, 1)), y:y_batch, pkeep: 1.0})
            
            acc_tst, loss_tst = sess.run([accuracy, loss], feed_dict={X:np.reshape(np.array(validate_data), (len(validate_data), 28, 28, 1)),
                                               y:np.array(validate_label), pkeep: 1.0})
            
            print("#{} Trn acc={} , Trn loss={} Tst acc={} , Tst loss={}".format(epoch,acc_trn,loss_trn,acc_tst,loss_tst))

            train_losses.append(loss_trn)
            train_acc.append(acc_trn)
            test_losses.append(loss_tst)
            test_acc.append(acc_tst)

    title = "MNIST_3.0 5 layers 3 conv"
    vis.losses_accuracies_plots(train_losses,train_acc,test_losses, test_acc,title,n_epochs)
       
    predict_output = sess.run(Y,feed_dict={X:X:np.reshape(np.array(mnist_test), (len(mnist_test), 28, 28, 1))})
        
    return np.argmax(predict_output, axis= 1)
    

In [65]:
prediction = cnn_model( n_epochs = 30)

#0 Trn acc=0.8500000238418579 , Trn loss=198.92689514160156 Tst acc=0.8520833253860474 , Tst loss=200.074462890625
#1 Trn acc=0.949999988079071 , Trn loss=198.0220947265625 Tst acc=0.928511917591095 , Tst loss=199.3626708984375
#2 Trn acc=0.9800000190734863 , Trn loss=197.552978515625 Tst acc=0.9424999952316284 , Tst loss=199.33871459960938
#3 Trn acc=0.9800000190734863 , Trn loss=197.6280975341797 Tst acc=0.9701785445213318 , Tst loss=199.19215393066406
#4 Trn acc=0.9900000095367432 , Trn loss=197.49407958984375 Tst acc=0.9549404978752136 , Tst loss=198.53636169433594
#5 Trn acc=0.9700000286102295 , Trn loss=197.83547973632812 Tst acc=0.9612500071525574 , Tst loss=198.80552673339844


KeyboardInterrupt: 