In [1]:
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.preprocessing import OneHotEncoder
import tensorflow.compat.v1 as tf
tf.compat.v1.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


In [3]:
train_data = pd.read_csv('MNIST/sign_mnist_train.csv')
test_data = pd.read_csv('MNIST/sign_mnist_test.csv')
print(" Raw shapes:", train_data.shape, test_data.shape)

## encoding label
encoder = OneHotEncoder() 

## normalizing train images to standard 0-1 pixel values.
x_train = (train_data.iloc[:, 1:]/255).values 
y_train = encoder.fit_transform(train_data['label'].values.reshape(-1,1)).toarray()

## normalizing test images to standard 0-1 pixel values.
x_test = (test_data.iloc[:, 1:]/255).values 
y_test = encoder.fit_transform(test_data['label'].values.reshape(-1,1)).toarray()

print("\n New train/test shapes:", x_train.shape, y_train.shape, x_test.shape, y_test.shape)
print("\n Shape 1 example :", x_train[1].shape, y_train[1].shape)

 Raw shapes: (27455, 785) (7172, 785)

 New train/test shapes: (27455, 784) (27455, 24) (7172, 784) (7172, 24)

 Shape 1 example : (784,) (24,)


### Initializing Weights And Units In Layers

In [4]:
input_width  = 28
input_height = 28
input_channel = 1
input_pixels  = 784

n_conv1 = 64
n_conv2 = 128
stride_conv1 = 1
stride_conv2 = 1
filter1_k = 5
filter2_k = 5
maxpool1_k = 2
maxpool2_k = 2

n_hidden = 1024
n_out = 24

input_size_to_hidden_layer = \
    ((input_width//(maxpool1_k*maxpool2_k)) *\
    (input_height//(maxpool1_k*maxpool2_k)) * n_conv2)

In [5]:
'''
weight  to convolutional layer1      5 x 5 x 1 x 64
weight  to convolutional layer2      5 x 5 x 64 x 128
weight  to hidden layer              6272 x 1024
weight  to output layer              1024 x 24
'''
weights = {
    'wc1': tf.Variable(tf.random_normal([filter1_k, filter1_k, input_channel, n_conv1])), 
    'wc2': tf.Variable(tf.random_normal([filter2_k, filter2_k, n_conv1, n_conv2])), 
    'wh' : tf.Variable(tf.random_normal([input_size_to_hidden_layer, n_hidden])),  
    'wo' : tf.Variable(tf.random_normal([n_hidden, n_out]))
}
'''
biases  to convolutional layer1     64
biases  to convolutional layer2     128
biases  to hidden layer             1024
biases  to output layer             24
'''
biases = {
    'bc1' : tf.Variable(tf.random_normal([n_conv1])), 
    'bc2' : tf.Variable(tf.random_normal([n_conv2])), 
    'bh' : tf.Variable(tf.random_normal([n_hidden])), 
    'bo' : tf.Variable(tf.random_normal([n_out])) 
}


## Forward Propagation

In [6]:
# функция для получения выходных данных из сверточного слоя
def conv(x, weights, bias, stride = 1):
    output = tf.nn.conv2d(x, weights, padding='SAME', strides=[1, stride, stride, 1])
    output = tf.nn.bias_add(output, bias)
    output = tf.nn.relu(output) # apply activation function
    return output

In [7]:
# функция, которая возвращает выходные данные объединяющего слоя (pooling layer),
# используемого для уменьшения размера изображения, так что мы должны тренировать меньше Весов и смещений
def maxpooling(x, k): # k = {2, 5}
    return tf.nn.max_pool(
           x, padding='SAME',
           ksize=[1, k, k, 1],
           strides=[1, k, k, 1])

In [8]:
def forward_propagation(x, weights, biases):
    x = tf.reshape(x, shape = [-1, input_width, input_height, input_channel]) #~> (1, 28, 28, 1)
    
    conv1 = conv(x, weights['wc1'], biases['bc1'], stride_conv1)
    conv1_pool = maxpooling(conv1, maxpool1_k)
    
    conv2 = conv(conv1_pool, weights['wc2'], biases['bc2'], stride_conv2)
    conv2_pool = maxpooling(conv2, maxpool2_k)
    
    hidden_layer_input = tf.reshape(conv2_pool, shape = [-1, input_size_to_hidden_layer])
    hidden_layer_output = tf.nn.relu(tf.add(tf.matmul(hidden_layer_input, weights['wh']), biases['bh']))
    
    output = tf.add(tf.matmul(hidden_layer_output, weights['wo']), biases['bo'])
    return output

## Defining Variables


In [9]:
X = tf.placeholder(tf.float32, [ None, input_pixels], name='x')
Y = tf.placeholder(tf.int32,   [ None, n_out],        name='y')
pred = forward_propagation(X, weights, biases)

In [10]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=Y))

## adam optimizer on the cost
optimizer = tf.train.AdamOptimizer(learning_rate=0.011)
optimize  = optimizer.minimize(cost)

In [11]:
## new session
sess = tf.Session()
sess.run(tf.global_variables_initializer())

### Batch Gradient Descent

In [12]:
batch_size=64
a = 0

for i in range(10):
    num_batches = int(len(x_train)/batch_size)
    total_cost = 0
    for j in range(num_batches):
        batch_x = x_train[a: a+batch_size]
        batch_y = y_train[a: a+batch_size]
        c, _ = sess.run([cost, optimize], feed_dict={X:batch_x, Y:batch_y})
        total_cost += c
        a += batch_size
    a = 0
    print(f'cost at {i+1} iteration: {total_cost:.4}')

cost at 1 iteration: 3.638e+06
cost at 2 iteration: 8.83e+03
cost at 3 iteration: 3.627e+03
cost at 4 iteration: 5.462e+03
cost at 5 iteration: 4.078e+03
cost at 6 iteration: 4.17e+03
cost at 7 iteration: 4.049e+03
cost at 8 iteration: 5.477e+03
cost at 9 iteration: 5.002e+03
cost at 10 iteration: 3.039e+03


In [2]:
# testing model on training data
predictions    = tf.argmax(pred, axis=1)
correct_labels = tf.argmax(Y, axis=1)
accuracy = tf.equal(predictions, correct_labels)
predictions, labels, accuracy = sess.run([predictions, correct_labels, accuracy], feed_dict={X:x_train, Y:y_train})
print(f'Accuracy: {accuracy.sum()/len(x_train):.2%}')

Accuracy: 98.87%


In [5]:
# testing model on testing data
predictions    = tf.argmax(pred, axis=1)
correct_labels = tf.argmax(Y, axis=1)
accuracy = tf.equal(predictions, correct_labels)
predictions, labels, accuracy = sess.run([predictions, correct_labels, accuracy], feed_dict={X:x_test, Y:y_test})
accuracy = accuracy.sum()/len(x_test)
print(f'Accuracy: {accuracy:.2%}')

Accuracy: 87.96%
