## **Q2:** 
neural network: 
test accuracy for data points 6601 till the end

### Step 1 - Configuring the Project

In [0]:
# create a new directory and navigate to it
!mkdir tensorflow-demo
!cd tensorflow-demo

In [0]:
# for setting up virtual environment
!apt-get install python3-venv

In [0]:
# set uo the virtual environment
!python3 -m venv tensorflow-demo
!source tensorflow-demo/bin/activate

### Step 2 - Importing the MNIST Dataset

In [0]:
# import Tensorflow library and MNIST dataset
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

In [28]:
# store the image data in the variable mnist
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)  # y labels are one-hot-encoded

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [0]:
# split dataset into training, validation and testing set
# check the number of examples in each set
n_train = mnist.train.num_examples  # 55,000
n_validation = mnist.validation.num_examples  # 5000
n_test = mnist.test.num_examples  # 10,000

### Step 3 - Defining the Neural Network Architecture

In [0]:
# store the number of units per layer in global variables
# this allows us to alter the network architecture in one place
# our designed architecture is 'deep neural network', because of multiple hidden layers
n_input = 784  # input layer (28x28 pixels)
n_hidden1 = 512  # 1st hidden layer
n_hidden2 = 256  # 2nd hidden layer
n_hidden3 = 128  # 3rd hidden layer
n_output = 10  # output layer (0-9 digits)

In [0]:
# define hyperparameters
learning_rate = 1e-4  # how much the parameters will adjust at each step of the learning process
n_iterations = 1000  # how many times we go through the training step
batch_size = 128   # how many training samples we use at each step
dropout = 0.5  # a threshold at which we eliminate some units at random. Help prevent overfitting

### Step 4 - Building the Tensorflow Graph

In [0]:
# define three tensors
X = tf.placeholder("float", [None, n_input])  # none represents any amount
Y = tf.placeholder("float", [None, n_output])
keep_prob = tf.placeholder(tf.float32)  # to control dropout rate, initialize it as a placeholder to remain the same for all training and testing steps

In [0]:
# define weights
weights = {
    'w1': tf.Variable(tf.truncated_normal([n_input, n_hidden1], stddev=0.1)),
    'w2': tf.Variable(tf.truncated_normal([n_hidden1, n_hidden2], stddev=0.1)),
    'w3': tf.Variable(tf.truncated_normal([n_hidden2, n_hidden3], stddev=0.1)),
    'out': tf.Variable(tf.truncated_normal([n_hidden3, n_output], stddev=0.1)),
}

In [0]:
# define biases
biases = {
    'b1': tf.Variable(tf.constant(0.1, shape=[n_hidden1])),
    'b2': tf.Variable(tf.constant(0.1, shape=[n_hidden2])),
    'b3': tf.Variable(tf.constant(0.1, shape=[n_hidden3])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_output]))
}

In [0]:
# set up the layers by defining the operations
layer_1 = tf.add(tf.matmul(X, weights['w1']), biases['b1'])
layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
layer_3 = tf.add(tf.matmul(layer_2, weights['w3']), biases['b3'])
layer_drop = tf.nn.dropout(layer_3, keep_prob)
output_layer = tf.matmul(layer_3, weights['out']) + biases['out']

In [0]:
# define loss function
# cross-entropy(log-loss): quantify the difference between two probability distributions
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
        labels=Y, logits=output_layer
        ))
# Adam optimizer: spped up gradient descent optimization
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

### Step 5 - Training and Testing

In [0]:
# define method of evaluating accuracy
correct_pred = tf.equal(tf.argmax(output_layer, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [0]:
# initialize a session for running the graph
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [39]:
# train on mini batches
for i in range(n_iterations):
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    sess.run(train_step, feed_dict={
        X: batch_x, Y: batch_y, keep_prob: dropout
        })

    # print loss and accuracy (per minibatch)
    if i % 100 == 0:
        minibatch_loss, minibatch_accuracy = sess.run(
            [cross_entropy, accuracy],
            feed_dict={X: batch_x, Y: batch_y, keep_prob: 1.0}
            )
        print(
            "Iteration",
            str(i),
            "\t| Loss =",
            str(minibatch_loss),
            "\t| Accuracy =",
            str(minibatch_accuracy)
            )

Iteration 0 	| Loss = 3.1963573 	| Accuracy = 0.1640625
Iteration 100 	| Loss = 0.49080488 	| Accuracy = 0.84375
Iteration 200 	| Loss = 0.3035546 	| Accuracy = 0.875
Iteration 300 	| Loss = 0.4561987 	| Accuracy = 0.8515625
Iteration 400 	| Loss = 0.21551365 	| Accuracy = 0.9375
Iteration 500 	| Loss = 0.3444941 	| Accuracy = 0.90625
Iteration 600 	| Loss = 0.27827537 	| Accuracy = 0.90625
Iteration 700 	| Loss = 0.43239567 	| Accuracy = 0.8828125
Iteration 800 	| Loss = 0.45047224 	| Accuracy = 0.875
Iteration 900 	| Loss = 0.30275312 	| Accuracy = 0.9140625


In [40]:
# run the session on the test images
test_accuracy = sess.run(accuracy, feed_dict={X: mnist.test.images[6600:10000,:], Y: mnist.test.labels[6600:10000,:], keep_prob: 1.0})
print("\nAccuracy on test set:", test_accuracy)


Accuracy on test set: 0.94529414


In [41]:
# download a new sample test image
!curl -O https://raw.githubusercontent.com/do-community/tensorflow-digit-recognition/master/test_img.png

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100   393  100   393    0     0   2551      0 --:--:-- --:--:-- --:--:--  2535


In [0]:
import numpy as np
from PIL import Image

In [0]:
# load the test image
img = np.invert(Image.open("test_img.png").convert('L')).ravel()

In [44]:
# test the image and print the outputted label
prediction = sess.run(tf.argmax(output_layer, 1), feed_dict={X: [img]})
print ("Prediction for test image:", np.squeeze(prediction))

Prediction for test image: 2


### **Q3:**
Without dropout for the final layer(layer 3)

In [45]:
# train on mini batches
for i in range(n_iterations):
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    sess.run(train_step, feed_dict={
        X: batch_x, Y: batch_y
        })

    # print loss and accuracy (per minibatch)
    if i % 100 == 0:
        minibatch_loss, minibatch_accuracy = sess.run(
            [cross_entropy, accuracy],
            feed_dict={X: batch_x, Y: batch_y, keep_prob: 1.0}
            )
        print(
            "Iteration",
            str(i),
            "\t| Loss =",
            str(minibatch_loss),
            "\t| Accuracy =",
            str(minibatch_accuracy)
            )

Iteration 0 	| Loss = 0.20418227 	| Accuracy = 0.9453125
Iteration 100 	| Loss = 0.22042751 	| Accuracy = 0.9140625
Iteration 200 	| Loss = 0.20367971 	| Accuracy = 0.9375
Iteration 300 	| Loss = 0.20530073 	| Accuracy = 0.9453125
Iteration 400 	| Loss = 0.26766694 	| Accuracy = 0.90625
Iteration 500 	| Loss = 0.18392602 	| Accuracy = 0.9453125
Iteration 600 	| Loss = 0.27930096 	| Accuracy = 0.9140625
Iteration 700 	| Loss = 0.13998422 	| Accuracy = 0.96875
Iteration 800 	| Loss = 0.1969614 	| Accuracy = 0.9453125
Iteration 900 	| Loss = 0.4407578 	| Accuracy = 0.875


In [46]:
# test accuracy on the corresponding test set
test_accuracy = sess.run(accuracy, feed_dict={X: mnist.test.images[6600:10000,:], Y: mnist.test.labels[6600:10000,:], keep_prob: 1.0})
print("\nAccuracy on test set:", test_accuracy)


Accuracy on test set: 0.9429412


In [47]:
prediction = sess.run(tf.argmax(output_layer, 1), feed_dict={X: [img]})
print ("Prediction for test image:", np.squeeze(prediction))

Prediction for test image: 2


### **Q5:**
Multinomial logistic regression

In [0]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import r2_score

In [0]:
# create multinomial logistic regression
clf = LogisticRegression(random_state=0, multi_class='multinomial', solver='newton-cg')

In [55]:
# load data
mnist_multi = input_data.read_data_sets("MNIST_data/", one_hot=False)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [58]:
# train model on the training set
model = clf.fit(mnist_multi.train.images, mnist_multi.train.labels)



In [0]:
# predict the test set
y_pred = model.predict(mnist_multi.test.images[6600:10000,:])

In [73]:
r2_score(mnist_multi.test.labels[6600:10000,], y_pred)

0.8974757292079572