# Classification on MNIST using Stacked Autoencoders 

## Importing necessary libraries

In [None]:
import keras
from keras.datasets import mnist
from keras.models import Sequential,Model
from keras.layers import Dense,Input
from keras.utils import to_categorical

import numpy as np


## Loading Dataset and preprocessing

In [None]:
# Load the MNIST train/test splits (images plus integer digit labels).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into a single row vector. The sample count is read from
# the array itself and the feature width is inferred (-1), so the cell does not
# hard-code the 60000/10000 split sizes.
X_train = X_train.reshape(X_train.shape[0], -1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], -1).astype('float32')

# One-hot encode the targets (10 digit classes) for categorical cross-entropy.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# Normalize the pixel intensities by dividing by 255.
X_train = X_train / 255
X_test = X_test / 255

## Autoencoder 1

784 -> 100 -> 784

**Declaration and training**

In [None]:
# Autoencoder 1: 784 inputs -> 100-unit sigmoid code -> 784-unit sigmoid
# reconstruction. The layers are created first and then wired together.
input_main = Input(shape=(784,))
encoder_1 = Dense(100, activation='sigmoid')
decoder_1 = Dense(784, activation='sigmoid')

h1 = encoder_1(input_main)   # hidden representation
o1 = decoder_1(h1)           # reconstruction of the input

ae1 = Model(inputs=input_main, outputs=o1)

ae1.summary()

Model: "model_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense_22 (Dense)             (None, 100)               78500     
_________________________________________________________________
dense_23 (Dense)             (None, 784)               79184     
Total params: 157,684
Trainable params: 157,684
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train autoencoder 1 to reproduce its own input (the target is X_train itself).
# Only the MSE reconstruction loss is tracked: the targets are continuous pixel
# intensities, so a classification 'accuracy' metric is not meaningful here.
ae1.compile(optimizer='adam', loss='mse')
ae1.fit(X_train, X_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fe648320610>

**Taking output of hidden layer as input to Autoencoder 2**

In [None]:
# Inspect the symbolic output of ae1's hidden Dense layer (index 1, right after
# the input layer); this is the tensor the trimmed encoder is built on.
ae1.layers[1].output

<KerasTensor: shape=(None, 100) dtype=float32 (created by layer 'dense_22')>

In [None]:
# Keep only the encoder half of autoencoder 1: same input tensor, but the model
# stops at the hidden layer's output.
encoder_1_output = ae1.layers[1].output
trimmed_ae1 = Model(inputs=input_main, outputs=encoder_1_output)

# Encode both splits (train first, then test); these codes are the training
# and test data for autoencoder 2.
X_train_ae2, X_test_ae2 = (
    trimmed_ae1.predict(images) for images in (X_train, X_test)
)

## Autoencoder 2

100 -> 50 -> 100

**Declaration and training**

In [None]:
# Autoencoder 2: 100-d codes -> 50-unit sigmoid code -> 100-unit sigmoid
# reconstruction. The layers are created first and then wired together.
input_ae2 = Input(shape=(100,))
encoder_2 = Dense(50, activation='sigmoid')
decoder_2 = Dense(100, activation='sigmoid')

h2 = encoder_2(input_ae2)   # hidden representation
o2 = decoder_2(h2)          # reconstruction of the 100-d input

ae2 = Model(inputs=input_ae2, outputs=o2)

ae2.summary()

Model: "model_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         [(None, 100)]             0         
_________________________________________________________________
dense_24 (Dense)             (None, 50)                5050      
_________________________________________________________________
dense_25 (Dense)             (None, 100)               5100      
Total params: 10,150
Trainable params: 10,150
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train autoencoder 2 to reproduce the codes produced by the first encoder
# (input and target are both X_train_ae2). Only the MSE reconstruction loss is
# tracked: the targets are continuous activations, so a classification
# 'accuracy' metric is not meaningful here.
ae2.compile(optimizer='adam', loss='mse')
ae2.fit(X_train_ae2, X_train_ae2, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fe648c41610>

**Taking output of hidden layer as input to classifier**

In [None]:
# Keep only the encoder half of autoencoder 2: same input tensor, but the model
# stops at the hidden layer's output.
encoder_2_output = ae2.layers[1].output
trimmed_ae2 = Model(inputs=input_ae2, outputs=encoder_2_output)

# Encode both splits (train first, then test); these codes are the features
# the classifier is trained and evaluated on.
X_train_clf, X_test_clf = (
    trimmed_ae2.predict(codes) for codes in (X_train_ae2, X_test_ae2)
)

## Classifier

50 -> 10

**Declaration and training**

In [None]:
# Classifier head: a single softmax layer mapping the 50-d codes to 10 classes.
input_clf = Input(shape=(50,))
softmax_head = Dense(10, activation='softmax')
f_output = softmax_head(input_clf)

clf = Model(inputs=input_clf, outputs=f_output)

clf.summary()

Model: "model_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         [(None, 50)]              0         
_________________________________________________________________
dense_26 (Dense)             (None, 10)                510       
Total params: 510
Trainable params: 510
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train the softmax head on the precomputed 50-d codes against the one-hot
# labels, reporting classification accuracy alongside the loss.
clf.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
clf.fit(x=X_train_clf, y=y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fe648310b90>

## Fine-tuned Model

In [None]:
# Stack the two pretrained encoder layers and the softmax head into a single
# network for end-to-end fine-tuning.
#
# Each trainable layer is re-created from the pretrained layer's config and
# then loaded with a *copy* of its weights, instead of adding the original
# layer objects. A Keras layer added to several models shares one set of
# weights between them, so reusing the objects would let fine-tuning `model`
# overwrite the weights inside `ae1`, `ae2` and `clf`. Evaluating `clf`
# afterwards would then no longer measure the classifier that was trained
# without fine-tuning.
pretrained_layers = [ae1.layers[1], ae2.layers[1], clf.layers[-1]]
stacked_layers = [
    type(pretrained).from_config(pretrained.get_config())
    for pretrained in pretrained_layers
]

model = Sequential()
model.add(ae1.layers[0])  # the input layer holds no weights, so sharing it is harmless
for stacked in stacked_layers:
    model.add(stacked)

# The model starts with an input layer, so every stacked layer is already built
# and its freshly initialised weights can be replaced by the pretrained values.
for stacked, pretrained in zip(stacked_layers, pretrained_layers):
    stacked.set_weights(pretrained.get_weights())

model.summary()

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_22 (Dense)             (None, 100)               78500     
_________________________________________________________________
dense_24 (Dense)             (None, 50)                5050      
_________________________________________________________________
dense_26 (Dense)             (None, 10)                510       
Total params: 84,060
Trainable params: 84,060
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Fine-tune the whole stack end to end on the flattened images and the one-hot
# labels, reporting classification accuracy alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(x=X_train, y=y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fe640f79a90>

## Comparing accuracy of the fine-tuned and non-fine-tuned classifiers

In [None]:
# evaluate() returns the loss followed by the compiled metrics, so index 1 is
# the accuracy. `clf` is scored on the precomputed 50-d codes, `model` on the
# flattened images.
pretrained_scores = clf.evaluate(X_test_clf, y_test)
print("Accuracy without fine-tuning: {:.2f}".format(pretrained_scores[1]))

finetuned_scores = model.evaluate(X_test, y_test)
print("Accuracy with fine-tuning: {:.2f}".format(finetuned_scores[1]))

Accuracy without fine-tuning: 0.79
Accuracy with fine-tuning: 0.97
