In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

In [3]:
import tensorflow as tf

tf.__version__

'2.3.0'

In [4]:

from tensorflow.keras.models import Sequential,Model 
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import LeakyReLU

from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.models import load_model,save_model

### Data

https://github.com/zalandoresearch/fashion-mnist

In [5]:
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import fashion_mnist



In [6]:

# Load the Fashion-MNIST train/test splits through the Keras dataset helper.
train_split, test_split = fashion_mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Display the shape of every array as one tuple.
tuple(arr.shape for arr in (X_train, y_train, X_test, y_test))

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

* 60000 training samples, each is 28x28
* 10000 test samples, each is 28x28

In [7]:
# Distinct label values that occur in the training labels, and how many there are.
classes = np.unique(y_train)
nClasses = classes.shape[0]
print('Total number of outputs : ', nClasses)
print('Output classes : ', classes)

Total number of outputs :  10
Output classes :  [0 1 2 3 4 5 6 7 8 9]


In [12]:
# Largest pixel value currently stored in the training images.
# NOTE(review): the recorded execution count and the 1.0 output suggest this cell
# was last run after the scaling step; on a fresh top-to-bottom run the raw data
# presumably peaks at 255 — confirm by re-running the notebook in order.
print(np.max(X_train))
# Display the first training image as a raw array.
X_train[0,:,:]

1.0


#### Images

In [None]:
# Two side-by-side panels: the first training image and the first test image.
fig,(ax1,ax2) = plt.subplots(1,2,figsize=[8,8])

panels = (
    (ax1, X_train[0], y_train[0]),   # first image in training data
    (ax2, X_test[0], y_test[0]),     # first image in testing data
)
for axis, image, label in panels:
    axis.imshow(image, cmap='gray')
    axis.set_title(f'Ground Truth : {label}')


Training and test images of ankle boots labeled as class 9.

Grayscale images with pixel values 0 to 255

### Data Preprocessing

* Reshape each image to 28x28x1.
* Convert to float32
* Scale to [0, 1]

In [None]:
# Give every image an explicit trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1).
X_train, X_test = (images.reshape(-1, 28, 28, 1) for images in (X_train, X_test))
print(X_train.shape, X_test.shape)
# Peek at rows 16-22 of column 1 in the first image (values before scaling).
X_train[0,16:23,1,0]

In [None]:
def _to_unit_float(images):
    """Return ``images`` as a float32 array scaled into [0, 1].

    Integer-typed input is treated as raw 0-255 pixel data: it is converted to
    float32 and divided by 255. Floating-point input is assumed to have been
    scaled already and is only cast to float32, so re-running this cell does
    not divide the data by 255 a second time.
    """
    if np.issubdtype(images.dtype, np.integer):
        return images.astype('float32') / 255.
    return images.astype('float32')


X_train = _to_unit_float(X_train)
X_test = _to_unit_float(X_test)
# Same slice as shown after the reshape, now in scaled form.
X_train[0,16:23,1,0]

#### One-hot encode labels

In [None]:
# One-hot encode the integer class labels of both splits.
y_train_one_hot, y_test_one_hot = map(to_categorical, (y_train, y_test))
print('One hot shape ',y_test_one_hot.shape)
# Show a single label before and after the encoding.
print('Original label:', y_train[0])
print('After conversion to one-hot:', y_train_one_hot[0])

#### Split training data into training data and validation data

train_X: train model on this data (80% of X_train)  
valid_X: used to validate model on this data (20% of X_train)  
train_label: labels for train_X (80% of y_train_one_hot)  
valid_label: labels for valid_X (20% of y_train_one_hot)  
  
Note: Still have X_test and y_test_one_hot for Prediction

In [None]:
# Hold out 20% of the training set for validation; the fixed random_state keeps the split repeatable.
train_X, valid_X, train_label, valid_label = train_test_split(
    X_train,
    y_train_one_hot,
    test_size=0.2,
    random_state=13,
)
tuple(part.shape for part in (train_X, valid_X, train_label, valid_label))

### Network Architecture

![](TheNetwork.png)

#### Specify model structure

In [None]:
# CNN with three Conv2D -> LeakyReLU -> MaxPooling2D stages followed by a dense head.
# Conv2D/Dense layers use a linear activation because the non-linearity is supplied
# by the separate LeakyReLU layer that follows each of them.
model = Sequential([
    # Stage 1: 32 filters on the 28x28x1 input
    Conv2D(32, kernel_size=(3, 3), activation='linear', input_shape=(28, 28, 1), padding='same'),
    LeakyReLU(alpha=0.1),
    MaxPooling2D((2, 2), padding='same'),

    # Stage 2: 64 filters
    Conv2D(64, (3, 3), activation='linear', padding='same'),
    LeakyReLU(alpha=0.1),
    MaxPooling2D(pool_size=(2, 2), padding='same'),

    # Stage 3: 128 filters
    Conv2D(128, (3, 3), activation='linear', padding='same'),
    LeakyReLU(alpha=0.1),
    MaxPooling2D(pool_size=(2, 2), padding='same'),

    # Classifier head: flatten, one hidden dense layer, 10-way softmax output
    Flatten(),
    Dense(128, activation='linear'),
    LeakyReLU(alpha=0.1),
    Dense(10, activation='softmax'),
])

In [None]:
# Print the Keras summary of the model defined above.
model.summary()

#### Number of parameters Convolutional layer

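* n × m = dimensions of the kernel
* l = number of input channels
* +1 for the bias (one per output filter)
* k = number of output channels (filters)

Number of parameters: $(n \cdot m \cdot l + 1) \cdot k$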


In [None]:
# Parameter count of the second convolutional layer (32 input channels -> 64 filters).
# Each of the k filters has n*m*l kernel weights plus one bias.
n, m = 3, 3   # kernel height and width
l = 32        # number of inputs (channels coming in)
k = 64        # number of outputs (filters)
k * (n*m*l + 1)

#### Number of parameters in fully connected layer

In [None]:
# Parameter count of the hidden Dense layer.
# l: flattened inputs. Three 2x2 'same' poolings take 28 -> 14 -> 7 -> 4, and the
#    last conv stage has 128 filters, so 4 * 4 * 128 values reach the dense layer.
# k: number of dense units. Each unit has l weights plus one bias.
l = 4 * 4 * 128
k = 128
k * (l + 1)


#### Number of parameters in output layer

In [None]:
# Parameter count of the output layer: 128 inputs feeding 10 units,
# each unit with one weight per input plus a bias.
l = 128
k = 10
k * (l + 1)

In [None]:
# Fetch a layer by position and show its name. Indices start at 0, so index 3 is
# the fourth layer added to the Sequential model — presumably the second Conv2D;
# confirm against model.summary().
layer3 = model.get_layer(index=3)
layer3.name

In [None]:
# Display the list of layer objects held by the model.
model.layers

### Compile Model

Choose:

- Optimizer:  Adam  
- Loss type: categorical_crossentropy (matches the one-hot labels and softmax output; with integer labels, sparse_categorical_crossentropy would be the equivalent)  
- Metric: accuracy

In [None]:
# Configure training: Adam optimiser with default settings, categorical
# cross-entropy against the one-hot labels, accuracy reported as the metric.
model.compile(
    optimizer=Adam(),
    loss=categorical_crossentropy,
    metrics=['accuracy'],
)

### Train the model

Keras fit function returns a history object to use later to analyze performance

In [None]:
# Training hyperparameters.
# batch_size: mini-batch size (other candidates noted: 128, 256)
# epochs: number of passes over the training data
batch_size, epochs = 64, 10

In [None]:
# Train on the 80% split and validate on the held-out 20% after each epoch.
# The returned History object is kept for the learning-curve plots.
model_train = model.fit(
    train_X,
    train_label,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(valid_X, valid_label),
    verbose=1,
)

### Evaluate model on test set

In [None]:
# Score the trained model on the untouched test set; the result holds the loss
# followed by the accuracy metric configured at compile time.
test_eval = model.evaluate(X_test, y_test_one_hot, verbose=0)
test_loss, test_acc = test_eval
print('Test loss:', test_loss)
print('Test accuracy:', test_acc)

In [None]:
# List the names of the per-epoch series recorded in the training history.
model_train.history.keys()

In [None]:
# Pull the per-epoch curves out of the training history.
history = model_train.history
accuracy, val_accuracy = history['accuracy'], history['val_accuracy']
loss, val_loss = history['loss'], history['val_loss']
# NOTE: this rebinds `epochs` from the epoch count to the x-axis range used by the plots.
epochs = range(len(accuracy))

In [None]:
# Accuracy curves on the current axes: dots for training, a line for validation.
acc_ax = plt.gca()
acc_ax.plot(epochs, accuracy, 'bo', label='Training accuracy')
acc_ax.plot(epochs, val_accuracy, 'b', label='Validation accuracy')
acc_ax.set_xlabel("Epoch")
acc_ax.set_title('Training and validation accuracy')
acc_ax.legend()

# Loss curves on a second, separate figure.
loss_ax = plt.figure().gca()
loss_ax.plot(epochs, loss, 'bo', label='Training loss')
loss_ax.plot(epochs, val_loss, 'b', label='Validation loss')
loss_ax.set_xlabel("Epoch")
loss_ax.set_title('Training and validation loss')
loss_ax.legend();

Notice the U-shaped validation-loss curve in the loss plot: it means the model starts overfitting after about epoch 4 or 5.

### Dropout

Reduce overfitting by using dropout. Dropout randomly turns off a percentage of the units during training.

Number of units to drop is a hyperparameter

In [None]:
# Hyperparameters for the dropout run. `epochs` has to be set again here because
# the plotting code above rebound it to a range object.
batch_size, epochs = 64, 10


In [None]:
# Same three-stage CNN as before, now with a Dropout layer after every pooling
# stage and after the hidden dense layer. This rebinds `model` to the new network.
model = Sequential([
    # Stage 1: 32 filters, drop 25%
    Conv2D(32, kernel_size=(3, 3), activation='linear', padding='same', input_shape=(28, 28, 1)),
    LeakyReLU(alpha=0.1),
    MaxPooling2D((2, 2), padding='same'),
    Dropout(0.25),

    # Stage 2: 64 filters, drop 25%
    Conv2D(64, (3, 3), activation='linear', padding='same'),
    LeakyReLU(alpha=0.1),
    MaxPooling2D(pool_size=(2, 2), padding='same'),
    Dropout(0.25),

    # Stage 3: 128 filters, drop 40%
    Conv2D(128, (3, 3), activation='linear', padding='same'),
    LeakyReLU(alpha=0.1),
    MaxPooling2D(pool_size=(2, 2), padding='same'),
    Dropout(0.4),

    # Classifier head: hidden dense layer with 30% dropout, then 10-way softmax
    Flatten(),
    Dense(128, activation='linear'),
    LeakyReLU(alpha=0.1),
    Dropout(0.3),
    Dense(10, activation='softmax'),
])

model.summary()

In [None]:
# Same training configuration as the first model: Adam, categorical cross-entropy, accuracy.
model.compile(
    optimizer=Adam(),
    loss=categorical_crossentropy,
    metrics=['accuracy'],
)

In [None]:
# Train the dropout network on the same split; keep its History separately
# so its curves can be plotted on their own.
model_train_dropout = model.fit(
    train_X,
    train_label,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(valid_X, valid_label),
    verbose=1,
)

#### Saving the model

We can do a warm restart with the saved model

In [None]:
# Persist the trained dropout model to disk.
# NOTE(review): ".h5py" is not one of the suffixes Keras treats as HDF5
# (".h5", ".hdf5", ".keras"), so under TF 2.x this presumably writes a SavedModel
# directory with that name — confirm the intended format before renaming.
save_model(model,"model_dropout.h5py")

### Evaluate the new model

In [None]:
# Score the dropout model on the test set (loss first, then accuracy).
test_eval = model.evaluate(
    X_test,
    y_test_one_hot,
    verbose=0,
)

In [None]:
# Report the two values returned by evaluate(): loss, then accuracy.
test_loss, test_acc = test_eval
print('Test loss:', test_loss)
print('Test accuracy:', test_acc)

In [None]:
# Pull the per-epoch curves of the dropout run out of its history.
history = model_train_dropout.history
accuracy, val_accuracy = history['accuracy'], history['val_accuracy']
loss, val_loss = history['loss'], history['val_loss']
# NOTE: this rebinds `epochs` from the epoch count to the x-axis range used by the plots.
epochs = range(len(accuracy))

In [None]:
# Accuracy curves on the current axes: dots for training, a line for validation.
acc_ax = plt.gca()
acc_ax.plot(epochs, accuracy, 'bo', label='Training accuracy')
acc_ax.plot(epochs, val_accuracy, 'b', label='Validation accuracy')
acc_ax.set_xlabel("Epoch")
acc_ax.set_title('Training and validation accuracy')
acc_ax.legend()

# Loss curves on a second, separate figure.
loss_ax = plt.figure().gca()
loss_ax.plot(epochs, loss, 'bo', label='Training loss')
loss_ax.plot(epochs, val_loss, 'b', label='Validation loss')
loss_ax.set_xlabel("Epoch")
loss_ax.set_title('Training and validation loss')
loss_ax.legend()
plt.show()

### Predict the Classes of the test data

In [None]:
# Run the model on every test image; each output row comes from the 10-way softmax layer.
predicted_classes = model.predict(X_test)

# Inspect the first row raw and rounded to 5 decimals, then the overall shape.
first_row = predicted_classes[0]
print(first_row)
print(np.round(first_row,5))
print(predicted_classes.shape)

In [None]:
# Collapse each row of class probabilities into a single predicted label.
# argmax is taken on the raw probabilities: rounding first would turn every row
# whose best probability is <= 0.5 into all zeros, and argmax of an all-zero row
# is 0, silently mislabelling low-confidence samples as class 0.
# The ndim guard lets this cell be re-run after the name already holds labels.
if predicted_classes.ndim > 1:
    predicted_classes = np.argmax(predicted_classes, axis=1)
print(f'Predicted = {predicted_classes[0]}, label = {y_test[0]}')
predicted_classes.shape, y_test.shape

In [None]:
# Indices of the test images whose predicted label equals the true label.
correct = np.where(predicted_classes==y_test)[0]
print("Found %d correct labels" % len(correct))
# Show the first nine of them in a 3x3 grid. The loop variable has its own name
# so that `correct` still holds the full index array after the loop.
for i, idx in enumerate(correct[:9]):
    plt.subplot(3,3,i+1)
    plt.imshow(X_test[idx].reshape(28,28), cmap='gray', interpolation='none')
    plt.title("Predicted {}, Class {}".format(predicted_classes[idx], y_test[idx]))
# Adjust the spacing once, after all panels exist.
plt.tight_layout()

In [None]:
# Indices of the test images whose predicted label differs from the true label.
incorrect = np.where(predicted_classes!=y_test)[0]
print("Found %d incorrect labels" % len(incorrect))
# Show the first nine of them in a 3x3 grid. The loop variable has its own name
# so that `incorrect` still holds the full index array after the loop.
for i, idx in enumerate(incorrect[:9]):
    plt.subplot(3,3,i+1)
    plt.imshow(X_test[idx].reshape(28,28), cmap='gray', interpolation='none')
    plt.title("Predicted {}, Class {}".format(predicted_classes[idx], y_test[idx]))
# Adjust the spacing once, after all panels exist.
plt.tight_layout()

#### Confusion Matrix

In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred): with the true
# labels first, rows are true classes and columns are predicted classes.
cm = confusion_matrix(y_test, predicted_classes)
print(cm)

# Correct predictions sit on the diagonal, so trace / total is the accuracy.
print(f'Accuracy: {np.trace(cm)/np.sum(cm)}')

In [None]:
# Reload the model from the path it was saved to earlier and display the returned object.
m = load_model("model_dropout.h5py")
m

In [None]:
# Print the summary of the reloaded model for comparison with the one that was saved.
m.summary()

In [None]:
# Score the reloaded model on the test set; the numbers should match the model that was saved.
test_eval2 = m.evaluate(X_test, y_test_one_hot, verbose=0)
reloaded_loss, reloaded_acc = test_eval2
print('Test loss:', reloaded_loss)
print('Test accuracy:', reloaded_acc)

In [None]:
# Shapes of the integer labels, their one-hot form, and the predicted labels.
tuple(arr.shape for arr in (y_test, y_test_one_hot, predicted_classes))

### References

https://www.curiousily.com/posts/tensorflow-2-and-keras-quick-start-guide/

https://www.datacamp.com/community/tutorials/convolutional-neural-networks-python