In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the Fashion-MNIST test and train splits from CSV files in the working directory.
df_test, df_train = (
    pd.read_csv(csv_path)
    for csv_path in ('fashion-mnist_test.csv', 'fashion-mnist_train.csv')
)

In [3]:
# Keep the class labels as separate targets before they are removed from the feature frames.
y_train, y_test = df_train['label'], df_test['label']

In [4]:
# Remove the target column so that only the feature columns remain.
# The frames are rebound instead of mutated with `inplace=True`, and
# `errors='ignore'` makes the cell safe to re-run: a second execution no longer
# raises KeyError because 'label' has already been dropped.
df_test = df_test.drop(columns='label', errors='ignore')
df_train = df_train.drop(columns='label', errors='ignore')

In [5]:
# Data normalization: standardize every feature column using statistics
# estimated on the training split only, then apply the same fitted transform
# to the test split.
scaler = StandardScaler().fit(df_train)
X_train, X_test = scaler.transform(df_train), scaler.transform(df_test)

In [6]:
# Inspect the dimensions of the scaled training matrix (samples, features).
np.shape(X_train)

(60000, 784)

### Logistic regression

In [7]:
# One-hot encode the integer class labels, as required by the
# 'categorical_crossentropy' loss used by every model below.
# `num_classes` is fixed so the train and test matrices always have the same
# width (otherwise the width is inferred from the largest label present in
# each split). The ndim guard keeps the cell safe to re-run: encoding an
# already one-hot matrix a second time would add a third axis.
NUM_CLASSES = 10  # matches the 10-unit softmax output layer of the models
if np.ndim(y_train) == 1:
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=NUM_CLASSES)
if np.ndim(y_test) == 1:
    y_test = tf.keras.utils.to_categorical(y_test, num_classes=NUM_CLASSES)

In [8]:
# Multinomial logistic regression expressed as a network with a single
# softmax Dense layer: 784 input features -> 10 class probabilities.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(10, activation='softmax', input_shape=(784,)),
])

2022-02-16 01:40:55.341076: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
# Configure training: default SGD optimizer, categorical cross-entropy loss,
# and accuracy reported as an additional metric.
model.compile(
    optimizer=tf.keras.optimizers.SGD(),
    metrics=['accuracy'],
    loss='categorical_crossentropy',
)

In [10]:
# (Re)load the TensorBoard notebook extension so the %tensorboard magic can be used below.
%reload_ext tensorboard

In [11]:
# Train the logistic-regression model for 10 epochs and write TensorBoard logs
# to ./logs. The test split is passed as validation data, so the reported
# val_* metrics are computed on the test set.
epoch = 10
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='./logs')

_ = model.fit(
    x=X_train,
    y=y_train,
    epochs=epoch,
    validation_data=(X_test, y_test),
    callbacks=[tensorboard_callback],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
# Open TensorBoard on the `logs` directory written by the training callback.
# The model graph was exported as linear_graph.svg and is included in the archive.
%tensorboard --logdir logs

In [13]:
# Score the logistic-regression model on the test split; evaluate() yields
# the loss followed by the accuracy metric configured in compile().
_, accuracy = model.evaluate(x=X_test, y=y_test)
print('Accuracy equals to: ', round(accuracy, ndigits=2))

Accuracy equals to:  0.85


### FCNN

In [14]:
# Fully connected network: two hidden ReLU layers of 64 units each on top of
# the 784 input features, followed by a 10-way softmax output.
model_fcnn = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax'),
])

In [15]:
# Same training configuration as the logistic-regression model:
# default SGD, categorical cross-entropy, accuracy metric.
model_fcnn.compile(
    optimizer=tf.keras.optimizers.SGD(),
    metrics=['accuracy'],
    loss='categorical_crossentropy',
)

In [16]:
# (Re)load the TensorBoard notebook extension so the %tensorboard magic can be used below.
%reload_ext tensorboard

In [17]:
# Train the fully connected network for 10 epochs and write TensorBoard logs
# to ./logs_fcnn. The test split is passed as validation data, so the reported
# val_* metrics are computed on the test set.
epoch = 10
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='./logs_fcnn')

_ = model_fcnn.fit(
    x=X_train,
    y=y_train,
    epochs=epoch,
    validation_data=(X_test, y_test),
    callbacks=[tensorboard_callback],
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
# Open TensorBoard on the `logs_fcnn` directory written by the training callback.
# The model graph was exported as fcnn_graph.svg and is included in the archive.
%tensorboard --logdir logs_fcnn

In [19]:
# Score the fully connected network on the test split; evaluate() yields
# the loss followed by the accuracy metric configured in compile().
_, accuracy_fcnn = model_fcnn.evaluate(x=X_test, y=y_test)
print('Accuracy equals to: ', round(accuracy_fcnn, ndigits=2))

Accuracy equals to:  0.88


We see that the accuracy has improved (0.88 vs. 0.85 for logistic regression) thanks to the increased number of weights in the network (due to the larger number of layers). The model complexity has gone up and, since we have a relatively large dataset, this resulted in higher accuracy rather than in overfitting.

### CNN

In [20]:
# Turn each flat 784-feature row back into a 28x28 single-channel image.
# The trailing channel axis is added explicitly so the arrays match the
# Input(shape=(28, 28, 1)) declared by the CNN models below, instead of relying
# on Keras to reconcile the missing dimension. Using -1 infers the number of
# samples rather than hard-coding 60000 / 10000.
# NOTE: after this cell X_train / X_test no longer fit the dense models above,
# which expect 784 input features.
X_train = X_train.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)

In [21]:
# Baseline CNN: two Conv2D(3x3, ReLU) + 2x2 max-pooling stages with 32 and 64
# filters, flattened into a 10-way softmax classifier.
model_cnn = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax'),
])

In [22]:
# Same training configuration as the previous models:
# default SGD, categorical cross-entropy, accuracy metric.
model_cnn.compile(
    optimizer=tf.keras.optimizers.SGD(),
    metrics=['accuracy'],
    loss='categorical_crossentropy',
)

In [23]:
# (Re)load the TensorBoard notebook extension so the %tensorboard magic can be used below.
%reload_ext tensorboard

In [24]:
# Train the baseline CNN for 10 epochs and write TensorBoard logs to
# ./logs_cnn. The test split is passed as validation data, so the reported
# val_* metrics are computed on the test set.
epoch = 10
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='./logs_cnn')

_ = model_cnn.fit(
    x=X_train,
    y=y_train,
    epochs=epoch,
    validation_data=(X_test, y_test),
    callbacks=[tensorboard_callback],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [25]:
# Open TensorBoard on the `logs_cnn` directory written by the training callback.
# The model graph was exported as cnn_graph.svg and is included in the archive.
%tensorboard --logdir logs_cnn

In [26]:
# Score the baseline CNN on the test split; evaluate() yields the loss
# followed by the accuracy metric configured in compile().
_, accuracy_cnn = model_cnn.evaluate(x=X_test, y=y_test)
print('Accuracy equals to: ', round(accuracy_cnn, ndigits=2))

Accuracy equals to:  0.89


Accuracy still rises: the CNN reaches 0.89 compared with 0.88 for the fully connected network.

### CNN 2

In [27]:
# Deeper CNN: the baseline architecture plus two extra convolutional layers
# (128 and 64 filters, no pooling) inserted before the Flatten layer.
model_cnn_double = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(filters=128, kernel_size=3, activation='relu'),
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax'),
])

In [28]:
# Same training configuration as the previous models:
# default SGD, categorical cross-entropy, accuracy metric.
model_cnn_double.compile(
    optimizer=tf.keras.optimizers.SGD(),
    metrics=['accuracy'],
    loss='categorical_crossentropy',
)

In [29]:
# (Re)load the TensorBoard notebook extension so the %tensorboard magic can be used below.
%reload_ext tensorboard

In [30]:
# Train the deeper CNN for 10 epochs and write TensorBoard logs to
# ./logs_cnn2. The test split is passed as validation data, so the reported
# val_* metrics are computed on the test set.
epoch = 10
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='./logs_cnn2')

_ = model_cnn_double.fit(
    x=X_train,
    y=y_train,
    epochs=epoch,
    validation_data=(X_test, y_test),
    callbacks=[tensorboard_callback],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [31]:
# Open TensorBoard on the `logs_cnn2` directory written by the training callback.
# The model graph was exported as cnn2_graph.svg and is included in the archive.
%tensorboard --logdir logs_cnn2

In [32]:
# Score the deeper CNN on the test split; evaluate() yields the loss
# followed by the accuracy metric configured in compile().
_, accuracy_cnn_double = model_cnn_double.evaluate(x=X_test, y=y_test)
print('Accuracy equals to: ', round(accuracy_cnn_double, ndigits=2))

Accuracy equals to:  0.89


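As we see, the accuracy has not improved: rounded to two decimals it is the same 0.89 as for the first CNN (the unrounded value is slightly lower). The excessive number of layers probably led to overfitting.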

### With Batch Normalization

In [33]:
# Baseline CNN again, now with a BatchNormalization layer placed after each
# convolution (and before the corresponding max-pooling layer).
model_bn = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax'),
])

In [34]:
# Same training configuration as the previous models:
# default SGD, categorical cross-entropy, accuracy metric.
model_bn.compile(
    optimizer=tf.keras.optimizers.SGD(),
    metrics=['accuracy'],
    loss='categorical_crossentropy',
)

In [35]:
# (Re)load the TensorBoard notebook extension so the %tensorboard magic can be used below.
%reload_ext tensorboard

In [36]:
# Train the batch-normalized CNN for 10 epochs and write TensorBoard logs to
# ./logs_bn. The test split is passed as validation data, so the reported
# val_* metrics are computed on the test set.
epoch = 10
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='./logs_bn')

_ = model_bn.fit(
    x=X_train,
    y=y_train,
    epochs=epoch,
    validation_data=(X_test, y_test),
    callbacks=[tensorboard_callback],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [37]:
# Open TensorBoard on the `logs_bn` directory written by the training callback.
# The model graph was exported as bn_graph.svg and is included in the archive.
%tensorboard --logdir logs_bn

In [38]:
# Score the batch-normalized CNN on the test split; evaluate() yields the
# loss followed by the accuracy metric configured in compile().
_, accuracy_bn = model_bn.evaluate(x=X_test, y=y_test)
print('Accuracy equals to: ', round(accuracy_bn, ndigits=2))

Accuracy equals to:  0.9


As predicted in the task description, the model with Batch Normalization achieved the best accuracy (0.90).