# Fashion MNIST Data Science Challenge: Neural Networks and Deep Learning


# Labels

Each training and test example is assigned to one of the following labels:

<li>0 T-shirt/top </li>
<li>1 Trouser</li>
<li>2 Pullover </li>
<li>3 Dress </li>
<li>4 Coat </li>
<li>5 Sandal</li>
<li>6 Shirt </li>
<li>7 Sneaker </li>
<li>8 Bag </li>
<li>9 Ankle boot </li>
--------------------------

Each row is a separate image

Column 1 is the class label.
Remaining columns are pixel numbers (784 total).
Each value is the darkness of the pixel (0 to 255)

## **Fashion MNIST Classification**
Let's build a baseline model

In [1]:
#basic packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import datetime
import tensorflow as tf
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

### Data Preparation

In [2]:
# 1. Load the raw CSV files from the working directory.
#    train.csv is the labelled training data (it has a 'label' column);
#    test.csv is turned into the submission input further down.
data_train = pd.read_csv('train.csv')
data_validate = pd.read_csv('test.csv')

In [3]:
# Two-stage setup: an "easy" data set in which the look-alike tops share one
# label, and a "hard" data set that contains only those tops.
data_easy_to_classify = data_train.copy()

# Fold pullover (2), dress (3) and shirt (6) into label 0; T-shirt/top rows
# already carry 0, so they need no rewrite. Coat (4) keeps its own label.
data_easy_to_classify.loc[data_easy_to_classify['label'].isin([2, 3, 6]), 'label'] = 0

# Original label -> compact "easy" label (0..6). Not applied in this cell: the
# easy labels keep their original values for now.
new_easy_label_dict = {0: 0, 1: 1, 2: 0, 3: 0, 4: 2, 5: 3, 6: 0, 7: 4, 8: 5, 9: 6}

# Compact "easy" label -> original label (the merged class 0 maps back to 0).
easy_trafo_dict = {0: 0, 1: 1, 2: 4, 3: 5, 4: 7, 5: 8, 6: 9}

# TODO: decide whether the easy labels should be renumbered with
# new_easy_label_dict (trouser 1, coat 2, sandal 3, sneaker 4, bag 5,
# ankle boot 6).

# The hard data set keeps only the rows whose label is none of trouser (1),
# coat (4), sandal (5), sneaker (7), bag (8) or ankle boot (9).
data_hard_to_classify = data_train.copy()
data_hard_to_classify = data_hard_to_classify[~data_hard_to_classify["label"].isin([1, 4, 5, 7, 8, 9])]

data_easy_to_classify.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0,0,0,0,0,0,0,0,1,123,...,0,0,0,0,127,150,28,0,0,0
1,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,169,43,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,129,37,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,2,0,0,0,0,0,0,0,0,0


In [4]:
# Convert the DataFrames into float32 NumPy arrays so that Keras accepts them
# as input data.
data_train_easy = np.array(data_easy_to_classify, dtype = 'float32')
data_train_hard = np.array(data_hard_to_classify, dtype = 'float32')
data_validate = np.array(data_validate, dtype='float32')

In [5]:
# Split each training array into pixel data and labels: column 0 is used as
# the label, every remaining column as pixel values.
# The pixel values are passed on unscaled; a Normalization layer is part of
# the models defined further down.
x_train_easy = data_train_easy[:,1:]
y_train_easy = data_train_easy[:,0]

x_train_hard = data_train_hard[:,1:]
y_train_hard = data_train_hard[:,0]

# The submission data is used as-is: no label column is split off and no
# scaling is applied here.
#data_submission = data_validate/255  # TODO -0.5?
data_submission = data_validate

In [6]:
# Reshape the flat pixel rows into images (28px x 28px and 1 channel).
image_rows = 28
image_cols = 28
image_shape = (image_rows,image_cols,1)  # 1 channel because the images are grayscale; colour images would use 3 (r,g,b)

# Keep the number of samples (axis 0) and unpack image_shape for the rest.
x_train_easy = x_train_easy.reshape(x_train_easy.shape[0],*image_shape)
x_train_hard = x_train_hard.reshape(x_train_hard.shape[0],*image_shape)

data_submission = data_submission.reshape(data_submission.shape[0],*image_shape)

In [7]:
# Split each training set into a training part (80 %) and a validation part
# (20 %). Both splits shuffle with the same fixed random_state, so re-running
# the cell reproduces the same partition.
x_train_easy2,x_validate_easy2,y_train_easy2,y_validate_easy2 = train_test_split(x_train_easy,y_train_easy,test_size = 0.2,shuffle=True,random_state = 12345)

x_train_hard2,x_validate_hard2,y_train_hard2,y_validate_hard2 = train_test_split(x_train_hard,y_train_hard,test_size = 0.2,shuffle=True,random_state = 12345)

In [8]:
# Configure TensorFlow device placement (the intent is to run on the first GPU).
import tensorflow as tf
from tensorflow.python.client import device_lib
import keras
# Uncomment to list the devices TensorFlow can see (e.g. to check for CUDA).
#print(device_lib.list_local_devices())

# Set GPU as device
#tf.config.experimental.set_memory_growth(tf.config.list_physical_devices('GPU')[0], True)
# Turn soft device placement off.
tf.config.set_soft_device_placement(False)
#tf.debugging.set_log_device_placement(True)
# NOTE(review): tf.device() returns a context manager (that object is what the
# cell prints). Called outside a `with` block it does not appear to pin the
# later cells to the GPU -- confirm, and wrap the training calls in
# `with tf.device(...):` if explicit placement is wanted.
tf.device("/device:GPU:0")

<tensorflow.python.eager.context._EagerDeviceContext at 0x165f623c1c0>

In [10]:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout,BatchNormalization,SpatialDropout2D,GaussianNoise,Input,Add,Activation,AveragePooling2D,ZeroPadding2D
from tensorflow.keras.regularizers import l2
from keras.optimizers import Adam
from keras.callbacks import TensorBoard# zur Visualisierung

# Creates layers for data preprocessing -> helps with generalisation
# The same Sequential instance is placed at the front of all three models
# below. Every random augmentation layer is currently commented out, so only
# the Normalization layer is active.
# NOTE(review): no adapt() call and no explicit mean/variance for the
# Normalization layer is visible in this notebook -- confirm that it really
# rescales the raw pixel values.
# TODO define seed globally
data_augmentation = tf.keras.Sequential([
        tf.keras.layers.experimental.preprocessing.Normalization(),
        #tf.keras.layers.experimental.preprocessing.RandomRotation(factor=0.25, fill_mode='reflect', interpolation='bilinear', seed=1234, fill_value=0.0),
        #tf.keras.layers.experimental.preprocessing.RandomZoom(height_factor=0.2, width_factor=None, fill_mode='reflect', interpolation='bilinear', seed=1234, fill_value=0.0),
        #tf.keras.layers.experimental.preprocessing.RandomContrast(factor=0.2, seed=1234),
        #tf.keras.layers.experimental.preprocessing.RandomTranslation(height_factor=0.2, width_factor=0.2, fill_mode='reflect', interpolation='bilinear', seed=1234, fill_value=0.0),
        #tf.keras.layers.experimental.preprocessing.RandomFlip(mode="horizontal_and_vertical", seed=1234)
], name="data_augmentation")

# Baseline network: shared preprocessing, one 3x3 convolution with 10 filters,
# then a 64-unit dense layer and a 10-way softmax output.
# NOTE(review): no compile() or fit() call for `model` is visible in this
# notebook, although later cells evaluate and predict with it -- confirm.
model = tf.keras.Sequential([
        data_augmentation,
        Conv2D(kernel_size=3,filters=10,activation='relu',input_shape=(28,28,1)),
        Flatten(),
        #....,
        #....,
        #....,
        #....,
        Dense(64,activation = 'relu'),
        Dense(10,activation = 'softmax')  # 10 neurons for output, softmax best according to article TODO article here
    ], name="model")

# "Hard" network, trained further down on the data set that only contains the
# look-alike tops (labels 0, 2, 3 and 6). It still has a 10-way softmax
# output, so its argmax is directly an original label value.
felix_net = tf.keras.Sequential([
    data_augmentation,
    # Convolution block 1: three 3x3 convolutions with 32 filters, each with
    # L2 weight regularisation and followed by batch normalisation.
    Conv2D(32, kernel_size=3, activation='relu', input_shape=(28,28,1), padding='same', kernel_regularizer=l2(0.002)),
    BatchNormalization(),
    Conv2D(32, kernel_size=3, activation='relu', padding='same', kernel_regularizer=l2(0.002)),
    BatchNormalization(),
    Conv2D(32, kernel_size=3, activation='relu', padding='same', kernel_regularizer=l2(0.002)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Convolution block 2: same layout with 64 filters.
    Conv2D(64, kernel_size=3, activation='relu', padding='same', kernel_regularizer=l2(0.002)),
    BatchNormalization(),
    Conv2D(64, kernel_size=3, activation='relu', padding='same', kernel_regularizer=l2(0.002)),
    BatchNormalization(),
    Conv2D(64, kernel_size=3, activation='relu', padding='same', kernel_regularizer=l2(0.002)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),


    # Classifier head: three 256-unit dense layers with dropout, then softmax.
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),

    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax')
])

# "Easy" network, trained further down on the data set in which pullover,
# dress and shirt were merged into label 0. The labels were not renumbered,
# so the 10-way softmax output is indexed by original label values.
easy_net = tf.keras.Sequential([
    data_augmentation,
    # Convolution block 1: 32 and 64 filters, each followed by batch
    # normalisation.
    Conv2D(32, kernel_size=3, activation='relu', input_shape=(28,28,1), padding='same'),
    BatchNormalization(),
    Conv2D(64, kernel_size=3, activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Convolution block 2: two convolutions with 128 filters.
    Conv2D(128, kernel_size=3, activation='relu', padding='same'),
    BatchNormalization(),
    Conv2D(128, kernel_size=3, activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),


    # Classifier head: three 256-unit dense layers with dropout, then softmax.
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),

    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax')
])

def multi_net_predict(dataset):
    """Predict labels with the two-stage easy/hard network cascade.

    ``easy_net`` classifies every sample first. Its label 0 is the merged
    "top" class (T-shirt/top, pullover, dress and shirt share label 0 in the
    easy training data), so every sample predicted as 0 is passed on to
    ``felix_net``, which was trained on exactly those classes, and the
    second opinion replaces the first prediction.

    Args:
        dataset: Indexable batch of images in the shape the networks expect
            (the notebook uses arrays of shape ``(n, 28, 28, 1)``).

    Returns:
        1-D integer NumPy array with one predicted label per sample. Both
        networks were trained on original label values, so no mapping back
        is needed.
    """
    samples = np.asarray(dataset)
    if samples.shape[0] == 0:
        # Nothing to predict; avoid calling Keras with an empty batch.
        return np.zeros(0, dtype=np.int64)

    prediction = np.argmax(easy_net.predict(samples), axis=1)

    # Select by predicted label (not by position in the batch) and ask the
    # hard network about all of those samples in a single batched call.
    needs_second_opinion = prediction == 0
    if needs_second_opinion.any():
        second_op = felix_net.predict(samples[needs_second_opinion])
        prediction[needs_second_opinion] = np.argmax(second_op, axis=1)

    return prediction

def multi_net_evaluate(dataset, labels):
    """Return the accuracy of the two-stage cascade on a labelled data set.

    Args:
        dataset: Batch of images, forwarded unchanged to
            ``multi_net_predict``.
        labels: One true label per sample, given as original label values
            (not the merged labels of the easy training data).

    Returns:
        Fraction of samples whose predicted label equals the true label, as
        a float in ``[0, 1]``; ``0.0`` for an empty data set.

    Raises:
        ValueError: If the number of labels differs from the number of
            predictions.
    """
    prediction = np.asarray(multi_net_predict(dataset))
    labels = np.asarray(labels).reshape(-1)

    if prediction.shape[0] != labels.shape[0]:
        raise ValueError(
            "got {} predictions but {} labels".format(prediction.shape[0], labels.shape[0])
        )
    if labels.size == 0:
        return 0.0

    return float(np.mean(prediction == labels))

In [11]:
# Training hyper-parameters read by the training cells below.
# batch_size and learning_rate used to be assigned twice in a row (64 / 0.001
# first, then 128 / 0.005); only the second, effective values are kept.
training_network = ""  # tag for the saved-model name; only the commented-out save cell reads it
total_epochs = 50
batch_size = 128
# NOTE(review): the compile() calls in the training cells hard-code
# learning_rate=0.05 and do not read this variable.
learning_rate = 0.005

### Training

In [12]:
# One list per network: each fit() call appends its History object, so the
# plotting cells can stitch several training runs together.
easy_history, hard_history = [], []

In [None]:
# Train easy network
# easy_net ends in a softmax layer, i.e. it outputs probabilities rather than
# logits, so the loss has to be built with from_logits=False. With
# from_logits=True the loss would treat the probabilities as logits.
easy_net.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.05, momentum=0.9, decay=0.05/total_epochs),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])
# The History object of every run is collected for the plotting cells.
# NOTE(review): use_multiprocessing / workers / max_queue_size presumably have
# no effect for in-memory NumPy inputs -- confirm against the fit() docs.
easy_history.append(easy_net.fit(
    x_train_easy2,
    y_train_easy2,
    epochs=total_epochs,
    batch_size=batch_size,
    verbose=1,
    validation_data=(x_validate_easy2,y_validate_easy2),
    use_multiprocessing=True,
    workers=20,
    max_queue_size=50
    ))

Epoch 1/50
Epoch 2/50

In [None]:
# Train hard network
# felix_net ends in a softmax layer, i.e. it outputs probabilities rather than
# logits, so the loss has to be built with from_logits=False. With
# from_logits=True the loss would treat the probabilities as logits.
felix_net.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.05, momentum=0.9, decay=0.05/total_epochs),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])
# The History object of every run is collected for the plotting cells.
# NOTE(review): use_multiprocessing / workers / max_queue_size presumably have
# no effect for in-memory NumPy inputs -- confirm against the fit() docs.
hard_history.append(felix_net.fit(
    x_train_hard2,
    y_train_hard2,
    epochs=total_epochs,
    #epochs=14,
    batch_size=batch_size,
    verbose=1,
    validation_data=(x_validate_hard2,y_validate_hard2),
    use_multiprocessing=True,
    workers=20,
    max_queue_size=50
    ))

In [None]:
#model.summary()

In [None]:
# Report loss and accuracy of both trained networks on their own validation
# splits. The cell used to reference x_validate2 / y_validate2, which are not
# defined anywhere in this notebook, and `model`, which is never compiled.
for net_name, net, x_val, y_val in (
    ('Easy', easy_net, x_validate_easy2, y_validate_easy2),
    ('Hard', felix_net, x_validate_hard2, y_validate_hard2),
):
    score = net.evaluate(x_val, y_val, verbose=0)
    print('{} Test Loss : {:.4f}'.format(net_name, score[0]))
    print('{} Test Accuracy : {:.4f}'.format(net_name, score[1]))

In [None]:
# Loss curves of the easy network, concatenated over all recorded runs.
plt.figure(figsize=(20, 10))

train_list, val_list = [], []

# Only runs that recorded both the training and the validation loss count.
for his in easy_history:
    if his.history.get("loss") is not None and his.history.get("val_loss") is not None:
        train_list.extend(his.history['loss'])
        val_list.extend(his.history['val_loss'])

plt.plot(train_list, label='Loss')
plt.plot(val_list, label='Validation Loss')
plt.legend()
plt.title('Easy Training - Loss Function')

In [None]:
# Loss curves of the hard network, concatenated over all recorded runs.
plt.figure(figsize=(20, 10))

train_list, val_list = [], []

# Only runs that recorded both the training and the validation loss count.
for his in hard_history:
    if his.history.get("loss") is not None and his.history.get("val_loss") is not None:
        train_list.extend(his.history['loss'])
        val_list.extend(his.history['val_loss'])

plt.plot(train_list, label='Loss')
plt.plot(val_list, label='Validation Loss')
plt.legend()
plt.title('Hard Training - Loss Function')

In [None]:
# Accuracy curves of the easy network, concatenated over all recorded runs.
plt.figure(figsize=(20, 10))

train_list, val_list = [], []

# Only runs that recorded both the training and the validation accuracy count.
for his in easy_history:
    if his.history.get("accuracy") is not None and his.history.get("val_accuracy") is not None:
        train_list.extend(his.history['accuracy'])
        val_list.extend(his.history['val_accuracy'])

plt.plot(train_list, label='Accuracy')
plt.plot(val_list, label='Validation Accuracy')
plt.legend()
plt.title('Easy Training - Accuracy Function')

In [None]:
# Accuracy curves of the hard network, concatenated over all recorded runs.
plt.figure(figsize=(20, 10))

train_list, val_list = [], []

# Only runs that recorded both the training and the validation accuracy count.
for his in hard_history:
    if his.history.get("accuracy") is not None and his.history.get("val_accuracy") is not None:
        train_list.extend(his.history['accuracy'])
        val_list.extend(his.history['val_accuracy'])

plt.plot(train_list, label='Accuracy')
plt.plot(val_list, label='Validation Accuracy')
plt.legend()
plt.title('Hard Training - Accuracy Function')

### Submission
Submit your final notebook as **fashion_mnist_teamX.ipynb** and your predictions of the test data as a **predictions_teamX.csv**.

In [None]:
# Plot confusion matrix of the easy network on its validation split.
# The cell used to reference x_validate2 / y_validate2, which are not defined
# anywhere in this notebook, and the untrained `model`.
import sklearn.metrics

class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker","Bag", "Ankle boot"]
val_pred = np.argmax(easy_net.predict(x_validate_easy2), axis = 1)

# labels= pins the matrix to all ten classes so it always lines up with
# class_names. Pullover, dress and shirt were merged into label 0 for the easy
# data set, so their rows stay empty and the "T-shirt/top" row stands for the
# merged class.
conf_matrix = pd.DataFrame(
    sklearn.metrics.confusion_matrix(y_validate_easy2.astype(int), val_pred, labels=list(range(10))),
    index=class_names,
    columns=class_names,
)
plt.figure(figsize = (10,7))
# fmt="d" prints the integer counts in full.
sns.heatmap(conf_matrix, annot=True, fmt="d")

In [None]:
# Load the labelled hold-out set and prepare it the same way as the training
# data: float32 array, column 0 as label, remaining columns as pixels.
data_min_val = pd.read_csv('test_w_labels.csv')
data_min_val = np.array(data_min_val, dtype="float32")
# Pixel values stay unscaled; a Normalization layer is part of the models.
x_min_val = data_min_val[:,1:]
y_min_val = data_min_val[:,0]

# Reshape the flat pixel rows into 28 x 28 images with one channel.
x_min_val = x_min_val.reshape(x_min_val.shape[0], 28, 28, 1)

In [None]:
# Score the trained two-stage cascade on the labelled hold-out set.
# The cell used to call model.evaluate(), but `model` is never compiled or
# trained in this notebook. The cascade consists of two separately trained
# networks and has no single loss value, so only the accuracy is reported.
min_val_pred = multi_net_predict(x_min_val)
print('Final Test Accuracy : {:.4f}'.format(np.mean(min_val_pred == y_min_val)))

In [None]:
# Plot confusion matrix of the two-stage cascade on the labelled hold-out set.
# The cell used to predict with `model`, which is never trained in this
# notebook.
import sklearn.metrics

class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker","Bag", "Ankle boot"]
val_pred = multi_net_predict(x_min_val)

# labels= pins the matrix to all ten classes so it always lines up with
# class_names, even if a class is missing from labels or predictions.
conf_matrix = pd.DataFrame(
    sklearn.metrics.confusion_matrix(y_min_val.astype(int), val_pred, labels=list(range(10))),
    index=class_names,
    columns=class_names,
)
plt.figure(figsize = (10,7))
# fmt="d" prints the integer counts in full.
sns.heatmap(conf_matrix, annot=True, fmt="d")

In [None]:
# Predict the submission labels with the trained two-stage cascade.
# The cell used to predict with `model`, which is never trained in this
# notebook. multi_net_predict already returns one label index per sample.
results = multi_net_predict(data_submission)

results = pd.Series(results,name="Label")
results

In [None]:
# # Save model to disk
# from datetime import datetime
# if True:
#     model.save("trained_models/model_" + training_network + "_"+ datetime.now().strftime("%d%m%Y_%H%M%S"))

In [None]:
# NOTE: this cell held only an incomplete "for:" statement (a SyntaxError); it is intentionally left empty.

In [None]:
# data_results = pd.DataFrame(results)
# data_results.head(10)

In [None]:
# data_results.to_csv('fashion_mnist_pred_team1.csv', index=False)  # Please replace X with your group number!

## Resources
Background:
  * Book: [Neural Networks and Deep Learning, Michael Nielsen](http://neuralnetworksanddeeplearning.com) 
  * Lecture: [CS231n, Stanford University](http://cs231n.stanford.edu/)

Implementation:
  * [TensorFlow tutorials](https://www.tensorflow.org/tutorials)
  * [Keras Docs](https://www.tensorflow.org/api_docs/python/tf/keras)

## Image Sources
* http://neuralnetworksanddeeplearning.com/images/
* https://www.researchgate.net/publication/320270458/figure/fig1/AS:551197154254848@1508427050805/Mathematical-model-of-artificial-neuron.png
* https://www.w3resource.com/w3r_images/numpy-manipulation-ndarray-flatten-function-image-1.png
* https://computersciencewiki.org/images/8/8a/MaxpoolSample2.png
* https://glassboxmedicine.files.wordpress.com/2019/01/slide2.jpg?w=616
* http://neuralnetworksanddeeplearning.com/images/valley_with_ball.png
* https://upload.wikimedia.org/wikipedia/commons/thumb/8/88/Logistic-curve.svg/1200px-Logistic-curve.svg.png