In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Fashion-MNIST as shipped with Keras. load_data() hands back two
# (images, labels) pairs; the second pair is used as the validation set
# throughout this notebook.
fashion_mnist = tf.keras.datasets.fashion_mnist
train_split, validation_split = fashion_mnist.load_data()
train_images, train_labels = train_split
validation_images, validation_labels = validation_split

In [None]:
# Standardise every pixel position with statistics computed over the
# training set only (axis 0 is the sample axis).
# NOTE: this cell overwrites train_images / validation_images, so running it
# a second time without reloading the data would standardise them twice.
media = np.mean(train_images, axis=0)
desvio = np.std(train_images, axis=0)
# A pixel that is constant across the whole training set has a standard
# deviation of 0; dividing by it would fill the data with NaN/inf. Using 1
# there leaves that pixel merely mean-centred and keeps the later
# de-normalisation (x * desvio + media) exact.
desvio = np.where(desvio == 0, 1.0, desvio)
train_images = (train_images - media) / desvio
ntr = train_images.shape[0]  # number of training samples
dx = train_images.shape[1] * train_images.shape[2]  # pixels per flattened image
# The validation set is scaled with the *training* statistics.
validation_images = (validation_images - media) / desvio

Inicializaciones

In [None]:
# Training hyper-parameters; batchsize and num_epochs are reused by the
# later training cells.
batchsize = 256
num_epochs = 3
n_hidden_units = 128

# Single-hidden-layer autoencoder: flatten the image, squeeze it through
# n_hidden_units ReLU units, expand back to dx values and restore the
# 28x28 layout.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(28,28)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(n_hidden_units, activation='relu',kernel_initializer='GlorotNormal'))
model.add(tf.keras.layers.Dense(dx,kernel_initializer='HeNormal'))
# No input_shape here: the model's input is already declared by the Input
# layer above, and Sequential only looks at that argument on the first
# layer (recent Keras versions warn when it is passed to a layer).
model.add(tf.keras.layers.Reshape((28, 28)))
model.summary()

Learning Rate Decay

In [None]:
def step_decay(epoch):
    """Return the learning rate to use for the given epoch index.

    Epochs below 2 use 0.01; every later epoch uses 0.001.
    """
    return 0.01 if epoch < 2 else 0.001
   
# Wrap step_decay in a Keras callback so it can be passed to fit().
lrate = tf.keras.callbacks.LearningRateScheduler(step_decay)

model.compile(
    loss='MeanSquaredError',
    optimizer=tf.keras.optimizers.Adam(),
)

# Input and target are the same array: the model is trained to reconstruct
# its own input, and is validated the same way.
hist = model.fit(
    x=train_images,
    y=train_images,
    batch_size=batchsize,
    epochs=num_epochs,
    validation_data=(validation_images, validation_images),
    callbacks=[lrate],
)

Weight Decay

In [None]:
# 10-class classifier whose three hidden Dense layers each carry an L2
# kernel penalty of 1e-3; the softmax output layer is left unregularised.
modelwd = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(
        512,
        activation='relu',
        kernel_initializer='glorot_normal',
        kernel_regularizer=tf.keras.regularizers.l2(1e-3),
    ),
    tf.keras.layers.Dense(
        256,
        activation='relu',
        kernel_initializer='he_normal',
        kernel_regularizer=tf.keras.regularizers.l2(1e-3),
    ),
    tf.keras.layers.Dense(
        128,
        activation='relu',
        kernel_initializer='he_normal',
        kernel_regularizer=tf.keras.regularizers.l2(1e-3),
    ),
    tf.keras.layers.Dense(
        10,
        activation='softmax',
        kernel_initializer='he_normal',
    ),
])
modelwd.summary()

# Labels are integer class ids, hence the sparse cross-entropy loss.
modelwd.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Nadam(),
    metrics=['accuracy'],
)
hist_wd = modelwd.fit(
    x=train_images,
    y=train_labels,
    batch_size=batchsize,
    epochs=num_epochs,
    validation_data=(validation_images, validation_labels),
)

Early Stopping

In [None]:
# Stop once val_accuracy has failed to improve for 5 consecutive epochs.
# restore_best_weights=True asks Keras to roll the model back to the weights
# of the best-scoring epoch when it stops; without it the model would keep
# the weights from the last (non-improving) epoch.
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)

# modelwd is not rebuilt here, so training continues from the weights it
# already has; compile() only attaches a fresh Nadam optimizer.
modelwd.compile(optimizer=tf.keras.optimizers.Nadam(),loss='sparse_categorical_crossentropy',metrics=['accuracy'])

# epochs=100 is only an upper bound; `es` is expected to end training
# sooner. `lrate` is the step-decay scheduler callback created in the
# Learning Rate Decay section.
hist = modelwd.fit(
    x=train_images,
    y=train_labels,
    batch_size=batchsize,
    epochs=100,
    validation_data=(validation_images, validation_labels),
    callbacks=[es, lrate],
)

Dropout

In [None]:
# Same 512-256-128-10 layout as the weight-decay classifier, but regularised
# with a Dropout layer (drop probability 0.2) after every hidden Dense layer
# instead of an L2 penalty.
modeldro = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer='glorot_normal'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(256, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax', kernel_initializer='he_normal'),
])
modeldro.summary()

modeldro.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.RMSprop(),
    metrics=['accuracy'],
)
hist_dro = modeldro.fit(
    x=train_images,
    y=train_labels,
    batch_size=batchsize,
    epochs=num_epochs,
    validation_data=(validation_images, validation_labels),
)

Data Augmentation

In [None]:
# Augment the training set with a left-right mirrored copy of every image.
# The standardisation is per pixel position, so each image is first mapped
# back to raw pixel values, mirrored, and then standardised again with the
# statistics of its *new* pixel positions.
# Flipping axis 2 (the column axis) of the whole stack mirrors every image
# in one vectorised step instead of a Python loop over the samples.
new_images = (np.flip(train_images*desvio + media, axis=2) - media)/desvio

# Originals followed by their mirrored copies; the labels are simply
# repeated because mirroring does not change the class.
train_images_da = np.concatenate([train_images, new_images])
train_labels_da = np.concatenate([train_labels, train_labels])

# Human-readable class names, indexed by the integer label.
text_labels = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

# Preview four randomly chosen training images, each shown next to its
# mirrored counterpart, on a 2x4 grid (original on the left of each pair).
fig, axes = plt.subplots(2, 4, figsize=(8, 4))
panels = axes.ravel()
for i in range(4):
    img_index = np.random.randint(0, train_images.shape[0])
    caption = text_labels[train_labels[img_index]]
    for ax, images in ((panels[2*i], train_images), (panels[2*i + 1], new_images)):
        ax.set_xticks([])
        ax.set_yticks([])
        ax.grid(False)
        # Undo the standardisation so the picture is drawn in raw pixel values.
        ax.imshow(desvio*images[img_index] + media, cmap="gray_r")
        ax.set_xlabel(caption)