In [2]:
import tensorflow as tf
import keras
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np
from keras.datasets import fashion_mnist
from keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras import models
from tensorflow.keras import layers

Using TensorFlow backend.


In [3]:
# Fetch the Fashion-MNIST partitions shipped with Keras and unpack each
# (images, labels) pair.
train_split, test_split = fashion_mnist.load_data()
train_X, train_y = train_split
test_X, test_y = test_split

# Pool both partitions into a single image array and a single label array so
# that a custom train/validation/test split can be carved out afterwards.
X = np.concatenate([train_X, test_X], axis=0)
y = np.concatenate([train_y, test_y], axis=0)

In [4]:
# Add a trailing channel axis so every image is 28x28x1, the input shape the
# Conv2D models in this notebook declare. The sample count is inferred (-1)
# instead of being hard-coded, so the cell keeps working if the pooled
# dataset size ever changes.
X = X.reshape(-1, 28, 28, 1)

# Convert the integer class labels into one-hot vectors of depth 10, the
# target format expected by CategoricalCrossentropy.
# NOTE(review): this cell rebinds `y`, so it is meant to be run once per
# kernel session, after the loading cell.
y = tf.one_hot(y, 10)

In [5]:
# Split the pooled data 70% / 15% / 15% into train / validation / test.
# Sizes are derived from the actual number of samples rather than a
# hard-coded 70000, so the three parts always cover X exactly.
total_num = len(X)
train_num = int(total_num * 0.7)
test_num = int(total_num * 0.15)
val_num = total_num - train_num - test_num  # validation takes the remainder

# Index where the validation slice ends and the test slice begins.
val_end = train_num + val_num

# Slices are taken in order: [train | validation | test].
train_X = X[:train_num]
val_X = X[train_num:val_end]
test_X = X[val_end:]

train_y = y[:train_num]
val_y = y[train_num:val_end]
test_y = y[val_end:]

In [6]:
# Augmentation settings: random horizontal/vertical shifts of up to 10% of
# the image size, plus random left-right flips.
augmentation_options = {
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "horizontal_flip": True,
}
datagen = ImageDataGenerator(**augmentation_options)

In [7]:
# Compute any data-dependent statistics the generator needs from the
# training images.
# NOTE(review): per the Keras documentation, ImageDataGenerator.fit() is only
# required when featurewise_center, featurewise_std_normalization or
# zca_whitening is enabled. The generator constructed in this notebook sets
# none of those, so this call is presumably redundant — confirm before
# removing it.
datagen.fit(train_X)

In [8]:
# CNN variant WITH max pooling: three 3x3 tanh convolutions (14, 12 and 10
# filters), with a 2x2 max-pool after each of the first two, followed by a
# 64-unit ReLU layer and a 10-unit output layer. The output layer has no
# activation, so the model produces raw logits.
model = models.Sequential([
    layers.Conv2D(filters=14, kernel_size=(3, 3), activation='tanh',
                  input_shape=(28, 28, 1)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(filters=12, kernel_size=(3, 3), activation='tanh'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(filters=10, kernel_size=(3, 3), activation='tanh'),
    layers.Flatten(),
    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=10),
])

In [11]:
# Custom training loop: plain SGD on categorical cross-entropy, fed with
# augmented batches produced by `datagen`.
batch_size = 64
# datagen.flow() keeps yielding batches for as long as it is iterated, so
# each epoch is explicitly capped at the number of full batches in the
# training split.
steps_per_epoch = train_num // batch_size

optimizer = tf.keras.optimizers.SGD(learning_rate=0.005)
# Use the tf.keras loss (rather than the standalone `keras` package) so the
# loss, optimizer and model all come from the same Keras implementation.
# from_logits=True because the loss is fed the model's raw outputs.
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
losses = []      # per-batch training loss, accumulated across all epochs
val_losses = []  # initialised here; nothing in this cell appends to it

epochs = 10
for epoch in range(epochs):
    # A fresh generator per epoch; zip() with range() stops after
    # steps_per_epoch batches without pulling an extra one.
    batches = datagen.flow(train_X, train_y, batch_size=batch_size)
    for step, (x_batch_train, y_batch_train) in zip(range(steps_per_epoch), batches):

        # Record the forward pass so gradients can be taken w.r.t. the weights.
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)
            loss_value = loss_fn(y_batch_train, logits)
        losses.append(loss_value)

        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            print("Seen so far: %s samples" % ((step + 1) * batch_size))

Training loss (for one batch) at step 0: 2.5788
Seen so far: 64 samples
Training loss (for one batch) at step 200: 1.9739
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 1.4165
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 1.3247
Seen so far: 38464 samples
Training loss (for one batch) at step 0: 1.2503
Seen so far: 64 samples
Training loss (for one batch) at step 200: 0.9622
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 1.0793
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 1.0242
Seen so far: 38464 samples
Training loss (for one batch) at step 0: 0.9715
Seen so far: 64 samples
Training loss (for one batch) at step 200: 0.8547
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 0.7553
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 1.0193
Seen so far: 38464 samples
Training loss (for one batch) at step 0: 0.8697
Seen so far: 64 samples
Training loss (for 

In [12]:
# Training accuracy: fraction of training images whose predicted class
# matches the true class.
pred_train_y = model.predict(train_X)

# Collapse one-hot labels and predicted logits to class indices with a single
# vectorised argmax each, instead of evaluating one tensor per sample.
train_labels = np.argmax(np.asarray(train_y), axis=1)
pred_train_label = np.argmax(pred_train_y, axis=1)

# Number of correct predictions, then accuracy as the cell's displayed value.
corr = int(np.sum(pred_train_label == train_labels))
corr / len(train_labels)

0.7769183673469388

In [13]:
# Test accuracy: fraction of held-out test images whose predicted class
# matches the true class.
pred_test_y = model.predict(test_X)

# Collapse one-hot labels and predicted logits to class indices with a single
# vectorised argmax each, instead of evaluating one tensor per sample.
test_labels = np.argmax(np.asarray(test_y), axis=1)
pred_test_label = np.argmax(pred_test_y, axis=1)

# Number of correct predictions, then accuracy as the cell's displayed value.
corr = int(np.sum(pred_test_label == test_labels))
corr / len(test_labels)

0.7738095238095238

## Similar results are obtained with Keras's built-in `compile` and `fit` functions

In [17]:
# CNN variant WITHOUT max pooling: the same three 3x3 tanh convolutions
# (14, 12 and 10 filters) as the pooled model, but with both pooling layers
# left out, followed by a 64-unit ReLU layer and a 10-unit logit output.
model = models.Sequential([
    layers.Conv2D(filters=14, kernel_size=(3, 3), activation='tanh',
                  input_shape=(28, 28, 1)),
    layers.Conv2D(filters=12, kernel_size=(3, 3), activation='tanh'),
    layers.Conv2D(filters=10, kernel_size=(3, 3), activation='tanh'),
    layers.Flatten(),
    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=10),
])

In [18]:
# Train with Keras's built-in loop on augmented batches from `datagen`.
# The loss is taken from tf.keras (not the standalone `keras` package) so it
# belongs to the same Keras implementation as the tensorflow.keras model.
# NOTE(review): the string 'SGD' selects the optimizer with its default
# settings, not the learning_rate=0.005 used by the manual training loop
# earlier in this notebook — confirm that difference is intended.
model.compile(optimizer='SGD',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Validation data is passed un-augmented; per-epoch metrics end up in `history`.
history = model.fit(datagen.flow(train_X, train_y, batch_size=64),
                    epochs=10, validation_data=(val_X, val_y))

Train for 766 steps, validate on 10500 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [20]:
# Test accuracy for the model trained with compile()/fit().
# Cast the test images to float32 before predicting (re-running the cast on
# an already-float32 tensor leaves the values unchanged).
test_X = tf.cast(test_X, tf.float32)
pred_test_y = model.predict(test_X)

# Collapse one-hot labels and predicted logits to class indices with a single
# vectorised argmax each, instead of evaluating one tensor per sample.
test_labels = np.argmax(np.asarray(test_y), axis=1)
pred_test_label = np.argmax(pred_test_y, axis=1)

# Number of correct predictions, then accuracy as the cell's displayed value.
corr = int(np.sum(pred_test_label == test_labels))
corr / len(test_labels)

0.7828571428571428

# No Data Augmentation

In [6]:
# train_X = tf.cast(train_X, tf.float32)

In [15]:
# CNN used for the no-augmentation run. Despite the original
# "WITHOUT MAXPOOLING" label, this variant keeps ONE 2x2 max-pooling layer
# (after the second convolution) and uses only two 3x3 tanh convolutions
# (14 and 12 filters); the third convolution is left out. The head is the
# same 64-unit ReLU layer followed by a 10-unit logit output.
model = models.Sequential([
    layers.Conv2D(filters=14, kernel_size=(3, 3), activation='tanh',
                  input_shape=(28, 28, 1)),
    layers.Conv2D(filters=12, kernel_size=(3, 3), activation='tanh'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=10),
])

In [None]:
# For some reason the kernel dies here
# The loop below is disabled by wrapping it in a bare string literal, so none
# of it executes.
# NOTE(review): `train_dataset` is built with from_tensor_slices() and never
# batched, so iterating it yields one sample per step rather than the
# 64-sample batches that the "Seen so far" message and the
# `train_num // 64` cut-off assume.
# NOTE(review): `step` is both the enumerate() variable and incremented by
# hand inside the loop.
# NOTE(review): whether either point is related to the kernel crash is
# unconfirmed — verify before re-enabling this cell.
'''
train_dataset = tf.data.Dataset.from_tensor_slices((train_X, train_y))

optimizer = tf.keras.optimizers.SGD(learning_rate=0.005)
loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)
losses = []

epochs = 10
for epoch in range(epochs):
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        
        with tf.GradientTape() as tape:
            logits = model(x_batch_train)
            loss_value = loss_fn(y_batch_train, logits)
            losses.append(loss_value)

        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        
        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            print("Seen so far: %s samples" % ((step + 1) * 64))
        step += 1

        if step >= train_num // 64: break
'''

In [16]:
# Train with Keras's built-in loop directly on the raw training arrays
# (no augmentation generator).
# The loss is taken from tf.keras (not the standalone `keras` package) so it
# belongs to the same Keras implementation as the tensorflow.keras model.
model.compile(optimizer='SGD',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Mini-batches of 32 samples for 10 epochs; validation metrics are computed
# on (val_X, val_y) and per-epoch results end up in `history`.
history = model.fit(train_X, train_y,
                    batch_size=32,
                    epochs=10,
                    validation_data=(val_X, val_y))

Train on 49000 samples, validate on 10500 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
