In [None]:
%tensorflow_version 2.x 
%load_ext tensorboard

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
 
from tensorflow import keras
from tensorflow.keras import layers, Model
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.datasets import cifar10
from tensorflow.image import resize

from datetime import datetime
import time

In [None]:
# Number of label classes (CIFAR-10 has 10).
NUM_CLASSES = 10
# Epoch count used by the custom training loops and by the history plot.
EPOCHS = 20
# Side length, in pixels, of the square input images.
IMG_SIZE = 32
# Input shape handed to VGG16: (height, width, channels).
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

# 1. Loading and preprocessing data

In [None]:
# Load the CIFAR-10 train/test splits (downloaded on first use).
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

# Apply the VGG16 preprocessing function to both image sets.
train_images = preprocess_input(train_images)
test_images = preprocess_input(test_images)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


## 1.1 Subsetting images (if needed)

In [None]:
def count_unique(train_labels):
    """Return a dict mapping each distinct label value to its number of occurrences."""
    values, occurrences = np.unique(train_labels, return_counts=True)
    return {value: count for value, count in zip(values, occurrences)}

In [None]:
# Number of training images sampled per class when building the balanced subset.
num_per_class = 1000

In [None]:
# Build a class-balanced training subset with `num_per_class` images per class.
# Array sizes come from NUM_CLASSES / IMG_SHAPE rather than repeated literals so
# this cell stays consistent with the configuration constants.
# np.empty defaults to float64, so the labels are stored as floats here.
subset_train_images = np.empty((num_per_class * NUM_CLASSES, *IMG_SHAPE))
subset_train_labels = np.empty((num_per_class * NUM_CLASSES, 1))

for i in range(NUM_CLASSES):
  # Row indices of class `i`, sampled without replacement.
  indices = np.random.choice(np.where(train_labels == i)[0], num_per_class, replace = False)
  # Class `i` occupies one contiguous slice of the subset arrays.
  subset_train_images[i*num_per_class: (i+1)*num_per_class] = train_images[indices]
  subset_train_labels[i*num_per_class: (i+1)*num_per_class] = train_labels[indices]

# Sanity check: every class should appear exactly `num_per_class` times.
count_unique(subset_train_labels)

{0.0: 1000,
 1.0: 1000,
 2.0: 1000,
 3.0: 1000,
 4.0: 1000,
 5.0: 1000,
 6.0: 1000,
 7.0: 1000,
 8.0: 1000,
 9.0: 1000}

## 1.2 Convert images into tf tensors

### 1.2.1 Full dataset

In [None]:
# One-hot encode the integer labels and wrap them in tensors.
train_labels = tf.convert_to_tensor(keras.utils.to_categorical(train_labels))
test_labels = tf.convert_to_tensor(keras.utils.to_categorical(test_labels))

# Wrap the preprocessed images in tensors as well.
train_images = tf.convert_to_tensor(train_images)
test_images = tf.convert_to_tensor(test_images)

print(train_images.shape, train_labels.shape)

(50000, 32, 32, 3) (50000, 10)


### 1.2.2 Subset of dataset

In [None]:
def unison_shuffled_copies(a, b):
    """Return copies of `a` and `b` reordered by one shared random permutation.

    Both inputs must have the same length; rows that were aligned before the
    shuffle stay aligned afterwards.
    """
    size = len(a)
    assert size == len(b)
    order = np.random.permutation(size)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

In [None]:
# Shuffle the class-ordered subset; this rebinds train_images / train_labels to the subset.
train_images, train_labels = unison_shuffled_copies(subset_train_images, subset_train_labels)

In [None]:
# One-hot encode the labels and wrap them in tensors.
# NOTE(review): the full-dataset cell also one-hot encodes `test_labels`; running
# both cells in one session would encode the test labels twice - run only one of them.
train_labels = tf.convert_to_tensor(keras.utils.to_categorical(train_labels))
test_labels = tf.convert_to_tensor(keras.utils.to_categorical(test_labels))

# The subset images are cast to float32 explicitly; the test images keep their dtype.
train_images = tf.convert_to_tensor(train_images, dtype=tf.dtypes.float32)
test_images = tf.convert_to_tensor(test_images)

print(train_images.shape, train_labels.shape)

(10000, 32, 32, 3) (10000, 10)


# 2. Base model - VGG16

In [None]:
# ImageNet-pretrained VGG16 without its classifier top, frozen so that its
# weights are excluded from training.
vgg_model = VGG16(weights = 'imagenet', include_top = False, input_shape = IMG_SHAPE)
vgg_model.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Start from the VGG16 layers, in their original order.
layers_to_use = list(vgg_model.layers)

# Insert a resize to 224x224 at index 1, ahead of the remaining VGG16 layers.
# (An UpSampling2D(size=(7, 7)) layer was the earlier alternative.)
layers_to_use.insert(1, layers.Lambda(lambda image: tf.image.resize(image, size=(224, 224))))

# Classifier head: flatten, two 4096-unit ReLU layers, 10-way softmax.
layers_to_use += [
    layers.Flatten(),
    layers.Dense(4096, activation = 'relu', name = 'first_dense'),
    layers.Dense(4096, activation = 'relu', name = 'second_dense'),
    layers.Dense(10, activation = 'softmax', name = 'output_layer'),
]

base_model = keras.Sequential(layers_to_use)

In [None]:
# Index into `layers_to_use` at which the network is split into two sub-models.
mix_layer = 12

In [None]:
# Front part of the network: the layers before index `mix_layer`.
intermediate_layer_model = keras.Sequential(layers_to_use[0:mix_layer])

In [None]:
# Back part of the network: the layers from index `mix_layer` onward.
output_model = keras.Sequential(layers_to_use[mix_layer:])

# 3. Extended model

## 3.1 For mixup training

In [None]:
# Stand-alone classifier head that consumes flat feature vectors.
# The input shape is given as a tuple (excluding the batch axis) rather than a
# bare integer, which is the form the Keras Input API documents.
# NOTE(review): 25088 presumably equals 7 * 7 * 512, the flattened VGG16 output
# for 224x224 inputs - confirm.
input_layer = layers.Input(shape = (25088,))
first_dense = layers.Dense(4096, activation = 'relu')(input_layer)
second_dense = layers.Dense(4096, activation = 'relu')(first_dense)
output_layer = layers.Dense(10, activation = 'softmax')(second_dense)

model = keras.Model(
                    inputs = input_layer,
                    outputs = output_layer
                    )

In [None]:
# Restore previously saved weights into `model` from a hard-coded Google Drive path.
# NOTE(review): requires Drive to be mounted at /content/drive - no mount cell is visible here.
model.load_weights('/content/drive/My Drive/model.h5')

In [None]:
# Run one pass over the validation set and report categorical accuracy.
# NOTE(review): `val_dataset`, `test_step` and `val_acc_metric` are only defined in
# later cells (section 4), so this cell fails on a fresh top-to-bottom run.
# NOTE(review): `test_step` evaluates `base_model`, not the `model` whose weights
# were just loaded - confirm which network this accuracy belongs to.
for x_batch_val, y_batch_val in val_dataset:
    test_step(x_batch_val, y_batch_val)

print("Validation acc:", (float(val_acc_metric.result())))
# Clear the accumulated metric state so later evaluations start from zero.
val_acc_metric.reset_states()

Validation acc: 0.8551999926567078


## 3.2 For regular training

In [None]:
# Build the regular-training model from a fresh copy of the VGG16 layers.
# `layers_to_use` already ends with Flatten + three Dense layers (added when
# `base_model` was assembled), so extending it again would stack a second dense
# head on top of the softmax output. A separate list avoids that and leaves
# `layers_to_use` untouched.
regular_layers = list(vgg_model.layers)
regular_layers.insert(1, layers.Lambda(lambda image: tf.image.resize(image, size=(224, 224))))
regular_layers.extend((layers.Flatten(),
                       layers.Dense(4096, activation = 'relu'),
                       layers.Dense(4096, activation = 'relu'),
                       layers.Dense(10, activation = 'softmax')))

model = keras.Sequential(
                          regular_layers
                          )

# 4. Training with Mixup

## 4.1 Mixup with alpha for each sample of the batch

### 4.1.1 Mixup function

In [None]:
@tf.function
def data_mixup(data, labels):

    '''Mix consecutive pairs of samples, with one mixing coefficient per pair.

    Samples (0, 1), (2, 3), ... of the batch are combined as
    lam * first + (1 - lam) * second, where lam ~ Beta(0.5, 0.5) is drawn
    independently for every pair. The labels are mixed with the same lam.

    data = input data; axis 0 is the batch axis and its static size must be even
    labels = labels of input data, shape (batch, num_classes)

    Returns (mixed_data, mixed_labels), each with half the input batch size.
    Raises ValueError if the batch size is unknown or odd.
    '''

    batch_size = data.shape[0]
    if batch_size is None or batch_size % 2 != 0:
        raise ValueError("data_mixup needs a statically known, even batch size, got %s" % (batch_size,))

    num_data = batch_size // 2

    # Group the batch into (pair, member, ...) so members 0 and 1 can be blended.
    data = tf.reshape(data, shape=(num_data, 2, *data.shape[1:]))
    labels = tf.reshape(labels, shape=(num_data, 2, labels.shape[-1]))

    # Draw Beta(0.5, 0.5) as G1 / (G1 + G2) from two independent Gamma(0.5) samples.
    # TensorFlow random ops are re-evaluated on every call, whereas NumPy sampling
    # inside a tf.function only runs at trace time and would freeze the
    # coefficients into the graph as constants.
    gamma_first = tf.random.gamma([num_data], alpha=0.5)
    gamma_second = tf.random.gamma([num_data], alpha=0.5)
    lam = gamma_first / (gamma_first + gamma_second)

    # Reshape lam to (num_data, 1, ..., 1) so it broadcasts over the feature axes.
    data_lam = tf.cast(tf.reshape(lam, [num_data] + [1] * (len(data.shape) - 2)), data.dtype)
    label_lam = tf.cast(tf.reshape(lam, [num_data, 1]), labels.dtype)

    mixed_data = data_lam * data[:, 0] + (1 - data_lam) * data[:, 1]
    mixed_labels = label_lam * labels[:, 0] + (1 - label_lam) * labels[:, 1]

    return mixed_data, mixed_labels

### 4.1.2 Training

In [None]:
BATCH = 64

# Training pipeline: slice per sample, shuffle with a 1024-element buffer, then batch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_images, train_labels))
    .shuffle(buffer_size=1024)
    .batch(BATCH)
)

# Validation pipeline: batched only, no shuffling.
val_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(64)

In [None]:
# SGD optimizer passed through TensorFlow's mixed-precision graph rewrite.
optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(keras.optimizers.SGD(learning_rate = 0.001))
# NOTE(review): binary cross-entropy is applied to softmax outputs here, while the
# non-mixup training cell uses categorical cross-entropy - confirm this is intended.
loss_fn = keras.losses.BinaryCrossentropy()

# Separate accuracy trackers for training and validation.
train_acc_metric = keras.metrics.CategoricalAccuracy()
val_acc_metric = keras.metrics.CategoricalAccuracy()

In [None]:
@tf.function
def train_step(x, y):
    '''Run one optimisation step with mixup applied to intermediate features.

    x = batch of input images
    y = batch of labels for x

    The batch goes through `intermediate_layer_model`, the resulting features
    and the labels are mixed by `data_mixup`, and `output_model` produces the
    predictions. Gradients are taken with respect to `base_model`'s trainable
    weights. Returns the loss value of this batch.
    '''
    with tf.GradientTape() as tape:
        features = intermediate_layer_model(x)
        mixed_features, mixed_targets = data_mixup(features, y)
        predictions = output_model(mixed_features, training=True)
        loss_value = loss_fn(mixed_targets, predictions)

    trainable = base_model.trainable_weights
    gradients = tape.gradient(loss_value, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))

    # Accuracy is tracked against the mixed targets, not the original labels.
    train_acc_metric.update_state(mixed_targets, predictions)
    return loss_value

@tf.function
def test_step(x, y):
    '''Update the validation accuracy metric with `base_model`'s predictions for one batch.'''
    val_acc_metric.update_state(y, base_model(x, training=False))


In [None]:
# Per-epoch records filled in by the loop below.
train_acc_history = np.empty(EPOCHS)
val_acc_history = np.empty(EPOCHS)
loss_value_history = np.empty(EPOCHS)

for epoch in range(EPOCHS):
    print("\nStart of epoch %d" % (epoch,))
    start_time = time.time()

    # One optimisation step per batch; the mixup itself happens inside train_step.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        loss_value = train_step(x_batch_train, y_batch_train)

        if step % 100 == 0 and step > 0:
            print("Training loss (for one batch)", (float(loss_value)))
            print("Training accuracy so far", float(train_acc_metric.result()))
            # `step` is zero-based, so step + 1 batches have been processed so far.
            print("Seen so far: %s samples" % ((step + 1) * BATCH))

    train_acc_history[epoch] = train_acc_metric.result()
    # Only the loss of the epoch's last batch is recorded, not an epoch average.
    loss_value_history[epoch] = loss_value

    train_acc_metric.reset_states()

    # Validation pass over the whole validation set.
    for x_batch_val, y_batch_val in val_dataset:
        test_step(x_batch_val, y_batch_val)

    print("Validation acc:", (float(val_acc_metric.result())))
    print("Time taken:", (time.time() - start_time))
    val_acc_history[epoch] = val_acc_metric.result()
    val_acc_metric.reset_states()

In [None]:
# Persist the weights of `model` to Google Drive.
# NOTE(review): the custom training loop updates `base_model`'s weights, not `model` -
# confirm that `model` is the network meant to be saved here.
# NOTE(review): the target file name ends in .csv although it stores model weights - confirm.
model.save_weights('/content/drive/My Drive/Colab Notebooks/model_man_full_allalpha_20epochs.csv')

In [None]:
# Saving history in csv. format

# The custom mixup loop produces no Keras `History` object (`history` is only
# assigned by `model.fit` in the non-mixup section), so the frame is built from
# the per-epoch arrays filled in by the training loop. Column names follow the
# Keras history keys used by the plotting cell.
hist_df = pd.DataFrame({
    'categorical_accuracy': train_acc_history,
    'val_categorical_accuracy': val_acc_history,
    'loss': loss_value_history,
})

# NOTE(review): other cells in this notebook write to the same path and overwrite this file.
hist_csv_file = '/content/drive/My Drive/Colab Notebooks/history.csv'
with open(hist_csv_file, mode='w') as f:
    hist_df.to_csv(f)

## 4.2 Single alpha mixup training

### 4.2.1 Mixup function

In [None]:
@tf.function
def data_mixup_single_alpha(data, labels):

    '''Mix consecutive pairs of samples using one shared mixing coefficient.

    Samples (0, 1), (2, 3), ... of the batch are combined as
    lam * first + (1 - lam) * second, where a single lam ~ Beta(0.5, 0.5) is
    drawn per call and applied to every pair. Labels are mixed with the same lam.

    data = input data; axis 0 is the batch axis and its static size must be even
    labels = labels of input data, shape (batch, num_classes)

    Returns (mixed_data, mixed_labels), each with half the input batch size.
    Raises ValueError if the batch size is unknown or odd.
    '''

    batch_size = data.shape[0]
    if batch_size is None or batch_size % 2 != 0:
        raise ValueError("data_mixup_single_alpha needs a statically known, even batch size, got %s" % (batch_size,))

    num_data = batch_size // 2

    # Group the batch into (pair, member, ...) so members 0 and 1 can be blended.
    data = tf.reshape(data, shape=(num_data, 2, *data.shape[1:]))
    labels = tf.reshape(labels, shape=(num_data, 2, labels.shape[-1]))

    # Draw Beta(0.5, 0.5) as G1 / (G1 + G2) from two independent Gamma(0.5) samples.
    # TensorFlow random ops are re-evaluated on every call, whereas NumPy sampling
    # inside a tf.function only runs at trace time and would freeze the
    # coefficient into the graph as a constant.
    gamma_first = tf.random.gamma([1], alpha=0.5)
    gamma_second = tf.random.gamma([1], alpha=0.5)
    lam = gamma_first / (gamma_first + gamma_second)  # shape (1,), broadcasts everywhere

    data_lam = tf.cast(lam, data.dtype)
    label_lam = tf.cast(lam, labels.dtype)

    mixed_data = data_lam * data[:, 0] + (1 - data_lam) * data[:, 1]
    mixed_labels = label_lam * labels[:, 0] + (1 - label_lam) * labels[:, 1]

    return mixed_data, mixed_labels

### 4.2.2 Training

In [None]:
BATCH = 64

# Training pipeline: slice per sample, shuffle with a 1024-element buffer, then batch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_images, train_labels))
    .shuffle(buffer_size=1024)
    .batch(BATCH)
)

# Validation pipeline: batched only, no shuffling.
val_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(64)

In [None]:
# SGD optimizer passed through TensorFlow's mixed-precision graph rewrite.
# The learning rate here (0.0001) is ten times smaller than in the per-sample-alpha section.
optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(keras.optimizers.SGD(learning_rate = 0.0001))
# NOTE(review): binary cross-entropy is applied to softmax outputs here, while the
# non-mixup training cell uses categorical cross-entropy - confirm this is intended.
loss_fn = keras.losses.BinaryCrossentropy()

# Separate accuracy trackers for training and validation.
train_acc_metric = keras.metrics.CategoricalAccuracy()
val_acc_metric = keras.metrics.CategoricalAccuracy()

In [None]:
@tf.function
def train_step(x, y):
    '''Run one optimisation step with single-alpha mixup on intermediate features.

    x = batch of input images
    y = batch of labels for x

    The batch goes through `intermediate_layer_model`, the resulting features
    and the labels are mixed by `data_mixup_single_alpha`, and `output_model`
    produces the predictions. Gradients are taken with respect to
    `base_model`'s trainable weights. Returns the loss value of this batch.
    '''

    with tf.GradientTape() as tape:

        x = intermediate_layer_model(x)
        # This section trains with one shared alpha per batch, so the
        # single-alpha mixup is used rather than the per-pair `data_mixup`.
        x, y = data_mixup_single_alpha(x, y)
        model_output = output_model(x, training=True)

        loss_value = loss_fn(y, model_output)

    grads = tape.gradient(loss_value, base_model.trainable_weights)
    optimizer.apply_gradients(zip(grads, base_model.trainable_weights))
    # Accuracy is tracked against the mixed labels, not the original ones.
    train_acc_metric.update_state(y, model_output)
    return loss_value

@tf.function
def test_step(x, y):
    '''Update the validation accuracy metric with `base_model`'s predictions for one batch.'''
    val_acc_metric.update_state(y, base_model(x, training=False))


In [None]:
# Per-epoch records filled in by the loop below.
train_acc_history = np.empty(EPOCHS)
val_acc_history = np.empty(EPOCHS)
loss_value_history = np.empty(EPOCHS)


for epoch in range(EPOCHS):
    print("\nStart of epoch %d" % (epoch,))
    start_time = time.time()

    # One optimisation step per batch; the mixup itself happens inside train_step.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        loss_value = train_step(x_batch_train, y_batch_train)

        if step % 100 == 0 and step > 0:
            print("Training loss (for one batch)", (float(loss_value)))
            print("Training accuracy so far", float(train_acc_metric.result()))
            # `step` is zero-based, so step + 1 batches have been processed so far.
            print("Seen so far: %s samples" % ((step + 1) * BATCH))

    train_acc_history[epoch] = train_acc_metric.result()
    # Only the loss of the epoch's last batch is recorded, not an epoch average.
    loss_value_history[epoch] = loss_value

    train_acc_metric.reset_states()

    # Validation pass over the whole validation set.
    for x_batch_val, y_batch_val in val_dataset:
        test_step(x_batch_val, y_batch_val)

    print("Validation acc:", (float(val_acc_metric.result())))
    print("Time taken:", (time.time() - start_time))
    val_acc_history[epoch] = val_acc_metric.result()
    val_acc_metric.reset_states()

In [None]:
# Saving history in csv. format

# The custom mixup loop produces no Keras `History` object (`history` is only
# assigned by `model.fit` in the non-mixup section), so the frame is built from
# the per-epoch arrays filled in by the training loop. Column names follow the
# Keras history keys used by the plotting cell.
hist_df = pd.DataFrame({
    'categorical_accuracy': train_acc_history,
    'val_categorical_accuracy': val_acc_history,
    'loss': loss_value_history,
})

# NOTE(review): other cells in this notebook write to the same path and overwrite this file.
hist_csv_file = '/content/drive/My Drive/Colab Notebooks/history.csv'
with open(hist_csv_file, mode='w') as f:
    hist_df.to_csv(f)

# 5. Training without mixup

In [None]:
# Configure `model` for standard (non-mixup) training.
# `metrics` is passed as a list, the form documented for Model.compile; a bare
# metric object is not accepted by every TF 2.x release.
model.compile(optimizer = keras.optimizers.SGD(learning_rate = 0.001), 
              loss = keras.losses.CategoricalCrossentropy(), 
              metrics = [keras.metrics.CategoricalAccuracy()])

In [None]:
# Timestamped TensorBoard log directory, so each run gets its own folder.
logdir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

# Time the whole fit call.
# NOTE(review): trains for a single epoch although EPOCHS is 20 - confirm this is intended.
start = time.time()
history = model.fit(
    train_images,
    train_labels,
    validation_data=(test_images, test_labels),
    epochs=1,
    batch_size=64,
    callbacks=[tensorboard_callback],
)
end = time.time()

print(end - start)

In [None]:
# Saving history in csv. format

# Write the metrics recorded by `model.fit` to a CSV file on Google Drive.
hist_csv_file = '/content/drive/My Drive/Colab Notebooks/history.csv'
hist_df = pd.DataFrame(history.history)

with open(hist_csv_file, mode='w') as csv_handle:
    hist_df.to_csv(csv_handle)

In [None]:
%tensorboard --logdir logs

# 6. Plotting

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [None]:
# Per-epoch metrics recorded by `model.fit`.
acc = history.history['categorical_accuracy']
val_acc = history.history['val_categorical_accuracy']

loss=history.history['loss']
val_loss=history.history['val_loss']

# Take the x axis from the number of recorded epochs rather than from EPOCHS:
# the fit cell trains with epochs=1 while EPOCHS is 20, and a length mismatch
# between x and y makes plt.plot raise.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))

# Top panel: accuracy curves.
plt.subplot(2, 1, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Bottom panel: loss curves.
plt.subplot(2, 1, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
# Epoch numbers 1..20, used as the x axis.
y = np.arange(1, 21)

# NOTE(review): `reg_pd` is not defined anywhere in the visible notebook - it
# presumably holds per-epoch accuracies keyed by subset size; confirm where it is loaded.
plt.figure(figsize=(10,8))
for size in ('5000', '4000', '3000', '2000', '1000'):
    plt.plot(y, reg_pd[size], label=size + ' per class')

# Final-accuracy labels near the right edge. The text is passed positionally
# because the keyword was renamed from `s` to `text` in newer Matplotlib releases.
for value, y_pos in ((0.860, 0.87), (0.856, 0.85), (0.822, 0.83), (0.816, 0.80), (0.770, 0.77)):
    plt.annotate(str(value), xy=(19, y_pos), fontsize=12)

plt.xticks(y, [str(epoch) for epoch in y])
plt.legend(loc='lower right')
plt.title('Accuracy per epoch (Mixup)', fontsize=22)
# NOTE(review): the axis shows fractions (0.4 to 1), not percentages.
plt.ylabel('Accuracy in percent', fontsize=18)
plt.xlabel('Epoch', fontsize=18)
plt.ylim([0.4, 1])
plt.tick_params(axis='both', which='major', labelsize=12)
plt.show()


In [None]:
# Epoch numbers 1..20, used as the x axis.
y = np.arange(1, 21)

# NOTE(review): `noreg_pd` is not defined anywhere in the visible notebook - it
# presumably holds per-epoch accuracies keyed by subset size; confirm where it is loaded.
plt.figure(figsize=(10,8))
for size in ('5000', '4000', '3000', '2000', '1000'):
    plt.plot(y, noreg_pd[size], label=size + ' per class')

# Final-accuracy labels near the right edge. The text is passed positionally
# because the keyword was renamed from `s` to `text` in newer Matplotlib releases.
for value, y_pos in ((0.864, 0.87), (0.858, 0.85), (0.842, 0.829), (0.833, 0.81), (0.791, 0.78)):
    plt.annotate(str(value), xy=(19, y_pos), fontsize=12)

plt.xticks(y, [str(epoch) for epoch in y])
plt.legend(loc = 'lower right')
plt.title('Accuracy per epoch (No Mixup)', fontsize=22)
# NOTE(review): the axis shows fractions (0.4 to 1), not percentages.
plt.ylabel('Accuracy in percent', fontsize=18)
plt.xlabel('Epoch', fontsize=18)
plt.ylim([0.4, 1])
plt.tick_params(axis='both', which='major', labelsize=12)
plt.show()


In [None]:
# Subset sizes on the x axis, largest first.
subset_sizes = [5000, 4000, 3000, 2000, 1000]

# NOTE(review): `final` is not defined anywhere in the visible notebook - it
# presumably holds last-epoch accuracies per subset size; confirm where it is loaded.
plt.figure(figsize=(10,8))
plt.plot(subset_sizes, final['No Mixup'][0:5], label='No Mixup')
plt.plot(subset_sizes, final['Mixup'][0:5], label='Mixup')

# Value labels: the first five annotate the Mixup values, the last five the
# No Mixup values. The text is passed positionally because the keyword was
# renamed from `s` to `text` in newer Matplotlib releases.
point_labels = (
    (0.860, (4930, 0.857)),
    (0.856, (4000, 0.850)),
    (0.822, (3000, 0.822)),
    (0.816, (2000, 0.81)),
    (0.770, (1000, 0.77)),
    (0.864, (4930, 0.868)),
    (0.858, (4000, 0.863)),
    (0.842, (3000, 0.845)),
    (0.833, (2000, 0.835)),
    (0.791, (1000, 0.792)),
)
for value, position in point_labels:
    plt.annotate(str(value), xy=position, fontsize=12)

plt.xticks(subset_sizes, [str(size) for size in subset_sizes])
plt.legend(loc = 'lower right')
plt.title('Last epoch accuracy per subset size', fontsize=22)
# NOTE(review): the axis shows fractions (0.6 to 1), not percentages.
plt.ylabel('Accuracy in percent', fontsize=18)
plt.xlabel('Subset size', fontsize=18)
plt.ylim([0.6, 1])
plt.tick_params(axis='both', which='major', labelsize=12)
plt.show()
