In [None]:
!git clone https://github.com/seshuad/IMagenet

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import skimage.transform
import time, os, datetime
from Model import Residual_Unit
from Model import Attention_Block

In [2]:
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping,TensorBoard, LearningRateScheduler
from tensorflow.keras.layers import Input
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model

In [7]:
from TinyImageNet10000 import get_data
from TinyImageNet10000 import get_id_dictionary

train_data, train_labels, test_data, test_labels = get_data(get_id_dictionary())

indx = np.arange(50000)
np.random.shuffle(indx)
train_data = train_data[indx, ]
train_labels = train_labels[indx, ]
x_train = train_data[0:45000,]
y_train = train_labels[0:45000, ]
x_val = train_data[-5000:,]
y_val = train_labels[-5000:,]
x_test = test_data
y_test = test_labels

print("train data shape:",  x_train.shape)
print("train label shape:", y_train.shape)
print("validation data shape:",  x_val.shape)
print("validation label shape:", y_val.shape)
print("test data shape:",   x_test.shape)
print("test_labels.shape:", y_test.shape)

starting loading data
finished loading data, in 45.37041354179382 seconds
train data shape: (45000, 64, 64, 3)
train label shape: (45000, 200)
validation data shape: (5000, 64, 64, 3)
validation label shape: (5000, 200)
test data shape: (10000, 64, 64, 3)
test_labels.shape: (10000, 200)


In [8]:
# define generators for training and validation data
train_datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2)

val_datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True)

test_datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True)

# compute quantities required for feature normalization
train_datagen.fit(x_train)
val_datagen.fit(x_val)
test_datagen.fit(x_test)

In [9]:
def AttentionResNet56(shape, in_channel, kernel_size, n_classes, dropout=None, regularization=0.01):

    """
    :param shape: The tuple of input data.
    :param in_channel: The 4-th dimension (channel number) of input weight matrix. For example, in_channel=3 means the input contains 3 channels.
    :param kernel_size: Integer. the shape of the kernel. For example, default kernel_size = 3 means you have a 3*3 kernel.
    :param n_classes: Integer. The number of target classes. For example, n_classes = 10 means you have 10 class labels.
    :param dropout: Float between 0 and 1. Fraction of the input units to drop.
    :param regularization: Float. Fraction of the input units to drop.
    """

    input_data = Input(shape=shape)  # 32x32x3
    x = Conv2D(in_channel, kernel_size=kernel_size, strides=2, padding='same')(input_data)  # 32x32x32
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=2, padding='same')(x)  # 16x16x32

    out_channel = in_channel * 4  # 256
    x = Residual_Unit(x, in_channel, out_channel)  # 16x16x128
    x = Attention_Block(x, skip=2)

    in_channel = out_channel // 2  # 64
    out_channel = in_channel * 4  # 256
    x = Residual_Unit(x, in_channel, out_channel, stride=2)  # 8x8x256
    x = Attention_Block(x, skip=1)

    in_channel = out_channel // 2  # 128
    out_channel = in_channel * 4  # 512
    x = Residual_Unit(x, in_channel, out_channel, stride=2)  # 4x4x512
    x = Attention_Block(x, skip=1)

    in_channel = out_channel // 2  # 256
    out_channel = in_channel * 4  # 1024
    x = Residual_Unit(x, in_channel, out_channel, stride=1)  # 4x4x1024
    x = Residual_Unit(x, in_channel, out_channel)  # 4x4x1024
    x = Residual_Unit(x, in_channel, out_channel)  # 4x4x1024

    x = AveragePooling2D(pool_size=4, strides=1)(x)  # 1x1x1024
    x = Flatten()(x)

    output = Dense(n_classes, kernel_regularizer=l2(regularization), activation='softmax')(x)
    model = Model(input_data, output)

    return model

In [6]:
model = AttentionResNet56(shape=(64,64,3), in_channel=64, kernel_size=7, n_classes=200, dropout=0.4, regularization=0.0001)

In [7]:
def lr_schedule(epoch):
    lr = 1e-2
    if epoch > 80:
        lr *= 1e-5
    elif epoch > 60:
        lr *= 1e-4
    elif epoch > 40:
        lr *= 1e-2
    elif epoch >20:
        lr *= 1e-1
    print('Learning rate:', lr)
    return lr
lr_scheduler = LearningRateScheduler(lr_schedule)

# define loss, metrics, optimizer
optimizer = Adam(lr = lr_schedule(0) )
model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

Learning rate: 0.01


In [9]:
batch_size = 512
epc = 60

start = time.time()

# training
train_generator = train_datagen.flow(x_train, y_train, batch_size=batch_size)
step_size_train = train_generator.n // train_generator.batch_size

# test
test_generator = test_datagen.flow(x_test, y_test, batch_size=batch_size)
step_size_test = test_generator.n // test_generator.batch_size

# usefull callbacks
log_dir='Logs/' + '56_ImageNet'
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
lr_reducer = ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=5, verbose=1)
early_stopper = EarlyStopping(monitor='val_accuracy', patience=15, verbose=1)

model.fit_generator(train_generator,
                    steps_per_epoch = step_size_train,
                    epochs = epc,
                    validation_data = test_generator,
                    validation_steps = step_size_test,
                    callbacks=[tensorboard_callback, lr_reducer, lr_scheduler, early_stopper])

end = time.time()
print("Time taken by above cell is {}.".format((end-start)/60))

Learning rate: 0.01
Epoch 1/60
  1/195 [..............................] - ETA: 28:05 - loss: 5.0031 - accuracy: 0.0488

ResourceExhaustedError: OOM when allocating tensor with shape[512,2048,4,4] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:FusedBatchNormV3]