<a href="https://colab.research.google.com/gist/T-STAR-LTD/65a4cfbb6d554e2c8309a0ed3c9883e5/cifar10-99.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install tensorflow_addons

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [2]:

import os
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import backend as K

import tensorflow_addons as tfa
import matplotlib.pyplot as plt
import numpy as np


@tf.function
def image_augmentation(images,
                       width_shift_range=0., height_shift_range=0.,
                       rotation_range = 0.0, 
                       horizontal_flip=False,
                       vertical_flip=False,
                       cval=0.0, cutout_size=None, cutout_num=1, 
                       random_color_p=0.0):
    """Randomly augment a batch of images inside a TensorFlow graph.

    Each image of the batch receives its own random shift / rotation /
    mirror, optionally followed by cutout patches and a colour jitter.

    Args:
        images: batch tensor; its last three static dimensions are read as
            (height, width, channels).
        width_shift_range: maximum horizontal shift, as a fraction of the
            image width (sampled uniformly from +/- this value).
        height_shift_range: maximum vertical shift, as a fraction of the
            image height (sampled uniformly from +/- this value).
        rotation_range: maximum rotation in degrees (sampled uniformly from
            +/- this value). A non-zero value switches the resampling from
            'NEAREST' to 'BILINEAR'.
        horizontal_flip: if True, mirror along x with probability 1/2.
        vertical_flip: if True, mirror along y with probability 1/2.
        cval: value written into cutout patches (cast to the image dtype).
            It is not used by the geometric transform, which always fills
            with fill_mode='REFLECT'.
        cutout_size: cutout patch side as a fraction of the image side;
            None disables cutout.
        cutout_num: number of cutout patches per image; 0 disables cutout.
        random_color_p: per-image probability of applying the random
            contrast/saturation jitter; 0.0 disables it.

    Returns:
        The augmented batch, with the same per-image shape as the input.
    """

    img_shape = images.shape[-3:]
    img_height = img_shape[0]
    img_width = img_shape[1]
    img_channels = img_shape[2]
    interpolation  = 'BILINEAR' if rotation_range!=0.0 else 'NEAREST'

    def transform(image):
        """Apply one random mirror + rotation + shift to a single image."""
        # Mirror factors are -1.0 or +1.0 (a random integer in {0, 1} mapped to {-1, 1}).
        mirror_x = tf.cast(tf.random.uniform(shape=[], minval=0, maxval=2, dtype=tf.dtypes.int32)*2-1, tf.float32) if horizontal_flip else 1.0
        mirror_y = tf.cast(tf.random.uniform(shape=[], minval=0, maxval=2, dtype=tf.dtypes.int32)*2-1, tf.float32) if vertical_flip else 1.0
        width_shift = tf.random.uniform(shape=[], minval=-width_shift_range, maxval=width_shift_range)*img_width
        # The vertical shift is scaled by the image height (it used to be
        # scaled by the width, which is only correct for square images).
        height_shift = tf.random.uniform(shape=[], minval=-height_shift_range, maxval=height_shift_range)*img_height
        zoom_x = 1.0
        zoom_y = 1.0
        # Rotation / mirroring pivot: the image centre along each axis.
        center_x = img_width/2
        center_y = img_height/2

        # Degrees -> radians.
        angle = tf.random.uniform(shape=[], minval=-rotation_range, maxval=rotation_range)*np.pi/180
        sinval = tf.sin(angle)
        cosval = tf.cos(angle)
        # Each list is a flattened 8-parameter projective transform, the format
        # consumed by compose_transforms / ImageProjectiveTransformV2.
        center_mat = [1.0, 0.0, center_x, 0.0, 1.0, center_y, 0.0, 0.0]
        rotate_mat = [cosval, -sinval, 0.0, sinval, cosval, 0.0, 0.0, 0.0]
        zoom_mat = [zoom_x*mirror_x, 0.0, 0.0, 0.0, zoom_y*mirror_y, 0.0, 0.0, 0.0]
        center_mat_inv = [1.0, 0.0, width_shift-center_x, 0.0, 1.0, height_shift-center_y, 0.0, 0.0]
        matrix = [center_mat, rotate_mat, zoom_mat, center_mat_inv]
        composed_matrix = tfa.image.transform_ops.compose_transforms(matrix)
        # The raw op works on a batch, so wrap the single image in a batch of 1.
        batch = tf.reshape( image, [1, img_height, img_width, img_channels] )
        batch = tf.raw_ops.ImageProjectiveTransformV2(
            images=batch, transforms=composed_matrix, output_shape=[img_height, img_width], 
            fill_mode='REFLECT', interpolation=interpolation)
        return tf.reshape( batch, [img_height, img_width, img_channels] )

    def cutout(image, cval=0, cnum = 1, csize = 0.25):
        """Overwrite `cnum` random rectangles of a single image with `cval`."""
        # Patch extent in pixels along each axis (identical for square images).
        patch_h = tf.cast( csize*img_height,tf.int32)
        patch_w = tf.cast( csize*img_width,tf.int32)
        for _ in range( cnum ):
            # Random patch centre; the patch is clipped to the image borders.
            x = tf.cast( tf.random.uniform([],0,img_width),tf.int32)
            y = tf.cast( tf.random.uniform([],0,img_height),tf.int32)

            ya = tf.math.maximum(0,y-patch_h//2)
            yb = tf.math.minimum(img_height,y+patch_h//2)
            xa = tf.math.maximum(0,x-patch_w//2)
            xb = tf.math.minimum(img_width,x+patch_w//2)

            # Rebuild the image from slices: rows above, the band containing
            # the patch (left part | filled patch | right part), rows below.
            left = image[ya:yb,0:xa,:]
            patch = tf.fill([yb-ya,xb-xa,img_channels], tf.cast(cval, image.dtype) ) 
            right = image[ya:yb,xb:img_width,:]
            band = tf.concat([left,patch,right],axis=1)
            image = tf.concat([image[0:ya,:,:],band,image[yb:img_height,:,:]],axis=0)

        # Restore the static shape lost by the dynamic slicing above.
        image = tf.reshape(image,[img_height,img_width,img_channels])
        return image
    
    def random_color(image, prob):
        """Return a contrast/saturation-jittered copy with probability `prob`."""
        img_tmp = tf.image.random_contrast(image, 0.8, 1.2)
        img_tmp = tf.image.random_saturation(img_tmp, 0.5, 1.5)
        # img_tmp = tf.clip_by_value(img_tmp, 0, 255)
        p = tf.random.uniform([],0.0,1.0)
        return tf.where(p<prob, tf.cast(img_tmp, image.dtype) , image)

    images = tf.map_fn(lambda image: transform(image), images)

    if cutout_size is not None and cutout_num!=0:
        images = tf.map_fn(lambda image: cutout(image,cval=cval, csize=cutout_size, cnum=cutout_num), images)

    if random_color_p!=0.0:
        images = tf.map_fn( lambda image: random_color(image, prob=random_color_p), images)

    return images



def showHistory(history):
    """Plot training/validation accuracy and loss curves side by side.

    Args:
        history: mapping with the keys 'acc', 'val_acc', 'loss' and
            'val_loss', each holding one value per epoch.

    Raises:
        KeyError: if one of the four keys is missing.
    """

    acc = history['acc']
    val_acc = history['val_acc']

    loss = history['loss']
    val_loss = history['val_loss']

    epochs = range(len(acc))

    plt.figure(figsize=(16,6))

    # Accuracy
    plt.subplot(1,2,1)
    plt.plot(epochs, acc, 'r', label='Training')
    plt.plot(epochs, val_acc, 'b', label='Validation')
    plt.title('Accuracy')
    plt.xlabel('Epoch')
    plt.grid()
    plt.legend()

    # Loss
    plt.subplot(1,2,2)
    plt.plot(epochs, loss, 'r', label='Training')
    plt.plot(epochs, val_loss, 'b', label='Validation')
    plt.title('Loss')
    plt.xlabel('Epoch')
    # Clip the y axis so a single diverged epoch does not flatten the rest of
    # the curve; `default` keeps an empty history from raising.
    if max([*loss, *val_loss], default=0.0)>10.0:
        plt.ylim(0.0,10.0)
    plt.grid()
    plt.legend()
    plt.show()

class TransferTrainer:
    """Two-stage transfer learning on top of a pretrained Keras backbone.

    Typical use: `make_dataset` -> `build_model` -> `train`. `train` first
    fits only the new classification head with the backbone frozen
    ("burn-in"), then unfreezes the backbone (optionally keeping its first
    layers frozen) and fine-tunes the whole model.
    """

    def make_dataset( self,train_data, validation_data, batch_size):
        """Create the batched training and validation `tf.data` pipelines.

        Args:
            train_data: (images, labels) pair used for training.
            validation_data: (images, labels) pair used for validation.
            batch_size: number of samples per batch.

        Sets `self.batch_size`, `self.ds_train` and `self.ds_validation`.
        """
        x_train, _ = train_data

        ds_train = tf.data.Dataset.from_tensor_slices(train_data)
        # Shuffle over the full training set; drop the last partial batch so
        # every training batch has the same static size.
        ds_train = ds_train.shuffle(len(x_train)).batch(batch_size,drop_remainder=True)
        ds_validation = tf.data.Dataset.from_tensor_slices(validation_data)
        ds_validation = ds_validation.batch(batch_size)

        self.batch_size=batch_size
        self.ds_train = ds_train
        self.ds_validation = ds_validation

    def build_model( self, model_info, input_shape=(32,32,3),  num_classes=10, dropout=0.25,resolution=1.0):
        """Assemble resize -> backbone -> pooling -> dropout -> softmax head.

        Args:
            model_info: (model constructor, preprocess function, native input
                side in pixels) triple.
            input_shape: shape of the images fed to the model.
            num_classes: number of output classes.
            dropout: dropout rate applied before the final dense layer.
            resolution: scale factor applied to the native input side; the
                result is rounded down to a multiple of 8.

        Sets `self.base_model`, `self.model` and `self.preprocess`.
        """
        model_class, preprocess, original_input_size = model_info
        original_input_size = round(resolution*original_input_size)//8*8
        base_input_shape = (original_input_size,original_input_size,3)
        self.base_model = model_class(include_top=False, input_shape=base_input_shape, weights='imagenet')
        
        x = inputs = tf.keras.layers.Input(shape=input_shape)
        if input_shape != base_input_shape:
            # Upsample inside the model so the dataset can stay at its
            # original resolution.
            x = tf.keras.layers.Lambda(lambda image: tf.image.resize(image, base_input_shape[0:2]), 
                                       output_shape=base_input_shape, name='stem_resize')(x)
        # The backbone is always called with training=False, also while it is
        # being fine-tuned.
        x = self.base_model(x, training=False)
        x = tf.keras.layers.GlobalAveragePooling2D()(x)
        x = tf.keras.layers.Dropout(dropout)(x)
        x = tf.keras.layers.Dense(num_classes)(x)
        outputs = tf.keras.layers.Activation('softmax')(x)
        self.model = tf.keras.Model(inputs, outputs)
        self.preprocess = preprocess

    def _get_augmented_dateset(self, dataset, augment=True, preprocess=None, cutout_num=0, cutout_size=0.25, shift_range=0.0, rotation_range=0.0, random_color= 0.0):
        """Append augmentation, preprocessing and prefetching to a dataset.

        Args:
            dataset: batched dataset yielding (images, labels).
            augment: if True, apply `image_augmentation` (always with
                horizontal flips and cutout fill value 127.0).
            preprocess: optional function applied to the float32 images.
            cutout_num: number of cutout patches per image.
            cutout_size: cutout patch side as a fraction of the image side.
            shift_range: maximum shift fraction, used for both axes.
            rotation_range: maximum rotation in degrees.
            random_color: probability of the colour jitter.

        Returns:
            The extended dataset.
        """
           
        if augment:
            def data_augmentation(image,label):
                image = image_augmentation(image, shift_range, shift_range, rotation_range, 
                                           horizontal_flip=True, cval=127.0, cutout_size=cutout_size, cutout_num=cutout_num,
                                           random_color_p=random_color )
                return image, label
            dataset = dataset.map(data_augmentation)

        if preprocess is not None:
            def data_preprocesssing(image,label):
                image = tf.cast(image, tf.float32)
                image = preprocess(image)
                return image, label
            dataset = dataset.map(data_preprocesssing)

        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
        return dataset

    def train(self,burn_in_epochs, finetuning_epochs, opt_class, opt_kwargs, lr_scheduler, burnin_lr=1e-1, 
              freeze_ratio=0.0, cutout_num=1, cutout_size=0.25, shift_range=0.2, random_color=1.0, label_smoothing=0.0, steps_per_execution=None):
        """Run burn-in and fine-tuning, then plot the combined history.

        Args:
            burn_in_epochs: epochs for training the head with a frozen backbone.
            finetuning_epochs: epochs for the fine-tuning stage.
            opt_class: optimizer class; instantiated once per stage.
            opt_kwargs: keyword arguments passed to `opt_class`.
            lr_scheduler: Keras callback used during fine-tuning only.
            burnin_lr: learning rate of the burn-in stage.
            freeze_ratio: fraction of the backbone layers (counted from the
                first layer) kept frozen during fine-tuning.
            cutout_num: number of cutout patches per training image.
            cutout_size: cutout patch side as a fraction of the image side.
            shift_range: maximum random shift fraction for training images.
            random_color: probability of the colour jitter.
            label_smoothing: label smoothing of the cross-entropy loss.
            steps_per_execution: forwarded to `compile` when not None.

        Sets `self.history` to the per-epoch metrics of both stages.
        """
        loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing=label_smoothing)
        acc = tf.keras.metrics.CategoricalAccuracy(name='acc')
        
        compile_kwargs = { 'loss':loss,'metrics':[acc]}
        if steps_per_execution is not None:
            compile_kwargs['steps_per_execution']=steps_per_execution

        # Both stages use the same input pipelines, so build them once.
        ds_train      = self._get_augmented_dateset(self.ds_train, True, self.preprocess, cutout_num, cutout_size , shift_range, random_color=random_color)
        ds_validation = self._get_augmented_dateset(self.ds_validation, False, self.preprocess)

        # Stage 1: train the top layer only
        self.base_model.trainable = False
        optimizer = opt_class(learning_rate=burnin_lr, **opt_kwargs)
        self.model.compile(optimizer=optimizer, **compile_kwargs)
        result = self.model.fit(ds_train, epochs=burn_in_epochs, validation_data=ds_validation)
        self.history = {key: list(values) for key, values in result.history.items()}

        # Unfreeze the backbone, keeping exactly the first `freeze_num`
        # layers frozen (the previous loop froze one layer too many).
        self.base_model.trainable = True
        if freeze_ratio!=0.0:
            freeze_num = int(len(self.base_model.layers)*freeze_ratio)
            for layer in self.base_model.layers[:freeze_num]:
                layer.trainable=False

        # Stage 2: fine-tuning. Recompile so the trainable changes take
        # effect; the learning rate is driven by `lr_scheduler`.
        optimizer = opt_class(**opt_kwargs)
        self.model.compile(optimizer=optimizer,  **compile_kwargs  )
        result = self.model.fit(ds_train, epochs=finetuning_epochs,
                            validation_data=ds_validation, callbacks=[lr_scheduler])
        # Merge by the keys of the new result so nothing is lost when the
        # burn-in history lacks a key (e.g. burn_in_epochs == 0). Keys logged
        # only during fine-tuning hold fewer entries than 'acc' / 'loss'.
        for key, values in result.history.items():
            self.history.setdefault(key, []).extend(values)

        showHistory(self.history)





2024-03-03 02:18:00.397162: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-03 02:18:00.397223: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-03 02:18:00.398042: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-03 02:18:00.403492: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

TensorFlow Addons (TFA) has ended development and in

In [3]:
# Load the CIFAR-10 train/test arrays through Keras.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Convert the class labels with to_categorical, matching the categorical
# cross-entropy loss used for training.
y_train, y_test = (to_categorical(labels) for labels in (y_train, y_test))

# (images, labels) pairs in the form expected by TransferTrainer.make_dataset.
train_data = (x_train, y_train)
validation_data = (x_test, y_test)

In [4]:
import time
import datetime
import pickle

# Registry of the supported ImageNet backbones.
# Each value is (model constructor, matching preprocess_input function,
# native input side in pixels).
_apps = tf.keras.applications

model_dict = {
    **{
        name: (getattr(_apps, name), _apps.resnet_v2.preprocess_input, 224)
        for name in ('ResNet50V2', 'ResNet101V2', 'ResNet152V2')
    },
    'Xception': (_apps.Xception, _apps.xception.preprocess_input, 299),
    # EfficientNetB0 .. EfficientNetB7, in order, with their native input sides.
    **{
        f'EfficientNetB{index}': (
            getattr(_apps, f'EfficientNetB{index}'),
            _apps.efficientnet.preprocess_input,
            side,
        )
        for index, side in enumerate((224, 240, 260, 300, 380, 456, 528, 600))
    },
    'NASNetMobile': (_apps.NASNetMobile, _apps.nasnet.preprocess_input, 224),
    'InceptionResNet': (_apps.InceptionResNetV2, _apps.inception_resnet_v2.preprocess_input, 299),
}

def train(use_tpu=False, model_name='EfficientNetB5', batch_size=64):
    """Run the full CIFAR-10 transfer-learning experiment and save its history.

    Builds the datasets and the model, trains in two stages (burn-in, then
    fine-tuning under a warm-up / flat / cool-down learning-rate schedule),
    prints the best validation accuracy and pickles the training history to
    '<model_name>.history'.

    Args:
        use_tpu: if True, compile with steps_per_execution=10 instead of 1.
        model_name: key of `model_dict` selecting the backbone.
        batch_size: samples per batch. The run captured in this notebook
            (EfficientNetB5, batch 64, RTX 3080) ran out of GPU memory when
            fine-tuning started, so a smaller value may be needed.

    Raises:
        KeyError: if `model_name` is not a key of `model_dict`.
    """

    freeze_ratio=0.0
    # Single source of truth for the burn-in length: it feeds both the log
    # message and the trainer (two separately spelled variables used to).
    burnin_epochs = 5

    optimizer= tf.keras.optimizers.SGD
    opt_kwargs = {'momentum':0.9}
    burnin_lr = 0.1
    cutout_num = 2
    cutout_size = 0.4
    random_color = 1.0
    warmup_epochs = 10
    flat_epochs = 5
    cooldown_epochs = 20
    min_lr  = 0.001
    max_lr = 0.025
    dropout = 0.5
    label_smoothing = 0.0
    resolution = 1.0

    # Alternative kept from the original notebook: for model_name='ResNet50V2'
    # the learning rates were divided by 5:
    # burnin_lr = burnin_lr/5
    # max_lr = max_lr/5
    # min_lr = min_lr/5

    trainer = TransferTrainer()
    print('make_data')
    trainer.make_dataset(train_data,validation_data, batch_size)

    total_epochs = warmup_epochs+flat_epochs+cooldown_epochs
    message = f'{model_name} {optimizer.__name__} epochs={burnin_epochs}+{warmup_epochs}+{flat_epochs}+{cooldown_epochs} batch_size={batch_size} lr={burnin_lr}/{max_lr}/{min_lr}'
    message += f' cutout={cutout_size}x{cutout_num} resolution={resolution} random_color={random_color} dropout={dropout} label_smoothing={label_smoothing}'

    print('start', message)
    start_time = time.time()
    def scheduler(epoch, lr):
        """Fine-tuning learning rate for a given epoch index.

        Half-cosine ramp from min_lr towards max_lr during warm-up, constant
        max_lr during the flat phase, then a half-cosine decay that reaches
        min_lr on the last cool-down epoch. `lr` (the current rate) is unused.
        """
        if epoch < warmup_epochs:
            return min_lr + 0.5*(max_lr-min_lr)*(1.0-np.cos(epoch/warmup_epochs*np.pi))
        elif epoch < warmup_epochs+flat_epochs:
            return max_lr
        else:
            # +1 so the final epoch lands exactly on min_lr.
            epoch = epoch - (warmup_epochs+flat_epochs) + 1
            return min_lr + 0.5*(max_lr-min_lr)*(1.0+np.cos(epoch/cooldown_epochs*np.pi))

    lr_scheduler = tf.keras.callbacks.LearningRateScheduler(scheduler)

    trainer.build_model( model_dict[model_name], input_shape=(32,32,3), num_classes=10, 
                        resolution=resolution, dropout=dropout)

    trainer.train( burnin_epochs, total_epochs, optimizer, opt_kwargs, 
                    lr_scheduler, burnin_lr, freeze_ratio=freeze_ratio, 
                    cutout_num=cutout_num, cutout_size=cutout_size, random_color=random_color,
                    label_smoothing=label_smoothing, steps_per_execution=10 if use_tpu else 1)

    td = datetime.timedelta( seconds=time.time() - start_time )
    best = max(trainer.history['val_acc'])*100.0
    message = f'\nval_acc={best:.2f}'
    print ('finish', td, message)
    # Persist the per-epoch metrics next to the notebook.
    with open(f'{model_name}.history', mode='wb') as f:
        pickle.dump(trainer.history,f)

def _resolve_strategy():
    """Return (tpu resolver or None, distribution strategy).

    Tries to resolve, connect to and initialise a TPU; if any of those steps
    raises ValueError, falls back to the current default strategy.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        return resolver, tf.distribute.TPUStrategy(resolver)
    except ValueError:
        return None, tf.distribute.get_strategy()


tpu, strategy = _resolve_strategy()

# Build and train the model under the selected strategy.
with strategy.scope():
    train(tpu is not None)




make_data


2024-03-03 02:18:04.144010: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0e:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-03 02:18:04.148999: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0e:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-03 02:18:04.149044: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0e:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-03 02:18:04.154015: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0e:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-03 02:18:04.154071: I external/local_xla/xla/stream_executor

start EfficientNetB5 SGD epochs=5+10+5+20 batch_size=64 lr=0.1/0.025/0.001 cutout=0.4x2 resolution=1.0 random_color=1.0 dropout=0.5 label_smoothing=0.0
Epoch 1/5


2024-03-03 02:18:29.084530: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907


  1/781 [..............................] - ETA: 3:50:42 - loss: 2.4689 - acc: 0.0469

2024-03-03 02:18:31.148542: I external/local_xla/xla/service/service.cc:168] XLA service 0x7fedeff32c70 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-03-03 02:18:31.148613: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3080, Compute Capability 8.6
I0000 00:00:1709432311.196315   95478 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/35


2024-03-03 03:02:16.567475: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 320.59MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-03-03 03:02:16.567537: W tensorflow/core/kernels/gpu_utils.cc:54] Failed to allocate memory for convolution redzone checking; skipping this check. This is benign and only means that we won't check cudnn for out-of-bounds reads and writes. This message will only be printed once.
2024-03-03 03:02:16.597343: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 320.60MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-03-03 03:02:16.597438: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Alloc

ResourceExhaustedError: Graph execution error:

Detected at node model/efficientnetb5/block1b_project_conv/Conv2D-0-0-TransposeNCHWToNHWC-LayoutOptimizer defined at (most recent call last):
<stack traces unavailable>
OOM when allocating tensor with shape[64,228,228,24] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node model/efficientnetb5/block1b_project_conv/Conv2D-0-0-TransposeNCHWToNHWC-LayoutOptimizer}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_96222]

: 