<a href="https://colab.research.google.com/github/Abinesh-18/-Recognition-and-Image-Retrieval/blob/main/CSE_673_Project_21.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Gather/unzip data
# Mounts Google Drive (this cell only works inside Google Colab), copies the
# dataset archives into the current working directory and extracts them there.
from google.colab import drive
drive.mount('/content/gdrive')

# Alternative kept for reference: extract every archive in place on Drive.
# # !cd /content/gdrive/MyDrive/Recognition-and-Image-Retriveal/ && tar zxvf cars_test.tgz && tar xvzf cars_train.tgz && unzip tiny-imagenet-200.zip

# Copy the archives used below from Drive into the working directory.
!cp /content/gdrive/MyDrive/Recognition-and-Image-Retriveal/tiny-imagenet-200.zip .
!cp /content/gdrive/MyDrive/Recognition-and-Image-Retriveal/cars_train.tgz .
# The cars_test archive is neither copied nor extracted (lines left disabled).
# !cp /content/gdrive/MyDrive/Recognition-and-Image-Retriveal/cars_test.tgz .

# !tar zxvf cars_test.tgz
# Extract the copied archives; later cells read from ./tiny-imagenet-200/.
!tar zxvf cars_train.tgz
!unzip tiny-imagenet-200.zip

In [None]:
# Imports
import tensorflow as tf
tf.config.run_functions_eagerly(True)

from tensorflow import keras
from tensorflow.keras import regularizers
from tensorflow.keras import layers
from keras.models import Sequential
import numpy as np
import glob
import os
import cv2

from __future__ import division

!pip3 install keras_applications

import six
from keras.models import Model
from keras.layers import Input
from keras.layers import Activation
from keras.layers import Reshape
from keras.layers import Dense
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import GlobalMaxPooling2D
from keras.layers import GlobalAveragePooling2D
from keras.layers import Dropout
from keras.layers.merge import add
from keras.layers import BatchNormalization
from keras.regularizers import l2
from keras import backend as K
from keras_applications.imagenet_utils import _obtain_input_shape
from tensorflow import reduce_mean
from tensorflow.keras import activations
from keras.layers import Conv3D
from keras.layers import AveragePooling3D
from keras.backend import batch_dot
from sklearn.metrics.pairwise import cosine_similarity



# Data generators + setup

In [None]:
# Dataset generator
class DataGenerator(keras.utils.Sequence):
  """Keras ``Sequence`` that reads images from disk one batch at a time.

  Each item is a tuple ``(X, y)`` where ``X`` holds the resized, min-max
  normalised images of the batch and ``y`` holds one integer label per image.

  Args:
    list_IDs: sequence of image file paths; each path is also the key used
      to look up its label in ``labels``.
    labels: mapping ``path -> label`` (stored into an integer array).
    n_channels: size of the last axis of ``X``.
    n_classes: number of classes (stored, not used by this generator).
    batch_size: number of images per batch.
    dim: ``(rows, cols)`` every image is resized to.
    shuffle: when true, the sample order is reshuffled after every epoch.
  """

  def __init__(self, list_IDs, labels, n_channels, n_classes, batch_size=128, dim=(64,64), shuffle=False):
    self.list_IDs = list_IDs
    self.labels = labels
    self.n_channels = n_channels
    self.n_classes = n_classes
    # Misspelled attribute from the original code, kept as an alias so that
    # anything reading ``n_clases`` keeps working.
    self.n_clases = n_classes
    self.batch_size = batch_size
    self.dim = dim
    self.shuffle = shuffle
    self.on_epoch_end()

  def __len__(self):
    '''Number of full batches per epoch (a trailing partial batch is dropped).'''
    return int(np.floor(len(self.list_IDs) / self.batch_size))

  def __getitem__(self, index):
    '''Generate one batch of data'''
    # Positions (into list_IDs) of the samples that make up this batch
    indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

    # Translate positions into image paths
    list_IDs_temp = [self.list_IDs[k] for k in indexes]

    return self.__data_generation(list_IDs_temp)

  def on_epoch_end(self):
    '''Resets the sample order, reshuffling it when ``shuffle`` is set'''
    self.indexes = np.arange(len(self.list_IDs))
    if self.shuffle:
      np.random.shuffle(self.indexes)

  def __data_generation(self, list_IDs_temp):
    '''Load, resize and normalise the images named in ``list_IDs_temp``.

    Returns ``(X, y)`` with one row per entry of ``list_IDs_temp``.
    Raises ``IOError`` when an image cannot be read.
    '''
    # Size the buffers from the actual batch so a short batch never exposes
    # uninitialised rows of np.empty.
    n_samples = len(list_IDs_temp)
    X = np.empty((n_samples, *self.dim, self.n_channels))
    y = np.empty((n_samples), dtype=int)

    # cv2.resize takes (width, height) whereas ``dim`` is (rows, cols).
    target_size = (self.dim[1], self.dim[0])

    for i,ID in enumerate(list_IDs_temp):
      img = cv2.imread(ID)
      if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("Could not read image: {}".format(ID))
      # Resize to the configured dimensions (previously hard-coded to 64x64)
      img = cv2.resize(img, target_size)
      # Per-image min-max normalisation, converted to float32
      img = cv2.normalize(img, None, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
      X[i,] = img
      y[i] = self.labels[ID]

    return X,y

class DataGenerator1hot(keras.utils.Sequence):
  """Keras ``Sequence`` that yields images with one label vector per image.

  Each item is a tuple ``(X, y)`` where ``X`` holds the resized, min-max
  normalised images of the batch and ``y`` has shape
  ``(batch, n_classes)``.

  Args:
    list_IDs: sequence of image file paths; each path is also the key used
      to look up its label in ``labels``.
    labels: mapping ``path -> label``. A label may be a vector of length
      ``n_classes`` (stored as is) or a scalar class index (converted to a
      one-hot row).
    n_channels: size of the last axis of ``X``.
    n_classes: width of each label row.
    batch_size: number of images per batch.
    dim: ``(rows, cols)`` every image is resized to.
    shuffle: when true, the sample order is reshuffled after every epoch.
  """

  def __init__(self, list_IDs, labels, n_channels, n_classes, batch_size=128, dim=(64,64), shuffle=False):
    self.list_IDs = list_IDs
    self.labels = labels
    self.n_channels = n_channels
    self.n_classes = n_classes
    # Misspelled attribute from the original code, kept as an alias so that
    # anything reading ``n_clases`` keeps working.
    self.n_clases = n_classes
    self.batch_size = batch_size
    self.dim = dim
    self.shuffle = shuffle
    self.on_epoch_end()

  def __len__(self):
    '''Number of full batches per epoch (a trailing partial batch is dropped).'''
    return int(np.floor(len(self.list_IDs) / self.batch_size))

  def __getitem__(self, index):
    '''Generate one batch of data'''
    # Positions (into list_IDs) of the samples that make up this batch
    indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

    # Translate positions into image paths
    list_IDs_temp = [self.list_IDs[k] for k in indexes]

    return self.__data_generation(list_IDs_temp)

  def on_epoch_end(self):
    '''Resets the sample order, reshuffling it when ``shuffle`` is set'''
    self.indexes = np.arange(len(self.list_IDs))
    if self.shuffle:
      np.random.shuffle(self.indexes)

  def __data_generation(self, list_IDs_temp):
    '''Load, resize and normalise the images named in ``list_IDs_temp``.

    Returns ``(X, y)`` with one row per entry of ``list_IDs_temp``.
    Raises ``IOError`` when an image cannot be read.
    '''
    # Size the buffers from the actual batch so a short batch never exposes
    # uninitialised rows.
    n_samples = len(list_IDs_temp)
    X = np.empty((n_samples, *self.dim, self.n_channels))
    # Zero-initialised so that only the hot entry has to be written.
    y = np.zeros((n_samples, self.n_clases), dtype=int)

    # cv2.resize takes (width, height) whereas ``dim`` is (rows, cols).
    target_size = (self.dim[1], self.dim[0])

    for i,ID in enumerate(list_IDs_temp):
      img = cv2.imread(ID)
      if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("Could not read image: {}".format(ID))
      # Resize to the configured dimensions (previously hard-coded to 64x64)
      img = cv2.resize(img, target_size)
      # Per-image min-max normalisation, converted to float32
      img = cv2.normalize(img, None, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
      X[i,] = img

      label = self.labels[ID]
      if np.ndim(label) == 0:
        # A scalar class index used to be broadcast over the whole row,
        # which is not a one-hot encoding; set just the hot entry instead.
        y[i, int(label)] = 1
      else:
        # Already a label vector: store it unchanged.
        y[i] = label

    return X,y

In [None]:
### Tiny
tiny_train_paths = glob.glob('tiny-imagenet-200/train/**/images/*.JPEG', recursive=True)
tiny_test_paths = glob.glob('tiny-imagenet-200/test/images/*.JPEG')

# Class directory name of every training image: the third path component
# of 'tiny-imagenet-200/train/<class>/images/<file>.JPEG'.
train_class_names = {path: path.split('/')[2] for path in tiny_train_paths}

# Sorted unique class names and the integer index assigned to each of them
labels = sorted(set(train_class_names.values()))
classes = range(len(labels))
l = dict(zip(labels, classes))

# Training paths plus the integer label of each path
tiny = {
    'train': list(tiny_train_paths),
    'labels': {path: l[name] for path, name in train_class_names.items()},
}

# Tiny validation (test data)
# Each annotation line is tab separated: file name first, class name second.
tiny_val = {
    'val': [],
    'val_labels': {},
}

with open('tiny-imagenet-200/val/val_annotations.txt', 'r') as annotations:
  for row in annotations:
    fields = row.split('\t')
    val_path = 'tiny-imagenet-200/val/images/' + fields[0]
    tiny_val['val'].append(val_path)
    tiny_val['val_labels'][val_path] = l[fields[1]]

print("Train samples: {}".format(len(tiny['train'])))
print("Validation samples: {}".format(len(tiny_val['val'])))
print("Number of classes: {}".format(len(labels)))
print("")
print("Validation data formats:")

Train samples: 99997
Validation samples: 10000
Number of classes: 200

Validation data formats:


In [None]:
# Make generators
# One batch generator for the training images and one for the validation images.
tiny_train_generator = DataGenerator(
    list_IDs=tiny['train'],
    labels=tiny['labels'],
    n_channels=3,
    n_classes=200,
    batch_size=128,
    dim=(64,64),
    shuffle=True,
)
tiny_val_generator = DataGenerator(
    list_IDs=tiny_val['val'],
    labels=tiny_val['val_labels'],
    n_channels=3,
    n_classes=200,
    batch_size=128,
    dim=(64,64),
    shuffle=True,
)

# Task 1 (T1): Train a custom CNN network of your choice on the Tiny Imagenet dataset and report the accuracy.
* The choice of network is up to you. (Don't use any predefined architecture like Resnet or VGG etc. for this task, Design your own)
* Make sure your model trains and produces more than 40% accuracy on the Tiny Imagenet test set.
* Proper use of dataset generators/data loaders to load images only batch by
batch is a must. (Don't use predefined APIs such as `flow_from_directory`, etc.)
* Use of data augmentation, early stopping, learning-rate decay (or a schedule), and
model checkpointing (you can use available callbacks or APIs for this) is a must.
* Train the model using cross-entropy loss.

In [None]:
### Model for tiny-imagenet
input_shape = (64,64,3)


def _tiny_conv(filters, name):
    """3x3 'same'-padded ReLU convolution with the L2 penalty and He
    initialisation shared by every convolution of the model."""
    return layers.Conv2D(
        filters,
        kernel_size=3,
        padding='same',
        activation='relu',
        kernel_regularizer=regularizers.l2(0.01),
        kernel_initializer="he_normal",
        name=name,
    )


def _tiny_dense(units, name):
    """ReLU dense layer with the same L2 penalty and He initialisation."""
    return layers.Dense(
        units,
        activation='relu',
        kernel_regularizer=regularizers.l2(0.01),
        kernel_initializer="he_normal",
        name=name,
    )


device_name = tf.test.gpu_device_name()
print(device_name)
with tf.device(device_name):
    model = Sequential([
        # Data augmentation
        layers.RandomFlip("horizontal_and_vertical", input_shape=input_shape),
        layers.RandomRotation(0.25),
        layers.RandomContrast(0.25),

        # Stage 1: 32 filters
        _tiny_conv(32, 'conv_1_32_1'),
        layers.MaxPooling2D(pool_size=2, strides=2),
        layers.BatchNormalization(),

        # Stage 2: 64 filters
        _tiny_conv(64, 'conv_2_64_1'),
        layers.MaxPooling2D(pool_size=2, strides=2),
        layers.BatchNormalization(),

        # Stage 3: 2 x 128 filters
        _tiny_conv(128, 'conv_3_128_1'),
        _tiny_conv(128, 'conv_3_128_2'),
        layers.MaxPooling2D(pool_size=2, strides=2),
        layers.BatchNormalization(),

        # Stage 4: 2 x 256 filters (layer names kept exactly as they were)
        _tiny_conv(256, 'conv_4_256_2'),
        _tiny_conv(256, 'conv_4_256_1'),
        layers.MaxPooling2D(pool_size=2, strides=2),
        layers.BatchNormalization(),

        # Stage 5: 2 x 512 filters, no pooling afterwards
        _tiny_conv(512, 'conv_5_512_1'),
        _tiny_conv(512, 'conv_5_512_2'),

        # Classifier head
        layers.Flatten(),
        _tiny_dense(4096, 'dense_4096'),
        layers.Dropout(.5),
        _tiny_dense(2048, 'dense_2048'),
        layers.Dropout(.5),
        layers.Dense(200, activation='softmax'),
    ])

# Adam with an exponentially decaying learning rate. It is built here but the
# model below is compiled with `sgd`.
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_rate=0.9,
    decay_steps=100000,
)
optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)

# Nesterov-momentum SGD: the optimizer actually used for training
sgd = keras.optimizers.SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)

model.compile(
    optimizer=sgd,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

/device:GPU:0


2021-11-05 17:34:48.882237: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 17:34:48.882706: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 17:34:48.883002: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 17:34:48.883678: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 17:34:48.883701: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1594] Could not ident

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
random_flip_5 (RandomFlip)   (None, 64, 64, 3)         0         
_________________________________________________________________
random_rotation_5 (RandomRot (None, 64, 64, 3)         0         
_________________________________________________________________
random_contrast_5 (RandomCon (None, 64, 64, 3)         0         
_________________________________________________________________
conv_1_32_1 (Conv2D)         (None, 64, 64, 32)        896       
_________________________________________________________________
max_pooling2d_20 (MaxPooling (None, 32, 32, 32)        0         
_________________________________________________________________
batch_normalization_20 (Batc (None, 32, 32, 32)        128       
_________________________________________________________________
conv_2_64_1 (Conv2D)         (None, 32, 32, 64)       

In [None]:
# Create callbacks for training
checkpoint_path = 'tiny_v2'

# Save the full model (not only the weights) whenever the validation
# accuracy reaches a new maximum.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
)

# Stop once the training loss has not improved for 5 epochs.
es = tf.keras.callbacks.EarlyStopping(patience=5, monitor='loss')

In [None]:
### Train tiny-imagenet model
# To continue from the best saved checkpoint instead of the freshly built
# model, load it first:
# model = keras.models.load_model('tiny_v2')

tiny_history = model.fit(
    tiny_train_generator,
    epochs=50,
    validation_data=tiny_val_generator,
    # Early stopping is required by the task: the `es` callback was created
    # with the checkpoint callback but was never passed to fit().
    callbacks=[cp_callback, es],
)

# Task 2 (T2): Train Resnet 18 model on Tiny Imagenet dataset.
*   Make sure that you don't take the pre-trained model, but initialize it from random.
*   If T1 is done properly, the only change is to use the Resnet18 model instead of your custom model.

In [None]:
# Implementation from https://github.com/keras-team/keras-contrib/blob/master/keras_contrib/applications/resnet.py
"""ResNet v1, v2, and segmentation models for Keras.
# Reference
- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
- [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027)
Reference material for extended functionality:
- [ResNeXt](https://arxiv.org/abs/1611.05431) for Tiny ImageNet support.
- [Dilated Residual Networks](https://arxiv.org/pdf/1705.09914) for segmentation support
- [Deep Residual Learning for Instrument Segmentation in
   Robotic Surgery](https://arxiv.org/abs/1703.08580)
  for segmentation support.
Implementation Adapted from: github.com/raghakot/keras-resnet
"""  # pylint: disable=E501

def _bn_relu(x, bn_name=None, relu_name=None):
    """Apply batch normalisation along CHANNEL_AXIS followed by a ReLU.

    Args:
        x: input tensor.
        bn_name: optional name for the BatchNormalization layer.
        relu_name: optional name for the Activation layer.
    Returns:
        The activated tensor.
    """
    normalized = BatchNormalization(name=bn_name, axis=CHANNEL_AXIS)(x)
    relu = Activation("relu", name=relu_name)
    return relu(normalized)


def _conv_bn_relu(**conv_params):
    """Return a function applying conv -> BN -> relu (the ResNet v1 ordering
    of https://arxiv.org/abs/1512.03385).

    Required keys: ``filters`` and ``kernel_size`` (KeyError when missing).
    Optional keys: ``strides``, ``dilation_rate``, ``conv_name``, ``bn_name``,
    ``relu_name``, ``kernel_initializer``, ``padding`` and
    ``kernel_regularizer``. Any other key is ignored.
    """
    # Mandatory settings
    filters = conv_params["filters"]
    kernel_size = conv_params["kernel_size"]

    # Optional settings with their fallbacks
    option = conv_params.get
    strides = option("strides", (1, 1))
    dilation_rate = option("dilation_rate", (1, 1))
    conv_name = option("conv_name", None)
    bn_name = option("bn_name", None)
    relu_name = option("relu_name", None)
    kernel_initializer = option("kernel_initializer", "he_normal")
    padding = option("padding", "same")
    kernel_regularizer = option("kernel_regularizer", l2(1.e-4))

    def f(x):
        convolution = Conv2D(filters=filters,
                             kernel_size=kernel_size,
                             strides=strides,
                             padding=padding,
                             dilation_rate=dilation_rate,
                             kernel_initializer=kernel_initializer,
                             kernel_regularizer=kernel_regularizer,
                             name=conv_name)
        return _bn_relu(convolution(x), bn_name=bn_name, relu_name=relu_name)

    return f


def _bn_relu_conv(**conv_params):
    """Return a function applying BN -> relu -> conv, the full pre-activation
    (ResNet v2) ordering of http://arxiv.org/pdf/1603.05027v2.pdf

    Required keys: ``filters`` and ``kernel_size`` (KeyError when missing).
    Optional keys: ``strides``, ``dilation_rate``, ``conv_name``, ``bn_name``,
    ``relu_name``, ``kernel_initializer``, ``padding`` and
    ``kernel_regularizer``. Any other key is ignored.
    """
    filters = conv_params["filters"]
    kernel_size = conv_params["kernel_size"]

    # Fallbacks first, then whatever the caller supplied on top of them.
    options = dict(strides=(1, 1),
                   dilation_rate=(1, 1),
                   conv_name=None,
                   bn_name=None,
                   relu_name=None,
                   kernel_initializer="he_normal",
                   padding="same",
                   kernel_regularizer=l2(1.e-4))
    options.update(conv_params)

    def f(x):
        pre_activated = _bn_relu(x, bn_name=options["bn_name"],
                                 relu_name=options["relu_name"])
        convolution = Conv2D(filters=filters,
                             kernel_size=kernel_size,
                             strides=options["strides"],
                             padding=options["padding"],
                             dilation_rate=options["dilation_rate"],
                             kernel_initializer=options["kernel_initializer"],
                             kernel_regularizer=options["kernel_regularizer"],
                             name=options["conv_name"])
        return convolution(pre_activated)

    return f


def _shortcut(input_feature, residual, conv_name_base=None, bn_name_base=None):
    """Sum ``residual`` with a shortcut taken from ``input_feature``.

    The shortcut is the input itself when both tensors agree in spatial size
    and channel count. Otherwise the input is projected with a strided 1 X 1
    convolution followed by batch normalisation so the shapes match.

    Args:
        input_feature: tensor entering the residual unit.
        residual: tensor produced by the residual unit.
        conv_name_base: optional name prefix; '1' is appended for the
            projection convolution.
        bn_name_base: optional name prefix; '1' is appended for the
            projection batch normalisation.
    Returns:
        The element-wise sum of shortcut and residual.
    """
    in_shape = K.int_shape(input_feature)
    res_shape = K.int_shape(residual)

    # Ratio between input and residual extents; an integer when the network
    # is configured correctly.
    stride_width = int(round(in_shape[ROW_AXIS] / res_shape[ROW_AXIS]))
    stride_height = int(round(in_shape[COL_AXIS] / res_shape[COL_AXIS]))
    same_channels = in_shape[CHANNEL_AXIS] == res_shape[CHANNEL_AXIS]

    needs_projection = stride_width > 1 or stride_height > 1 or not same_channels
    if not needs_projection:
        # Identity shortcut
        return add([input_feature, residual])

    print('reshaping via a convolution...')
    conv_name = None if conv_name_base is None else conv_name_base + '1'
    bn_name = None if bn_name_base is None else bn_name_base + '1'

    projection = Conv2D(filters=res_shape[CHANNEL_AXIS],
                        kernel_size=(1, 1),
                        strides=(stride_width, stride_height),
                        padding="valid",
                        kernel_initializer="he_normal",
                        kernel_regularizer=l2(0.0001),
                        name=conv_name)(input_feature)
    projection = BatchNormalization(axis=CHANNEL_AXIS, name=bn_name)(projection)

    return add([projection, residual])


def _residual_block(block_function, filters, blocks, stage,
                    transition_strides=None, transition_dilation_rates=None,
                    dilation_rates=None, is_first_layer=False, dropout=None,
                    residual_unit=_bn_relu_conv):
    """Return a function that chains ``blocks`` residual blocks of one stage.

    Args:
        block_function: block factory such as ``basic_block`` or ``bottleneck``.
        filters: filter count handed to every block.
        blocks: number of blocks in the stage; block ``i`` receives ``block=i``.
        stage: integer stage label, forwarded for layer naming.
        transition_strides: one stride tuple per block, (1, 1) each by default.
        transition_dilation_rates: one dilation tuple per block, (1, 1) each
            by default. The value is not forwarded to the blocks.
        dilation_rates: one dilation rate per block, 1 each by default; this
            is what each block receives as ``dilation_rate``.
        is_first_layer: marks the first stage, whose first block is flagged
            ``is_first_block_of_first_layer``.
        dropout: forwarded to every block.
        residual_unit: forwarded to every block.
    """
    transition_dilation_rates = ([(1, 1)] * blocks
                                 if transition_dilation_rates is None
                                 else transition_dilation_rates)
    transition_strides = ([(1, 1)] * blocks
                          if transition_strides is None
                          else transition_strides)
    dilation_rates = [1] * blocks if dilation_rates is None else dilation_rates

    def f(x):
        out = x
        for index in range(blocks):
            build = block_function(
                filters=filters,
                stage=stage,
                block=index,
                transition_strides=transition_strides[index],
                dilation_rate=dilation_rates[index],
                is_first_block_of_first_layer=(is_first_layer and index == 0),
                dropout=dropout,
                residual_unit=residual_unit)
            out = build(out)
        return out

    return f


def _block_name_base(stage, block):
    """Get the convolution name base and batch normalization name base defined by
    stage and block.
    If there are less than 26 blocks they will be labeled 'a', 'b', 'c' to match the
    paper and keras and beyond 26 blocks they will simply be numbered.
    """
    if block < 27:
        block = '%c' % (block + 97)  # 97 is the ascii number for lowercase 'a'
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    return conv_name_base, bn_name_base


def basic_block(filters, stage, block, transition_strides=(1, 1),
                dilation_rate=(1, 1), is_first_block_of_first_layer=False, dropout=None,
                residual_unit=_bn_relu_conv):
    """Two stacked 3 X 3 convolution units with a shortcut connection, for
    resnets with layers <= 34 (scheme of http://arxiv.org/pdf/1603.05027v2.pdf).

    Args:
        filters: filter count of both convolutions.
        stage, block: labels passed to ``_block_name_base``.
        transition_strides: strides of the first convolution.
        dilation_rate: dilation rate of the first convolution.
        is_first_block_of_first_layer: when true the first step is a plain
            Conv2D instead of ``residual_unit``.
        dropout: dropout rate applied after the first step, or None.
        residual_unit: factory for the convolution units.
    Returns:
        A function mapping an input tensor to the block output.
    """
    def f(input_features):
        conv_name_base, bn_name_base = _block_name_base(stage, block)

        if not is_first_block_of_first_layer:
            first_step = residual_unit(filters=filters, kernel_size=(3, 3),
                                       strides=transition_strides,
                                       dilation_rate=dilation_rate,
                                       conv_name_base=conv_name_base + '2a',
                                       bn_name_base=bn_name_base + '2a')
        else:
            # don't repeat bn->relu since we just did bn->relu->maxpool
            first_step = Conv2D(filters=filters, kernel_size=(3, 3),
                                strides=transition_strides,
                                dilation_rate=dilation_rate,
                                padding="same",
                                kernel_initializer="he_normal",
                                kernel_regularizer=l2(1e-4),
                                name=conv_name_base + '2a')
        out = first_step(input_features)

        if dropout is not None:
            out = Dropout(dropout)(out)

        second_step = residual_unit(filters=filters, kernel_size=(3, 3),
                                    conv_name_base=conv_name_base + '2b',
                                    bn_name_base=bn_name_base + '2b')
        out = second_step(out)

        return _shortcut(input_features, out)

    return f


def bottleneck(filters, stage, block, transition_strides=(1, 1),
               dilation_rate=(1, 1), is_first_block_of_first_layer=False, dropout=None,
               residual_unit=_bn_relu_conv):
    """Bottleneck block (1 X 1 -> 3 X 3 -> 1 X 1 with a shortcut) for > 34
    layer resnets, following http://arxiv.org/pdf/1603.05027v2.pdf

    Args:
        filters: filter count of the first two convolutions; the final
            1 X 1 convolution uses ``filters * 4``.
        stage, block: labels passed to ``_block_name_base``.
        transition_strides: strides of the first convolution.
        dilation_rate: dilation rate of the first convolution.
        is_first_block_of_first_layer: when true the first step is a plain
            Conv2D instead of ``residual_unit``.
        dropout: dropout rate applied after the first and second step, or None.
        residual_unit: factory for the convolution units.
    Returns:
        A function mapping an input tensor to the block output.
    """
    def f(input_feature):
        conv_name_base, bn_name_base = _block_name_base(stage, block)

        if not is_first_block_of_first_layer:
            reduce_step = residual_unit(filters=filters, kernel_size=(1, 1),
                                        strides=transition_strides,
                                        dilation_rate=dilation_rate,
                                        conv_name_base=conv_name_base + '2a',
                                        bn_name_base=bn_name_base + '2a')
        else:
            # don't repeat bn->relu since we just did bn->relu->maxpool
            reduce_step = Conv2D(filters=filters, kernel_size=(1, 1),
                                 strides=transition_strides,
                                 dilation_rate=dilation_rate,
                                 padding="same",
                                 kernel_initializer="he_normal",
                                 kernel_regularizer=l2(1e-4),
                                 name=conv_name_base + '2a')
        out = reduce_step(input_feature)

        if dropout is not None:
            out = Dropout(dropout)(out)

        spatial_step = residual_unit(filters=filters, kernel_size=(3, 3),
                                     conv_name_base=conv_name_base + '2b',
                                     bn_name_base=bn_name_base + '2b')
        out = spatial_step(out)

        if dropout is not None:
            out = Dropout(dropout)(out)

        expand_step = residual_unit(filters=filters * 4, kernel_size=(1, 1),
                                    conv_name_base=conv_name_base + '2c',
                                    bn_name_base=bn_name_base + '2c')
        out = expand_step(out)

        return _shortcut(input_feature, out)

    return f


def _handle_dim_ordering():
    """Set the module-level ROW_AXIS, COL_AXIS and CHANNEL_AXIS indices
    according to the backend's image data format."""
    global ROW_AXIS, COL_AXIS, CHANNEL_AXIS
    channels_last = K.image_data_format() == 'channels_last'
    # channels_last: (batch, rows, cols, channels); otherwise channels come first
    ROW_AXIS, COL_AXIS, CHANNEL_AXIS = (1, 2, 3) if channels_last else (2, 3, 1)


def _string_to_function(identifier):
    if isinstance(identifier, six.string_types):
        res = globals().get(identifier)
        if not res:
            raise ValueError('Invalid {}'.format(identifier))
        return res
    return identifier


def ResNet(input_shape=None, classes=10, block='basic', residual_unit='v1',
           repetitions=None, initial_filters=64, activation='softmax', include_top=True,
           input_tensor=None, dropout=0.5, transition_dilation_rate=(1, 1),
           initial_strides=(1, 1), initial_kernel_size=(3, 3), initial_pooling=None,
           final_pooling=None, top='classification'):
    """Builds a custom ResNet like architecture.

    With the default arguments this uses `basic` blocks, `v1`
    (conv -> BN -> relu) residual units and repetitions of [3, 4, 6, 3].

    Args:
        input_shape: shape tuple with exactly 3 dimensions, e.g. `(64, 64, 3)`
            with `channels_last` dim ordering. It is validated by
            `_obtain_input_shape` with `default_size=32` and `min_size=8`.
        classes: The number of outputs at final softmax layer
        block: The block function to use. Either `'basic'` or `'bottleneck'`,
            the name of another module-level function, or a callable with
            the same interface as `basic_block`.
            The original paper used `basic` for layers < 50.
        residual_unit: the basic residual unit, 'v1' for conv bn relu, 'v2' for bn relu
            conv; any other string is looked up as a module-level function
            name, and a non-string value is used as is. See [Identity Mappings in
            Deep Residual Networks](https://arxiv.org/abs/1603.05027)
            for details.
        repetitions: Number of repetitions of various block units.
            After each block unit the number of filters is doubled.
            Default of None implies [3, 4, 6, 3].
        initial_filters: number of filters of the first convolution and of
            the first block unit.
        activation: activation of the classifier: 'softmax', 'sigmoid'
            (only with `classes == 1`) or None.
        include_top: whether to add the final layers selected by `top`.
        input_tensor: optional tensor passed to `Input(tensor=...)`.
        dropout: None for no dropout, otherwise rate of dropout from 0 to 1.
            Based on [Wide Residual Networks.(https://arxiv.org/pdf/1605.07146) paper.
        transition_dilation_rate: Dilation rate for transition layers. For semantic
            segmentation of images use a dilation rate of (2, 2). Only when
            it equals (1, 1) does the first block of every stage use
            strides of (2, 2).
        initial_strides: Stride of the very first convolution and of the
            optional MaxPooling2D call, default (1, 1).
        initial_kernel_size: kernel size of the very first convolution, (7, 7) for
            imagenet and (3, 3) for small image datasets like tiny imagenet and cifar.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
        initial_pooling: Determine if there will be an initial pooling layer,
            'max' for imagenet and None for small image datasets.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
        final_pooling: Optional pooling mode for feature extraction at the final
            model layer when no classification/segmentation top is added.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        top: Defines final layers to evaluate based on a specific problem type. Options
            are 'classification' for ImageNet style problems, 'segmentation' for
            problems like the Pascal VOC dataset, and None to exclude these layers
            entirely.
    Returns:
        The keras `Model`.
    Raises:
        ValueError: if `activation` is not 'softmax', 'sigmoid' or None, or
            if it is 'sigmoid' while `classes != 1`.
        Exception: if the resolved input shape does not have 3 dimensions.
    """
    if activation not in ['softmax', 'sigmoid', None]:
        raise ValueError('activation must be one of "softmax", "sigmoid", or None')
    if activation == 'sigmoid' and classes != 1:
        raise ValueError('sigmoid activation can only be used when classes = 1')
    if repetitions is None:
        repetitions = [3, 4, 6, 3]
    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=32,
                                      min_size=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)
    # Sets the ROW_AXIS / COL_AXIS / CHANNEL_AXIS globals read by the helpers.
    _handle_dim_ordering()
    if len(input_shape) != 3:
        raise Exception("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")

    # Resolve `block` to a block factory: known name, other name, or callable.
    if block == 'basic':
        block_fn = basic_block
    elif block == 'bottleneck':
        block_fn = bottleneck
    elif isinstance(block, six.string_types):
        block_fn = _string_to_function(block)
    else:
        block_fn = block

    # Resolve `residual_unit` the same way.
    if residual_unit == 'v2':
        residual_unit = _bn_relu_conv
    elif residual_unit == 'v1':
        residual_unit = _conv_bn_relu
    elif isinstance(residual_unit, six.string_types):
        residual_unit = _string_to_function(residual_unit)
    else:
        # Already a callable: used unchanged (this assignment is a no-op).
        residual_unit = residual_unit

    # Permute dimension order if necessary
    if K.image_data_format() == 'channels_first':
        input_shape = (input_shape[1], input_shape[2], input_shape[0])
    # Determine proper input shape
    # NOTE(review): second validation of the (possibly permuted) shape with the
    # same data_format - confirm this is intended for channels_first.
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=32,
                                      min_size=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)

    # Stem: initial conv -> BN -> relu, optionally followed by max pooling.
    img_input = Input(shape=input_shape, tensor=input_tensor)
    x = _conv_bn_relu(filters=initial_filters, kernel_size=initial_kernel_size,
                      strides=initial_strides)(img_input)
    if initial_pooling == 'max':
        x = MaxPooling2D(pool_size=(3, 3), strides=initial_strides, padding="same")(x)

    # Residual stages: one per entry of `repetitions`, filters doubling each time.
    block = x
    filters = initial_filters
    for i, r in enumerate(repetitions):
        transition_dilation_rates = [transition_dilation_rate] * r
        transition_strides = [(1, 1)] * r
        # Without dilation the first block of every stage (the first stage
        # included) downsamples with strides of (2, 2).
        if transition_dilation_rate == (1, 1):
            transition_strides[0] = (2, 2)
        block = _residual_block(block_fn, filters=filters,
                                stage=i, blocks=r,
                                is_first_layer=(i == 0),
                                dropout=dropout,
                                transition_dilation_rates=transition_dilation_rates,
                                transition_strides=transition_strides,
                                residual_unit=residual_unit)(block)
        filters *= 2

    # Last activation
    x = _bn_relu(block)

    # Classifier block
    if include_top and top == 'classification':
        x = GlobalAveragePooling2D()(x)
        x = Dense(units=classes, activation=activation,
                  kernel_initializer="he_normal")(x)
    elif include_top and top == 'segmentation':
        # Per-pixel class scores from a 1 x 1 convolution
        x = Conv2D(classes, (1, 1), activation='linear', padding='same')(x)

        if K.image_data_format() == 'channels_first':
            channel, row, col = input_shape
        else:
            row, col, channel = input_shape

        # Flatten the spatial axes, apply the activation, restore the layout.
        x = Reshape((row * col, classes))(x)
        x = Activation(activation)(x)
        x = Reshape((row, col, classes))(x)
    elif final_pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif final_pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    model = Model(inputs=img_input, outputs=x)
    return model


def ResNet18(input_shape, classes):
    """Build an 18-layer ResNet from `basic_block` units.

    The four stages contain 2, 2, 2 and 2 blocks respectively; every other
    option is left at the defaults of `ResNet`.
    """
    stage_repetitions = [2, 2, 2, 2]
    return ResNet(input_shape, classes, basic_block,
                  repetitions=stage_repetitions)


def ResNet34(input_shape, classes):
    """Build a 34-layer ResNet from `basic_block` units.

    The four stages contain 3, 4, 6 and 3 blocks respectively; every other
    option is left at the defaults of `ResNet`.
    """
    stage_repetitions = [3, 4, 6, 3]
    return ResNet(input_shape, classes, basic_block,
                  repetitions=stage_repetitions)


def ResNet50(input_shape, classes):
    """Build a 50-layer ResNet from `bottleneck` units.

    The four stages contain 3, 4, 6 and 3 blocks respectively; every other
    option is left at the defaults of `ResNet`.
    """
    stage_repetitions = [3, 4, 6, 3]
    return ResNet(input_shape, classes, bottleneck,
                  repetitions=stage_repetitions)


def ResNet101(input_shape, classes):
    """Build a 101-layer ResNet from `bottleneck` units.

    The four stages contain 3, 4, 23 and 3 blocks respectively; every other
    option is left at the defaults of `ResNet`.
    """
    stage_repetitions = [3, 4, 23, 3]
    return ResNet(input_shape, classes, bottleneck,
                  repetitions=stage_repetitions)


def ResNet152(input_shape, classes):
    """Build a 152-layer ResNet from `bottleneck` units.

    The four stages contain 3, 8, 36 and 3 blocks respectively; every other
    option is left at the defaults of `ResNet`.
    """
    stage_repetitions = [3, 8, 36, 3]
    return ResNet(input_shape, classes, bottleneck,
                  repetitions=stage_repetitions)

In [None]:
# Training callbacks for the baseline ResNet18 run.
checkpoint_path = 'resnet18'

# Save the whole model (not only its weights) each time the validation
# accuracy reaches a new maximum.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
)

# Stop when the training loss has not improved for 5 consecutive epochs.
es = tf.keras.callbacks.EarlyStopping(patience=5, monitor='loss')

In [None]:
# Build ResNet18 Model
device_name = tf.test.gpu_device_name()
print(device_name)
with tf.device(device_name):
    resnet = ResNet18((64, 64, 3), 200)

# Optimizer candidates. Only `sgd` is used for compilation below; `adam` and
# its schedule are kept so the alternative stays one edit away.
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=100000,
    decay_rate=0.9
)

adam = keras.optimizers.Adam(
    learning_rate=lr_schedule,
    # learning_rate=0.001,
)

sgd = keras.optimizers.SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)

# Resume from the checkpoint of a previous run when one exists. On a fresh
# kernel (Restart & Run All) no checkpoint has been written yet, so keep the
# freshly built model instead of failing inside load_model.
if os.path.exists('resnet18'):
    resnet = keras.models.load_model('resnet18')

resnet.compile(loss='sparse_categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
resnet.summary()

2021-11-03 13:48:41.972857: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-11-03 13:48:42.313641: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-03 13:48:42.401835: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-03 13:48:42.402143: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built witho

/device:GPU:0
reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...
Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 64, 64, 3)]  0                                            
__________________________________________________________________________________________________
conv2d_40 (Conv2D)              (None, 64, 64, 64)   1792        input_3[0][0]                    
__________________________________________________________________________________________________
batch_normalization_42 (BatchNo (None, 64, 64, 64)   256         conv2d_40[0][0]                  
__________________________________________________________________________________________________
activation_34 (Activation)      (None, 64, 64, 64)   

In [None]:
# Train the baseline model. `es` ends the run early, so 500 is only an upper
# bound on the number of epochs.
resnet_history = resnet.fit(
    tiny_train_generator,
    validation_data=tiny_val_generator,
    epochs=500,
    callbacks=[cp_callback, es],
)

2021-11-03 13:48:51.326809: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/500


2021-11-03 13:48:54.118981: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8202




2021-11-03 13:50:16.008830: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: resnet18/assets




Epoch 2/500
Epoch 3/500
INFO:tensorflow:Assets written to: resnet18/assets




Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
INFO:tensorflow:Assets written to: resnet18/assets




Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500


# Task 3 (T3): Augment resnet 18 with the following modules:
*   CUBS Block 1
*   CUBS Block 2
*   Add these new modules after each residual block in one of the following fashions:
* Use the design which gives you better accuracy.
* The implementation of the module is really tricky, make sure that you understand the dimensions and create the module correctly
*   With this module the architecture should perform on par with or better than the vanilla ResNet (maximum allowed drop: 2%)
*   Train this new architecture on Tiny ImageNet and report the accuracy.




In [None]:
def cubs1(x):
    """CUBS 1 block: channel gating computed from globally pooled features.

    `x` is indexed as a rank-4 tensor; its axes 1, 2 and 3 are read as
    height, width and channels. The feature map is average-pooled, projected
    three times to 512 units, two projections are compared element-by-element
    (squared difference, softmax over the last axis), the result is used to
    mix the third projection, and a sigmoid gate with one value per channel
    is multiplied onto `x`.

    Returns a tensor produced by `tf.multiply(x, gate)`.
    """
    height, width, channels = x.shape[1], x.shape[2], x.shape[3]
    embed_dim = 512

    # Global descriptor and its three dense projections.
    pooled = GlobalAveragePooling2D()(x)
    proj_a = Dense(embed_dim)(pooled)
    proj_b = Dense(embed_dim)(pooled)
    proj_c = Dense(embed_dim)(pooled)

    # Repeat proj_a along a new axis 1 and proj_b along a new axis 2 so every
    # (i, j) pair of entries can be compared.
    tiled_a = tf.tile(tf.expand_dims(proj_a, 1), [1, embed_dim, 1])
    tiled_b = tf.tile(tf.expand_dims(proj_b, 2), [1, 1, embed_dim])
    pairwise = tf.math.squared_difference(tiled_a, tiled_b)

    attention = Activation(activations.softmax)(pairwise)

    # Weight the third projection with the attention matrix, then move the
    # feature axis last for the Dense layer.
    mixed = tf.matmul(attention, tf.expand_dims(proj_c, 2))
    mixed = tf.transpose(mixed, [0, 2, 1])

    restored = Dense(channels)(mixed)
    pooled_expanded = tf.expand_dims(pooled, 1)
    gate_logits = tf.add(restored, pooled_expanded)
    gate = Activation(activations.sigmoid)(gate_logits)

    # Spread the per-channel gate over every spatial position.
    gate = tf.tile(tf.expand_dims(gate, 1), [1, height, width, 1])
    return tf.multiply(x, gate)


def cubs2(x):
    """CUBS 2 block: gating computed from three single-channel 1x1 projections.

    `x` is indexed as a rank-4 tensor; its axes 1, 2 and 3 are read as
    height (h), width (w) and channels (c).

    The final `tf.tile` repeats a vector of length w*w by `c // (h * w)`
    along the last axis before multiplying with `x`, so the block only works
    when that product is broadcast-compatible with c (e.g. a 4x4x512 input).

    NOTE(review): the two similarity inputs are tiled with identical
    multiples, so `sim_mat` compares c1 and c2 at the same position rather
    than across position pairs -- confirm against the CUBS specification.

    Returns a tensor produced by `tf.multiply(x, gate)`.
    """
    h = x.shape[1]
    w = x.shape[2]
    c = x.shape[3]

    c1_out = Conv2D(1, (1, 1))(x)
    c2_out = Conv2D(1, (1, 1))(x)
    c3_out = Conv2D(1, (1, 1))(x)

    # Remove only the trailing single-channel axis. A bare tf.squeeze would
    # also drop the batch axis for a batch of one (and any size-1 spatial
    # axis), which makes the rank-3 tile multiples below invalid.
    c1_out_t = tf.tile(tf.squeeze(c1_out, axis=-1), [1, h, w])
    c2_out_t = tf.tile(tf.squeeze(c2_out, axis=-1), [1, h, w])
    sim_mat = tf.math.squared_difference(c1_out_t, c2_out_t)

    soft_out = Activation(activations.softmax)(sim_mat)

    c3_out_t = tf.tile(tf.squeeze(c3_out, axis=-1), [1, h, w])

    # Weight the third projection and collapse axis 1.
    feats = tf.multiply(soft_out, c3_out_t)
    feats = tf.reduce_sum(feats, axis=1)

    sig_out = Activation(activations.sigmoid)(feats)

    # Insert two singleton axes so the gate can be tiled to a rank-4 tensor.
    sig_out_e = tf.expand_dims(sig_out, 1)
    sig_out_e = tf.expand_dims(sig_out_e, 1)

    sig_out_t = tf.tile(sig_out_e, [1, h, w, c // (h * w)])

    out = tf.multiply(x, sig_out_t)

    return out


def _bn_relu(x, bn_name=None, relu_name=None):
    """Apply batch normalization along CHANNEL_AXIS followed by a ReLU.

    `bn_name` and `relu_name` are forwarded as the names of the two layers.
    """
    normalized = BatchNormalization(name=bn_name, axis=CHANNEL_AXIS)(x)
    relu_layer = Activation("relu", name=relu_name)
    return relu_layer(normalized)


def _conv_bn_relu(**conv_params):
    """Helper to build a conv -> BN -> relu residual unit activation function.
       This is the original ResNet v1 scheme in https://arxiv.org/abs/1512.03385

    Required keys: `filters`, `kernel_size`.
    Optional keys (default): `strides` ((1, 1)), `dilation_rate` ((1, 1)),
    `conv_name` / `conv_name_base` (None), `bn_name` / `bn_name_base` (None),
    `relu_name` (None), `kernel_initializer` ("he_normal"),
    `padding` ("same"), `kernel_regularizer` (l2(1.e-4)).

    Returns a function that maps an input tensor to the unit's output tensor.
    """
    filters = conv_params["filters"]
    kernel_size = conv_params["kernel_size"]
    strides = conv_params.get("strides", (1, 1))
    dilation_rate = conv_params.get("dilation_rate", (1, 1))
    # The block builders in this file pass `conv_name_base` / `bn_name_base`.
    # Only `conv_name` / `bn_name` used to be read, so the requested layer
    # names were silently dropped; accept both spellings.
    conv_name = conv_params.get("conv_name", conv_params.get("conv_name_base"))
    bn_name = conv_params.get("bn_name", conv_params.get("bn_name_base"))
    relu_name = conv_params.get("relu_name", None)
    kernel_initializer = conv_params.get("kernel_initializer", "he_normal")
    padding = conv_params.get("padding", "same")
    kernel_regularizer = conv_params.get("kernel_regularizer", l2(1.e-4))

    def f(x):
        x = Conv2D(filters=filters, kernel_size=kernel_size,
                   strides=strides, padding=padding,
                   dilation_rate=dilation_rate,
                   kernel_initializer=kernel_initializer,
                   kernel_regularizer=kernel_regularizer,
                   name=conv_name)(x)
        return _bn_relu(x, bn_name=bn_name, relu_name=relu_name)

    return f


def _bn_relu_conv(**conv_params):
    """Helper to build a BN -> relu -> conv residual unit with full pre-activation
    function. This is the ResNet v2 scheme proposed in
    http://arxiv.org/pdf/1603.05027v2.pdf

    Required keys: `filters`, `kernel_size`.
    Optional keys (default): `strides` ((1, 1)), `dilation_rate` ((1, 1)),
    `conv_name` / `conv_name_base` (None), `bn_name` / `bn_name_base` (None),
    `relu_name` (None), `kernel_initializer` ("he_normal"),
    `padding` ("same"), `kernel_regularizer` (l2(1.e-4)).

    Returns a function that maps an input tensor to the unit's output tensor.
    """
    filters = conv_params["filters"]
    kernel_size = conv_params["kernel_size"]
    strides = conv_params.get("strides", (1, 1))
    dilation_rate = conv_params.get("dilation_rate", (1, 1))
    # The block builders in this file pass `conv_name_base` / `bn_name_base`.
    # Only `conv_name` / `bn_name` used to be read, so the requested layer
    # names were silently dropped; accept both spellings.
    conv_name = conv_params.get("conv_name", conv_params.get("conv_name_base"))
    bn_name = conv_params.get("bn_name", conv_params.get("bn_name_base"))
    relu_name = conv_params.get("relu_name", None)
    kernel_initializer = conv_params.get("kernel_initializer", "he_normal")
    padding = conv_params.get("padding", "same")
    kernel_regularizer = conv_params.get("kernel_regularizer", l2(1.e-4))

    def f(x):
        activation = _bn_relu(x, bn_name=bn_name, relu_name=relu_name)
        return Conv2D(filters=filters, kernel_size=kernel_size,
                      strides=strides, padding=padding,
                      dilation_rate=dilation_rate,
                      kernel_initializer=kernel_initializer,
                      kernel_regularizer=kernel_regularizer,
                      name=conv_name)(activation)

    return f


def _shortcut(input_feature, residual, conv_name_base=None, bn_name_base=None):
    """Sum `residual` with a shortcut taken from `input_feature`.

    When both tensors already agree in spatial size and channel count the
    input is added unchanged. Otherwise the input is first projected by a
    strided 1x1 convolution plus batch normalization so the shapes match.
    """
    in_shape = K.int_shape(input_feature)
    res_shape = K.int_shape(residual)

    # Ratio between input and residual extent along each spatial axis; an
    # integer when the network is configured consistently.
    row_stride = int(round(in_shape[ROW_AXIS] / res_shape[ROW_AXIS]))
    col_stride = int(round(in_shape[COL_AXIS] / res_shape[COL_AXIS]))
    channels_match = in_shape[CHANNEL_AXIS] == res_shape[CHANNEL_AXIS]

    # Identity shortcut: nothing to reshape.
    if row_stride <= 1 and col_stride <= 1 and channels_match:
        return add([input_feature, residual])

    print('reshaping via a convolution...')
    projection_conv_name = None if conv_name_base is None else conv_name_base + '1'
    projected = Conv2D(filters=res_shape[CHANNEL_AXIS],
                       kernel_size=(1, 1),
                       strides=(row_stride, col_stride),
                       padding="valid",
                       kernel_initializer="he_normal",
                       kernel_regularizer=l2(0.0001),
                       name=projection_conv_name)(input_feature)

    projection_bn_name = None if bn_name_base is None else bn_name_base + '1'
    projected = BatchNormalization(axis=CHANNEL_AXIS,
                                   name=projection_bn_name)(projected)

    return add([projected, residual])


def _residual_block(block_function, filters, blocks, stage,
                    transition_strides=None, transition_dilation_rates=None,
                    dilation_rates=None, is_first_layer=False, dropout=None,
                    residual_unit=_bn_relu_conv):
    """Build one stage made of `blocks` consecutive residual blocks.

       block_function: factory (e.g. `basic_block`, `bottleneck`) called once
            per block
       stage: integer stage label, used for generating layer names
       blocks: number of blocks in the stage; the block index is used for
            generating layer names
       transition_strides: per-block list of stride tuples, (1, 1) each when
            omitted
       transition_dilation_rates: per-block list of dilation tuples; it is
            defaulted here but not forwarded to `block_function`
       dilation_rates: per-block dilation rate passed to `block_function`,
            1 each when omitted

    Returns a function that applies the whole stage to a tensor.
    """
    if transition_dilation_rates is None:
        transition_dilation_rates = [(1, 1)] * blocks
    if transition_strides is None:
        transition_strides = [(1, 1)] * blocks
    if dilation_rates is None:
        dilation_rates = [1] * blocks

    def f(x):
        out = x
        for idx in range(blocks):
            # Only the very first block of the very first stage is flagged.
            first_of_network = is_first_layer and idx == 0
            build_block = block_function(
                filters=filters,
                stage=stage,
                block=idx,
                transition_strides=transition_strides[idx],
                dilation_rate=dilation_rates[idx],
                is_first_block_of_first_layer=first_of_network,
                dropout=dropout,
                residual_unit=residual_unit,
            )
            out = build_block(out)
        return out

    return f


def _block_name_base(stage, block):
    """Get the convolution name base and batch normalization name base defined by
    stage and block.
    If there are less than 26 blocks they will be labeled 'a', 'b', 'c' to match the
    paper and keras and beyond 26 blocks they will simply be numbered.
    """
    if block < 27:
        block = '%c' % (block + 97)  # 97 is the ascii number for lowercase 'a'
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    return conv_name_base, bn_name_base


def basic_block(filters, stage, block, transition_strides=(1, 1),
                dilation_rate=(1, 1), is_first_block_of_first_layer=False, dropout=None,
                residual_unit=_bn_relu_conv):
    """Two-convolution (3 X 3) residual block for resnets with <= 34 layers.

    The first unit carries the transition stride and dilation; the very first
    block of the network uses a bare convolution there. Dropout is inserted
    between the two units when `dropout` is not None.

    Returns a function that applies the block, shortcut included, to a tensor.
    """
    def f(input_features):
        conv_base, bn_base = _block_name_base(stage, block)

        if not is_first_block_of_first_layer:
            entry_unit = residual_unit(filters=filters, kernel_size=(3, 3),
                                       strides=transition_strides,
                                       dilation_rate=dilation_rate,
                                       conv_name_base=conv_base + '2a',
                                       bn_name_base=bn_base + '2a')
        else:
            # don't repeat bn->relu since we just did bn->relu->maxpool
            entry_unit = Conv2D(filters=filters, kernel_size=(3, 3),
                                strides=transition_strides,
                                dilation_rate=dilation_rate,
                                padding="same",
                                kernel_initializer="he_normal",
                                kernel_regularizer=l2(1e-4),
                                name=conv_base + '2a')
        x = entry_unit(input_features)

        if dropout is not None:
            x = Dropout(dropout)(x)

        exit_unit = residual_unit(filters=filters, kernel_size=(3, 3),
                                  conv_name_base=conv_base + '2b',
                                  bn_name_base=bn_base + '2b')
        x = exit_unit(x)

        return _shortcut(input_features, x)

    return f


def bottleneck(filters, stage, block, transition_strides=(1, 1),
               dilation_rate=(1, 1), is_first_block_of_first_layer=False, dropout=None,
               residual_unit=_bn_relu_conv):
    """Three-unit bottleneck residual block (1 X 1, 3 X 3, 1 X 1) for resnets
    with more than 34 layers.

    The first unit carries the transition stride and dilation; the very first
    block of the network uses a bare convolution there. Dropout follows the
    first and the second unit when `dropout` is not None.

    Returns:
        A function applying the block; its last unit has filters * 4 filters.
    """
    def _maybe_dropout(tensor):
        # Dropout is optional and controlled by the enclosing `dropout` value.
        if dropout is None:
            return tensor
        return Dropout(dropout)(tensor)

    def f(input_feature):
        conv_base, bn_base = _block_name_base(stage, block)

        if not is_first_block_of_first_layer:
            entry_unit = residual_unit(filters=filters, kernel_size=(1, 1),
                                       strides=transition_strides,
                                       dilation_rate=dilation_rate,
                                       conv_name_base=conv_base + '2a',
                                       bn_name_base=bn_base + '2a')
        else:
            # don't repeat bn->relu since we just did bn->relu->maxpool
            entry_unit = Conv2D(filters=filters, kernel_size=(1, 1),
                                strides=transition_strides,
                                dilation_rate=dilation_rate,
                                padding="same",
                                kernel_initializer="he_normal",
                                kernel_regularizer=l2(1e-4),
                                name=conv_base + '2a')
        x = _maybe_dropout(entry_unit(input_feature))

        middle_unit = residual_unit(filters=filters, kernel_size=(3, 3),
                                    conv_name_base=conv_base + '2b',
                                    bn_name_base=bn_base + '2b')
        x = _maybe_dropout(middle_unit(x))

        exit_unit = residual_unit(filters=filters * 4, kernel_size=(1, 1),
                                  conv_name_base=conv_base + '2c',
                                  bn_name_base=bn_base + '2c')
        x = exit_unit(x)

        return _shortcut(input_feature, x)

    return f


def _handle_dim_ordering():
    """Set the module-level ROW_AXIS, COL_AXIS and CHANNEL_AXIS indices.

    'channels_last' yields rows=1, cols=2, channels=3; any other data format
    yields channels=1, rows=2, cols=3.
    """
    global ROW_AXIS, COL_AXIS, CHANNEL_AXIS
    channels_last = K.image_data_format() == 'channels_last'
    ROW_AXIS, COL_AXIS, CHANNEL_AXIS = (1, 2, 3) if channels_last else (2, 3, 1)


def _string_to_function(identifier):
    """Resolve a string to the module-level object of that name.

    Non-string identifiers are returned unchanged. A string that names no
    (truthy) global raises ValueError.
    """
    if not isinstance(identifier, six.string_types):
        return identifier

    resolved = globals().get(identifier)
    if not resolved:
        raise ValueError('Invalid {}'.format(identifier))
    return resolved


def ResNet(input_shape=None, classes=10, block='basic', residual_unit='v1',
           repetitions=None, initial_filters=64, activation='softmax', include_top=True,
           input_tensor=None, dropout=0.5, transition_dilation_rate=(1, 1),
           initial_strides=(1, 1), initial_kernel_size=(3, 3), initial_pooling=None,
           final_pooling=None, top='classification', cubs_arch=1):
    """Builds a custom ResNet like architecture, optionally followed by CUBS blocks.

    With the signature defaults the network uses basic blocks, 'v1'
    (conv -> BN -> relu) residual units and repetitions of [3, 4, 6, 3].

    Args:
        input_shape: shape tuple with exactly 3 dimensions, e.g. `(64, 64, 3)`.
            It is validated by `_obtain_input_shape` with `default_size=32`
            and `min_size=8`.
        classes: The number of outputs at the final layer.
        block: The block function to use. `'basic'` or `'bottleneck'` select the
            functions of the same name; any other string is looked up with
            `_string_to_function`; a callable is used directly.
        residual_unit: the basic residual unit, 'v1' for conv bn relu, 'v2' for bn relu
            conv. See [Identity Mappings in
            Deep Residual Networks](https://arxiv.org/abs/1603.05027)
            for details. Any other string is looked up with
            `_string_to_function`; a callable is used directly.
        repetitions: Number of blocks in each stage. The number of filters is
            doubled after every stage. Default of None implies [3, 4, 6, 3].
        initial_filters: filters of the first convolution and of the first stage.
        activation: 'softmax', 'sigmoid' or None; 'sigmoid' requires `classes == 1`.
        include_top: whether the block selected by `top` is appended.
        input_tensor: forwarded to `Input(tensor=...)`.
        dropout: None for no dropout, otherwise rate of dropout from 0 to 1,
            forwarded to every residual block. Note the default is 0.5.
            Based on [Wide Residual Networks.(https://arxiv.org/pdf/1605.07146) paper.
        transition_dilation_rate: when equal to (1, 1) the first block of every
            stage uses a stride of (2, 2); otherwise all strides stay (1, 1).
        initial_strides: Stride of the very first convolution and of the optional
            MaxPooling2D call, default (1, 1).
        initial_kernel_size: kernel size of the very first convolution, default (3, 3).
        initial_pooling: 'max' inserts a 3 X 3 MaxPooling2D after the first
            convolution; None (default) inserts nothing.
        final_pooling: only consulted when neither the classification nor the
            segmentation top is appended.
            - `None` leaves the 4D feature tensor as the model output.
            - `avg` applies global average pooling.
            - `max` applies global max pooling.
        top: 'classification' appends global average pooling and a Dense layer
            with `classes` units; 'segmentation' appends a 1 X 1 convolution
            whose output is reshaped using the rows and columns of `input_shape`.
        cubs_arch: arrangement of the CUBS blocks, applied once to the output of
            the last BN -> relu and before the top:
            - 1: `cubs1` and `cubs2` run in parallel on the same tensor and
                 their outputs are added.
            - 2: `cubs1` followed by `cubs2`.
            - 3: `cubs2` followed by `cubs1`.
            - any other value: no CUBS block is added.
    Returns:
        The keras `Model`.
    Raises:
        ValueError: for an unsupported `activation`, or 'sigmoid' with
            `classes != 1`.
        Exception: when `input_shape` does not have exactly 3 entries.
    """
    if activation not in ['softmax', 'sigmoid', None]:
        raise ValueError('activation must be one of "softmax", "sigmoid", or None')
    if activation == 'sigmoid' and classes != 1:
        raise ValueError('sigmoid activation can only be used when classes = 1')
    if repetitions is None:
        repetitions = [3, 4, 6, 3]
    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=32,
                                      min_size=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)
    # Sets the module-level ROW_AXIS / COL_AXIS / CHANNEL_AXIS used by the helpers.
    _handle_dim_ordering()
    if len(input_shape) != 3:
        raise Exception("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")

    # Resolve `block` to a block-building function.
    if block == 'basic':
        block_fn = basic_block
    elif block == 'bottleneck':
        block_fn = bottleneck
    elif isinstance(block, six.string_types):
        block_fn = _string_to_function(block)
    else:
        block_fn = block

    # Resolve `residual_unit` to a unit-building function.
    if residual_unit == 'v2':
        residual_unit = _bn_relu_conv
    elif residual_unit == 'v1':
        residual_unit = _conv_bn_relu
    elif isinstance(residual_unit, six.string_types):
        residual_unit = _string_to_function(residual_unit)
    else:
        residual_unit = residual_unit

    # Permute dimension order if necessary
    if K.image_data_format() == 'channels_first':
        input_shape = (input_shape[1], input_shape[2], input_shape[0])
    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=32,
                                      min_size=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)

    # Stem: conv -> BN -> relu, optionally followed by max pooling.
    img_input = Input(shape=input_shape, tensor=input_tensor)
    x = _conv_bn_relu(filters=initial_filters, kernel_size=initial_kernel_size,
                      strides=initial_strides)(img_input)
    if initial_pooling == 'max':
        x = MaxPooling2D(pool_size=(3, 3), strides=initial_strides, padding="same")(x)

    # Residual stages; the filter count doubles after each one.
    block = x
    filters = initial_filters
    for i, r in enumerate(repetitions):
        transition_dilation_rates = [transition_dilation_rate] * r
        transition_strides = [(1, 1)] * r
        if transition_dilation_rate == (1, 1):
            transition_strides[0] = (2, 2)
        block = _residual_block(block_fn, filters=filters,
                                stage=i, blocks=r,
                                is_first_layer=(i == 0),
                                dropout=dropout,
                                transition_dilation_rates=transition_dilation_rates,
                                transition_strides=transition_strides,
                                residual_unit=residual_unit)(block)
        filters *= 2

    # Last activation
    x = _bn_relu(block)

    ##############################
    ###        CUBS ARCHS      ###
    ##############################
    # The CUBS blocks are applied once here, after the last stage.
    if cubs_arch == 1:
        x1 = cubs1(x)
        x2 = cubs2(x)
        x = add([x1,x2])
    elif cubs_arch == 2:
        x = cubs1(x)
        x = cubs2(x)
    elif cubs_arch == 3:
        x = cubs2(x)
        x = cubs1(x)

    # Classifier block
    if include_top and top == 'classification':
        x = GlobalAveragePooling2D()(x)
        x = Dense(units=classes, activation=activation,
                  kernel_initializer="he_normal")(x)
    elif include_top and top == 'segmentation':
        x = Conv2D(classes, (1, 1), activation='linear', padding='same')(x)

        if K.image_data_format() == 'channels_first':
            channel, row, col = input_shape
        else:
            row, col, channel = input_shape

        # Flatten the spatial axes so the activation runs over the class axis,
        # then restore the spatial layout.
        x = Reshape((row * col, classes))(x)
        x = Activation(activation)(x)
        x = Reshape((row, col, classes))(x)
    elif final_pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif final_pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    model = Model(inputs=img_input, outputs=x)
    return model


def ResNet18_cubs_arch1(input_shape, classes):
    """ResNet18 with CUBS arrangement 1 (`cubs_arch=1`)."""
    return ResNet(
        input_shape=input_shape,
        classes=classes,
        block=basic_block,
        repetitions=[2, 2, 2, 2],
        cubs_arch=1,
    )

def ResNet18_cubs_arch2(input_shape, classes):
    """ResNet18 with CUBS arrangement 2 (`cubs_arch=2`)."""
    return ResNet(input_shape, classes, basic_block, repetitions=[2, 2, 2, 2], cubs_arch=2)

def ResNet18_cubs_arch3(input_shape, classes):
    """ResNet18 with CUBS arrangement 3 (`cubs_arch=3`)."""
    return ResNet(input_shape, classes, basic_block, repetitions=[2, 2, 2, 2], cubs_arch=3)

# Callbacks for the CUBS arch-1 training run.
checkpoint_path = 'resnet18_cubs_arch1'

# Save the whole model whenever the validation accuracy reaches a new maximum.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
)

# Stop when the training loss has not improved for 5 consecutive epochs.
es = tf.keras.callbacks.EarlyStopping(patience=5, monitor='loss')

# Build ResNet18 CUBS arch Models
device_name = tf.test.gpu_device_name()
print(device_name)
with tf.device(device_name):
    resnet_cubs_arch1 = ResNet18_cubs_arch1((64, 64, 3), 200)
    resnet_cubs_arch2 = ResNet18_cubs_arch2((64, 64, 3), 200)
    resnet_cubs_arch3 = ResNet18_cubs_arch3((64, 64, 3), 200)

# Optimizer candidates; only `sgd` is used for compilation below.
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    decay_rate=0.9,
    decay_steps=100000,
    initial_learning_rate=0.001,
)

adam = keras.optimizers.Adam(learning_rate=lr_schedule)

sgd = keras.optimizers.SGD(
    learning_rate=0.01,
    momentum=0.9,
    nesterov=True,
    decay=1e-6,
)

resnet_cubs_arch1.compile(
    optimizer=sgd,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

2021-11-05 19:48:55.532735: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 19:48:55.533427: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 19:48:55.533801: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 19:48:55.537799: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 19:48:55.537814: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1594] Could not ident

/device:GPU:0
reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...


In [None]:
# Train CUBS arch 1. `es` ends the run early, so 500 is only an upper bound
# on the number of epochs.
resnet_cubs_arch1_history = resnet_cubs_arch1.fit(
    tiny_train_generator,
    validation_data=tiny_val_generator,
    epochs=500,
    callbacks=[cp_callback, es],
)



Epoch 1/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 2/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 3/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 4/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 5/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 6/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 7/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 8/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 9/500
Epoch 10/500
Epoch 11/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 12/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 13/500
Epoch 14/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 15/500
Epoch 16/500
Epoch 17/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 18/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 19/500
Epoch 20/500
Epoch 21/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 26/500
INFO:tensorflow:Assets written to: resnet18_cubs_arch1/assets




Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500


In [None]:
# Callbacks for the CUBS arch-2 run: checkpoint the best model by validation
# accuracy and stop after 5 epochs without training-loss improvement.
checkpoint_path = 'resnet_cubs_arch2'
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=False,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

es = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)

# Use a dedicated optimizer for this model. Reusing the `sgd` instance that
# trained another model would carry over its iteration counter (which drives
# the `decay` term) and its per-variable state.
sgd_arch2 = keras.optimizers.SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)

resnet_cubs_arch2.compile(loss='sparse_categorical_crossentropy', optimizer=sgd_arch2, metrics=['accuracy'])
# resnet_cubs_arch2 = keras.models.load_model('resnet_cubs_arch2')

# resnet_cubs_arch2_history = resnet_cubs_arch2.fit(tiny_train_generator, epochs=500, validation_data=tiny_val_generator, callbacks=[cp_callback, es])

In [None]:
# Callbacks for the CUBS arch-3 run: checkpoint the best model by validation
# accuracy and stop after 5 epochs without training-loss improvement.
checkpoint_path = 'resnet_cubs_arch3'
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=False,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

es = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)

# Use a dedicated optimizer for this model. Reusing the `sgd` instance that
# trained another model would start this run with that model's iteration
# counter (and therefore an already decayed learning rate) and keep its
# per-variable state alive.
sgd_arch3 = keras.optimizers.SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)

resnet_cubs_arch3.compile(loss='sparse_categorical_crossentropy', optimizer=sgd_arch3, metrics=['accuracy'])
# keras.models.save_model(resnet_cubs_arch3, 'resnet_cubs_arch3')

resnet_cubs_arch3_history = resnet_cubs_arch3.fit(tiny_train_generator, epochs=500, validation_data=tiny_val_generator, callbacks=[cp_callback, es])

INFO:tensorflow:Assets written to: resnet_cubs_arch3/assets




# Task 4 (T4): Implement retrieval task using Augmented Architecture


*   Using the new ResNet architecture, train the model on the first 98 classes of the Cars196 dataset. The remaining 98 classes form the test set.
*   Create a function to compute recall (mentioned below) and report recall at 1,2,4,8
  * Recall @K = Number of relevant items in top K / Total number of items
* After the recalls have been recorded, crop the classification layer and add a new single 512 dimensional embedding.
* Train the whole network using the custom loss algorithm
  * After training report the recall at 1,2,4, 8



In [None]:
### Partition Cars data
import scipy.io as sio
from sklearn.metrics.pairwise import cosine_similarity

# Annotation table for the Cars dataset; read from the working directory.
annos = sio.loadmat('cars_annos.mat')

fnames = [x[0] for x in annos['annotations']['relative_im_path'][0]]
# Shift the stored class ids down by one so that labels are zero-based
# (raw ids are presumably 1..196 -- verify against the annotation file).
classes = [x[0][0]-1 for x in annos['annotations']['class'][0]]

# Number of leading classes that make up the training split.  With zero-based
# labels the training classes are 0..97, so the test must be a strict `<`:
# the previous `lbl <= 98` put 99 classes in train and only 97 in val.
NUM_TRAIN_CLASSES = 98

cars = {
    'train': [],        # relative image paths of the training classes
    'labels': {},       # path -> zero-based class id (training)
    'val': [],          # relative image paths of the held-out classes
    'val_labels': {}    # path -> zero-based class id (held-out)
}

for name, lbl in zip(fnames, classes):
  if lbl < NUM_TRAIN_CLASSES:
    cars['train'].append(name)
    cars['labels'][name] = lbl
  else:
    cars['val'].append(name)
    cars['val_labels'][name] = lbl

In [None]:
### Create data generators
# Both splits are served as shuffled batches of 128 RGB images resized to 64x64.
cars_train_generator = DataGenerator(
    cars['train'], cars['labels'],
    dim=(64, 64), n_channels=3, n_classes=196,
    batch_size=128, shuffle=True,
)
cars_val_generator = DataGenerator(
    cars['val'], cars['val_labels'],
    dim=(64, 64), n_channels=3, n_classes=196,
    batch_size=128, shuffle=True,
)

In [None]:
### Metric functions for recall at K (1,2,4,8)

# Cosine-similarity threshold used by the earlier, threshold-based version of
# these metrics.  It is no longer read by the recall functions (relevance is
# now decided by class labels) but is kept so any other cell that refers to
# it keeps working.
interest = 0.75


def _recall_at_k(y_true, y_pred, k):
  """Recall@K of a batch of embeddings under cosine similarity.

  Every sample in the batch is used as a query against all *other* samples
  of the same batch.  A query counts as a hit when at least one of its `k`
  most similar neighbours carries the same class label.  The result is the
  fraction of queries that are hits, so it always lies in [0, 1].

  Fixes relative to the previous implementation:
    * neighbours are ranked by *descending* similarity (an ascending sort
      made the "top K" the K least similar entries);
    * the full similarity matrix is used (`np.triu` zeroed half of each row
      and those zeros then competed in the ranking);
    * the query itself is excluded from its own neighbour list;
    * relevance is taken from `y_true` instead of a similarity threshold.

  Args:
    y_true: class labels, either one id per sample (shape `(n,)` or
      `(n, 1)`) or one-hot rows (shape `(n, n_classes)`).
    y_pred: one embedding (or score vector) per sample, shape `(n, d)`.
      Anything `np.asarray` can convert is accepted, which includes eager
      tensors.
    k: number of nearest neighbours to inspect per query.

  Returns:
    A Python float; 0.0 when the batch has fewer than two samples.
  """
  embeddings = np.nan_to_num(np.asarray(y_pred).astype(np.float64))
  n_samples = embeddings.shape[0] if embeddings.ndim else 0
  if n_samples < 2:
    # No neighbour exists to retrieve.
    return 0.0
  embeddings = embeddings.reshape(n_samples, -1)

  labels = np.asarray(y_true).reshape(n_samples, -1)
  # One-hot rows are collapsed to class ids; a single column is used as is.
  labels = labels.argmax(axis=1) if labels.shape[1] > 1 else labels[:, 0]

  # Cosine similarity; all-zero rows keep a norm of 1 so they yield
  # similarity 0 instead of NaN.
  norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
  norms[norms == 0] = 1.0
  unit = embeddings / norms
  sims = unit @ unit.T

  # A sample must never retrieve itself.
  np.fill_diagonal(sims, -np.inf)

  k = min(k, n_samples - 1)
  top_k = np.argsort(-sims, axis=1, kind="stable")[:, :k]
  hits = (labels[top_k] == labels[:, None]).any(axis=1)
  return float(hits.mean())


def recall_k1(y_true, y_pred):
  """Recall@1: fraction of samples whose nearest neighbour shares their class."""
  return _recall_at_k(y_true, y_pred, k=1)


def recall_k2(y_true, y_pred):
  """Recall@2: fraction of samples with a same-class item among their 2 nearest neighbours."""
  return _recall_at_k(y_true, y_pred, k=2)


def recall_k4(y_true, y_pred):
  """Recall@4: fraction of samples with a same-class item among their 4 nearest neighbours."""
  return _recall_at_k(y_true, y_pred, k=4)


def recall_k8(y_true, y_pred):
  """Recall@8: fraction of samples with a same-class item among their 8 nearest neighbours."""
  return _recall_at_k(y_true, y_pred, k=8)

In [None]:
### Train the augmented ResNet (CUBS arch 2) on the cars data and track recall at 1, 2, 4, 8
# Build the model on the GPU reported by TensorFlow.
device_name = tf.test.gpu_device_name()
print(device_name)
with tf.device(device_name):
    resnet_cubs_arch2 = ResNet18_cubs_arch2((64, 64, 3), 196)

sgd = keras.optimizers.SGD(
    learning_rate=0.01,
    momentum=0.9,
    nesterov=True,
    decay=1e-6,
)

checkpoint_path = 'cars1_resnet_cubs_arch2'

# Halt training after 5 consecutive epochs without improvement in training loss.
es = tf.keras.callbacks.EarlyStopping(patience=5, monitor='loss')

# Save the whole model whenever the training accuracy reaches a new maximum.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
)

resnet_cubs_arch2.compile(
    optimizer=sgd,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy', recall_k1, recall_k2, recall_k4, recall_k8],
)
resnet_cubs_arch2.summary()

/device:GPU:0
reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...


2021-11-05 10:07:50.413295: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 10:07:50.414097: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 10:07:50.414521: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 10:07:50.415433: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 10:07:50.415447: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1594] Could not ident

reshaping via a convolution...
Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            [(None, 64, 64, 3)]  0                                            
__________________________________________________________________________________________________
conv2d_115 (Conv2D)             (None, 64, 64, 64)   1792        input_6[0][0]                    
__________________________________________________________________________________________________
batch_normalization_105 (BatchN (None, 64, 64, 64)   256         conv2d_115[0][0]                 
__________________________________________________________________________________________________
activation_105 (Activation)     (None, 64, 64, 64)   0           batch_normalization_105[0][0]    
_____________________________________________________________

In [None]:
# Train for at most 50 epochs on the cars training generator, validating on the
# held-out generator; checkpointing and early stopping come from the callbacks.
resnet_cubs_arch2_history = resnet_cubs_arch2.fit(cars_train_generator, epochs=50, validation_data=cars_val_generator, callbacks=[cp_callback, es])



Epoch 1/50



INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 2/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 3/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 4/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 5/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 6/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 7/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 8/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 9/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 10/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 11/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 12/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 13/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 14/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 15/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 16/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 17/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 18/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 19/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 20/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 21/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 22/50
Epoch 23/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 24/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 25/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 26/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 27/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 28/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 29/50
Epoch 30/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 31/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 32/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 33/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 34/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 35/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 36/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 37/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 38/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 39/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 40/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 41/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 42/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 43/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 44/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 45/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 46/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 47/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 48/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 49/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




Epoch 50/50
INFO:tensorflow:Assets written to: cars1_resnet_cubs_arch2/assets




In [None]:
# Show the per-epoch `val_recall_k8` values stored in the training history.
print(resnet_cubs_arch2_history.history['val_recall_k8'])

[0.198908731341362, 0.2744295597076416, 0.2544642984867096, 0.232514888048172, 0.123511902987957, 0.01587301678955555, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


In [None]:
### Custom Loss Function
def sim_loss(y_true, y_pred, sample_weight=None):
  """Pairwise cosine-similarity loss over a batch of embeddings.

  For every anchor i in the batch, with S the cosine-similarity matrix:

      L_i = (1/alpha) * log(1 + sum_{p in P_i} exp(-alpha * (S_ip - lam)))
          + (1/beta)  * log(1 + sum_{n in N_i} exp( beta  * (S_in - lam)))

  where P_i are the other samples with the same label as i and N_i the
  samples with a different label.

  Fixes relative to the previous implementation:
    * everything is computed with TensorFlow ops, so gradients reach the
      network (the NumPy version returned a constant array, which carries no
      gradient);
    * positives / negatives are defined by `y_true` instead of by similarity
      thresholds, and an anchor is never its own positive;
    * one loss value per sample is returned rather than the batch mean
      repeated `batch` times (the mean Keras reports is unaffected).

  Args:
    y_true: class labels, one id per sample (shape `(batch,)` or
      `(batch, 1)`) or one-hot rows (shape `(batch, n_classes)`).
    y_pred: embeddings of shape `(batch, dim)`.
    sample_weight: accepted for signature compatibility; not used.

  Returns:
    Float tensor of shape `(batch,)` holding the per-anchor loss.
  """
  alpha = 5.0
  beta = 50.0
  lam = 0.5

  embeddings = tf.cast(y_pred, tf.float32)
  # Non-finite activations are zeroed so one bad value cannot poison the batch.
  embeddings = tf.where(tf.math.is_finite(embeddings), embeddings,
                        tf.zeros_like(embeddings))
  # Unit-length rows make the Gram matrix the cosine-similarity matrix.
  embeddings = tf.math.l2_normalize(embeddings, axis=1)
  sims = tf.matmul(embeddings, embeddings, transpose_b=True)

  labels = tf.convert_to_tensor(y_true)
  if (labels.shape.rank is not None and labels.shape.rank > 1
      and labels.shape[-1] not in (None, 1)):
    # One-hot rows -> class ids.
    labels = tf.argmax(labels, axis=-1)
  labels = tf.reshape(labels, [-1])

  same_class = tf.cast(
      tf.equal(tf.expand_dims(labels, 1), tf.expand_dims(labels, 0)),
      sims.dtype)
  not_self = 1.0 - tf.eye(tf.shape(sims)[0], dtype=sims.dtype)
  pos_mask = same_class * not_self
  neg_mask = 1.0 - same_class

  pos_sum = tf.reduce_sum(pos_mask * tf.exp(-alpha * (sims - lam)), axis=1)
  neg_sum = tf.reduce_sum(neg_mask * tf.exp(beta * (sims - lam)), axis=1)

  return tf.math.log1p(pos_sum) / alpha + tf.math.log1p(neg_sum) / beta

In [None]:
### Crop classification layer and add a new single 512 dimensional embedding
def cubs1(x):
    """CUBS 1 block: rescale the channels of `x` with a learned gate.

    `x` is indexed as a 4-D feature map whose axes 1, 2 and 3 are read as
    height, width and channels.  The map is globally average-pooled,
    projected three times to 512 units, combined through a softmax over
    pairwise squared differences, projected back to the channel count, added
    to the pooled vector, squashed by a sigmoid and multiplied into `x`.
    The output has the same shape as `x`.
    """
    height, width, channels = x.shape[1], x.shape[2], x.shape[3]
    proj_dim = 512

    pooled = GlobalAveragePooling2D()(x)
    proj_a = Dense(proj_dim)(pooled)
    proj_b = Dense(proj_dim)(pooled)
    proj_c = Dense(proj_dim)(pooled)

    # Broadcast the two projections against each other: (batch, 512, 512).
    grid_a = tf.tile(tf.expand_dims(proj_a, 1), [1, proj_dim, 1])
    grid_b = tf.tile(tf.expand_dims(proj_b, 2), [1, 1, proj_dim])
    pairwise = tf.math.squared_difference(grid_a, grid_b)

    weights = Activation(activations.softmax)(pairwise)

    # Weighted combination of the third projection: (batch, 1, 512).
    mixed = tf.matmul(weights, tf.expand_dims(proj_c, 2))
    mixed = tf.transpose(mixed, [0, 2, 1])

    restored = Dense(channels)(mixed)
    pooled = tf.expand_dims(pooled, 1)
    gate_logits = tf.add(restored, pooled)
    gate = Activation(activations.sigmoid)(gate_logits)

    # Repeat the per-channel gate over every spatial position.
    gate = tf.tile(tf.expand_dims(gate, 1), [1, height, width, 1])
    return tf.multiply(x, gate)


def cubs2(x):
    """CUBS 2 block: rescale `x` with a gate built from 1x1 convolutions.

    `x` is indexed as a 4-D feature map whose axes 1, 2 and 3 are read as
    height, width and channels (channels-last).  Three single-filter 1x1
    convolutions give maps of shape (batch, h, w, 1); their trailing axis is
    dropped and each map is tiled to (batch, h*h, w*w).  A softmax over the
    squared difference of the first two weights the third, the result is
    summed over axis 1, passed through a sigmoid, tiled back over the
    spatial grid and multiplied into `x`.

    The final tile yields `w*w * (c // (h*w))` channels, so the multiply only
    lines up with `x` when `h == w` and `c` is a multiple of `h*w`.

    Fix: the singleton channel is removed with `axis=-1`.  A bare
    `tf.squeeze` also removed the batch axis whenever the batch held a single
    sample, which made the following `tf.tile` fail.
    """
    h = x.shape[1]
    w = x.shape[2]
    c = x.shape[3]

    c1_out = Conv2D(1, (1,1))(x)
    c2_out = Conv2D(1, (1,1))(x)
    c3_out = Conv2D(1, (1,1))(x)

    # Drop only the size-1 filter axis; the batch axis must survive.
    c1_out_t = tf.tile(tf.squeeze(c1_out, axis=-1), [1, h, w])
    c2_out_t = tf.tile(tf.squeeze(c2_out, axis=-1), [1, h, w])
    sim_mat = tf.math.squared_difference(c1_out_t, c2_out_t)

    soft_out = Activation(activations.softmax)(sim_mat)

    c3_out_t = tf.tile(tf.squeeze(c3_out, axis=-1), [1, h, w])

    feats = tf.multiply(soft_out, c3_out_t)
    feats = tf.reduce_sum(feats, axis=1)

    sig_out = Activation(activations.sigmoid)(feats)

    # Insert two spatial axes: (batch, 1, 1, w*w).
    sig_out_e = tf.expand_dims(sig_out, 1)
    sig_out_e = tf.expand_dims(sig_out_e, 1)

    sig_out_t = tf.tile(sig_out_e, [1,h,w,c//(h*w)])

    out = tf.multiply(x, sig_out_t)

    return out


def _bn_relu(x, bn_name=None, relu_name=None):
    """Apply batch normalisation along the channel axis, then a ReLU.

    `bn_name` and `relu_name` are optional layer names.
    """
    normalise = BatchNormalization(axis=CHANNEL_AXIS, name=bn_name)
    rectify = Activation("relu", name=relu_name)
    return rectify(normalise(x))


def _conv_bn_relu(**conv_params):
    """Return a callable applying conv -> BN -> relu (ResNet v1 ordering).

    This is the original ResNet v1 scheme in https://arxiv.org/abs/1512.03385

    Required keys: `filters`, `kernel_size`.  Optional keys and their
    defaults: `strides` (1, 1), `dilation_rate` (1, 1), `padding` "same",
    `kernel_initializer` "he_normal", `kernel_regularizer` l2(1e-4),
    `conv_name` / `bn_name` / `relu_name` None.  Other keys are ignored.
    """
    conv_kwargs = dict(
        filters=conv_params["filters"],
        kernel_size=conv_params["kernel_size"],
        strides=conv_params.get("strides", (1, 1)),
        padding=conv_params.get("padding", "same"),
        dilation_rate=conv_params.get("dilation_rate", (1, 1)),
        kernel_initializer=conv_params.get("kernel_initializer", "he_normal"),
        kernel_regularizer=conv_params.get("kernel_regularizer", l2(1.e-4)),
        name=conv_params.get("conv_name"),
    )
    bn_name = conv_params.get("bn_name")
    relu_name = conv_params.get("relu_name")

    def f(x):
        # A fresh convolution layer is created on every call.
        convolved = Conv2D(**conv_kwargs)(x)
        return _bn_relu(convolved, bn_name=bn_name, relu_name=relu_name)

    return f


def _bn_relu_conv(**conv_params):
    """Return a callable applying BN -> relu -> conv (full pre-activation).

    This is the ResNet v2 scheme proposed in
    http://arxiv.org/pdf/1603.05027v2.pdf

    Required keys: `filters`, `kernel_size`.  Optional keys and their
    defaults: `strides` (1, 1), `dilation_rate` (1, 1), `padding` "same",
    `kernel_initializer` "he_normal", `kernel_regularizer` l2(1e-4),
    `conv_name` / `bn_name` / `relu_name` None.  Other keys are ignored.
    """
    conv_kwargs = dict(
        filters=conv_params["filters"],
        kernel_size=conv_params["kernel_size"],
        strides=conv_params.get("strides", (1, 1)),
        padding=conv_params.get("padding", "same"),
        dilation_rate=conv_params.get("dilation_rate", (1, 1)),
        kernel_initializer=conv_params.get("kernel_initializer", "he_normal"),
        kernel_regularizer=conv_params.get("kernel_regularizer", l2(1.e-4)),
        name=conv_params.get("conv_name"),
    )
    bn_name = conv_params.get("bn_name")
    relu_name = conv_params.get("relu_name")

    def f(x):
        pre_activated = _bn_relu(x, bn_name=bn_name, relu_name=relu_name)
        # A fresh convolution layer is created on every call.
        return Conv2D(**conv_kwargs)(pre_activated)

    return f


def _shortcut(input_feature, residual, conv_name_base=None, bn_name_base=None):
    """Sum `input_feature` with `residual`, projecting the input if needed.

    When the two tensors differ in spatial size or channel count, the input
    is first passed through a strided 1x1 convolution and batch
    normalisation so that the shapes agree; otherwise it is added unchanged.
    """
    in_shape = K.int_shape(input_feature)
    res_shape = K.int_shape(residual)

    # Ratios should be whole numbers if the network is configured correctly.
    row_stride = int(round(in_shape[ROW_AXIS] / res_shape[ROW_AXIS]))
    col_stride = int(round(in_shape[COL_AXIS] / res_shape[COL_AXIS]))
    channels_match = in_shape[CHANNEL_AXIS] == res_shape[CHANNEL_AXIS]

    needs_projection = row_stride > 1 or col_stride > 1 or not channels_match
    if not needs_projection:
        # Identity shortcut.
        return add([input_feature, residual])

    print('reshaping via a convolution...')
    if conv_name_base is not None:
        conv_name_base = conv_name_base + '1'
    projected = Conv2D(filters=res_shape[CHANNEL_AXIS],
                       kernel_size=(1, 1),
                       strides=(row_stride, col_stride),
                       padding="valid",
                       kernel_initializer="he_normal",
                       kernel_regularizer=l2(0.0001),
                       name=conv_name_base)(input_feature)
    if bn_name_base is not None:
        bn_name_base = bn_name_base + '1'
    projected = BatchNormalization(axis=CHANNEL_AXIS,
                                   name=bn_name_base)(projected)

    return add([projected, residual])


def _residual_block(block_function, filters, blocks, stage,
                    transition_strides=None, transition_dilation_rates=None,
                    dilation_rates=None, is_first_layer=False, dropout=None,
                    residual_unit=_bn_relu_conv):
    """Return a callable that chains `blocks` residual units for one stage.

       stage: integer, current stage label, used for generating layer names
       blocks: number of units to stack; the unit index is passed on as the
            block label used for generating layer names
       transition_strides: a list of stride tuples, one per unit
            (defaults to (1, 1) for every unit)
       transition_dilation_rates: a list of dilation tuples, one per unit;
            it receives a default but is not forwarded to the units
       dilation_rates: per-unit dilation rate forwarded to the units
            (defaults to 1 for every unit)
    """
    transition_dilation_rates = ([(1, 1)] * blocks
                                 if transition_dilation_rates is None
                                 else transition_dilation_rates)
    transition_strides = ([(1, 1)] * blocks
                          if transition_strides is None
                          else transition_strides)
    dilation_rates = [1] * blocks if dilation_rates is None else dilation_rates

    def f(x):
        out = x
        for index in range(blocks):
            build_unit = block_function(
                filters=filters, stage=stage, block=index,
                transition_strides=transition_strides[index],
                dilation_rate=dilation_rates[index],
                is_first_block_of_first_layer=(is_first_layer and index == 0),
                dropout=dropout,
                residual_unit=residual_unit)
            out = build_unit(out)
        return out

    return f


def _block_name_base(stage, block):
    """Get the convolution name base and batch normalization name base defined by
    stage and block.
    If there are less than 26 blocks they will be labeled 'a', 'b', 'c' to match the
    paper and keras and beyond 26 blocks they will simply be numbered.
    """
    if block < 27:
        block = '%c' % (block + 97)  # 97 is the ascii number for lowercase 'a'
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    return conv_name_base, bn_name_base


def basic_block(filters, stage, block, transition_strides=(1, 1),
                dilation_rate=(1, 1), is_first_block_of_first_layer=False, dropout=None,
                residual_unit=_bn_relu_conv):
    """Two stacked 3 X 3 convolution units with a shortcut, for resnets with
    layers <= 34.
    Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf

    Returns a callable mapping an input tensor to the block output.
    """
    def f(input_features):
        conv_base, bn_base = _block_name_base(stage, block)

        if not is_first_block_of_first_layer:
            first_unit = residual_unit(filters=filters, kernel_size=(3, 3),
                                       strides=transition_strides,
                                       dilation_rate=dilation_rate,
                                       conv_name_base=conv_base + '2a',
                                       bn_name_base=bn_base + '2a')
        else:
            # The stem already ended with bn->relu, so start with a bare conv.
            first_unit = Conv2D(filters=filters, kernel_size=(3, 3),
                                strides=transition_strides,
                                dilation_rate=dilation_rate,
                                padding="same",
                                kernel_initializer="he_normal",
                                kernel_regularizer=l2(1e-4),
                                name=conv_base + '2a')
        out = first_unit(input_features)

        if dropout is not None:
            out = Dropout(dropout)(out)

        second_unit = residual_unit(filters=filters, kernel_size=(3, 3),
                                    conv_name_base=conv_base + '2b',
                                    bn_name_base=bn_base + '2b')
        out = second_unit(out)

        return _shortcut(input_features, out)

    return f


def bottleneck(filters, stage, block, transition_strides=(1, 1),
               dilation_rate=(1, 1), is_first_block_of_first_layer=False, dropout=None,
               residual_unit=_bn_relu_conv):
    """Bottleneck (1 X 1 -> 3 X 3 -> 1 X 1) block for > 34 layer resnet.
    Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf
    Returns:
        A callable whose final conv layer has filters * 4 output channels
    """
    def f(input_feature):
        conv_base, bn_base = _block_name_base(stage, block)

        if not is_first_block_of_first_layer:
            reduce_unit = residual_unit(filters=filters, kernel_size=(1, 1),
                                        strides=transition_strides,
                                        dilation_rate=dilation_rate,
                                        conv_name_base=conv_base + '2a',
                                        bn_name_base=bn_base + '2a')
        else:
            # The stem already ended with bn->relu, so start with a bare conv.
            reduce_unit = Conv2D(filters=filters, kernel_size=(1, 1),
                                 strides=transition_strides,
                                 dilation_rate=dilation_rate,
                                 padding="same",
                                 kernel_initializer="he_normal",
                                 kernel_regularizer=l2(1e-4),
                                 name=conv_base + '2a')
        out = reduce_unit(input_feature)

        if dropout is not None:
            out = Dropout(dropout)(out)

        middle_unit = residual_unit(filters=filters, kernel_size=(3, 3),
                                    conv_name_base=conv_base + '2b',
                                    bn_name_base=bn_base + '2b')
        out = middle_unit(out)

        if dropout is not None:
            out = Dropout(dropout)(out)

        expand_unit = residual_unit(filters=filters * 4, kernel_size=(1, 1),
                                    conv_name_base=conv_base + '2c',
                                    bn_name_base=bn_base + '2c')
        out = expand_unit(out)

        return _shortcut(input_feature, out)

    return f


def _handle_dim_ordering():
    """Set the module-level ROW_AXIS / COL_AXIS / CHANNEL_AXIS indices to
    match the backend's current image data format.
    """
    global ROW_AXIS
    global COL_AXIS
    global CHANNEL_AXIS
    channels_last = K.image_data_format() == 'channels_last'
    if channels_last:
        ROW_AXIS, COL_AXIS, CHANNEL_AXIS = 1, 2, 3
    else:
        CHANNEL_AXIS, ROW_AXIS, COL_AXIS = 1, 2, 3


def _string_to_function(identifier):
    """Resolve a name to the module-level object it refers to.

    Non-string arguments are returned unchanged.  A string is looked up in
    this module's globals; a ValueError is raised when the lookup yields
    nothing (or a falsy object).
    """
    if not isinstance(identifier, six.string_types):
        return identifier

    resolved = globals().get(identifier)
    if not resolved:
        raise ValueError('Invalid {}'.format(identifier))
    return resolved


def ResNet(input_shape=None, classes=10, block='basic', residual_unit='v1',
           repetitions=None, initial_filters=64, activation='softmax', include_top=True,
           input_tensor=None, dropout=0.5, transition_dilation_rate=(1, 1),
           initial_strides=(1, 1), initial_kernel_size=(3, 3), initial_pooling=None,
           final_pooling=None, top='classification', cubs_arch=1):
    """Builds a custom ResNet-like architecture with CUBS blocks and a
    512-dimensional embedding head.

    With the defaults this uses basic blocks, the 'v1' residual unit
    (conv -> BN -> relu), a 3x3 stride-1 stem without pooling, and dropout
    of 0.5 inside every residual block.

    Args:
        input_shape: shape tuple of the input images; it is validated through
            `_obtain_input_shape` with `default_size=32` and `min_size=8`.
            It should have exactly 3 dimensions.
            E.g. `(64, 64, 3)` is the value used in this notebook.
        classes: only used by the 'segmentation' top (number of output maps)
            and by the sigmoid sanity check below. The 'classification' top
            no longer has a `classes`-way layer (it is commented out).
        block: The block function to use. This is either `'basic'` or `'bottleneck'`,
            the name of a module-level function, or the function itself.
            The original paper used `basic` for layers < 50.
        repetitions: Number of repetitions of various block units.
            At each block unit, the number of filters are doubled and, when
            `transition_dilation_rate` is (1, 1), the first block of the
            stage uses stride (2, 2). Default of None implies [3, 4, 6, 3].
        residual_unit: the basic residual unit, 'v1' for conv bn relu, 'v2' for bn relu
            conv. See [Identity Mappings in
            Deep Residual Networks](https://arxiv.org/abs/1603.05027)
            for details.
        initial_filters: number of filters of the stem convolution and of the
            first stage.
        activation: 'softmax', 'sigmoid' or None. Only applied by the
            'segmentation' top.
        include_top: whether to add the head selected by `top`.
        input_tensor: optional tensor passed to the `Input` layer.
        dropout: None for no dropout, otherwise rate of dropout from 0 to 1.
            Based on [Wide Residual Networks.(https://arxiv.org/pdf/1605.07146) paper.
        transition_dilation_rate: Dilation rate for transition layers. For semantic
            segmentation of images use a dilation rate of (2, 2).
        initial_strides: Stride of the stem convolution and of the optional
            MaxPooling2D, default (1, 1), which suits small images like cifar.
        initial_kernel_size: kernel size of the very first convolution, (7, 7) for
            imagenet and (3, 3) for small image datasets like tiny imagenet and cifar.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
        initial_pooling: Determine if there will be an initial pooling layer,
            'max' for imagenet and None for small image datasets.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
        final_pooling: Optional pooling mode for feature extraction at the final
            model layer when no top is added.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        top: Defines final layers to evaluate based on a specific problem type.
            'classification' adds global average pooling followed by a
            512-unit relu Dense layer (the embedding); 'segmentation' adds a
            per-pixel head for problems like the Pascal VOC dataset; None
            excludes these layers entirely.
        cubs_arch: how the CUBS blocks are attached after the last
            activation: 1 adds the outputs of `cubs1` and `cubs2` applied in
            parallel, 2 applies `cubs1` then `cubs2`, 3 applies `cubs2` then
            `cubs1`; any other value leaves the features untouched.
    Returns:
        The keras `Model`.
    Raises:
        ValueError: for an unsupported `activation`, or for 'sigmoid' with
            `classes != 1`.
        Exception: if the resolved input shape does not have 3 entries.
    """
    if activation not in ['softmax', 'sigmoid', None]:
        raise ValueError('activation must be one of "softmax", "sigmoid", or None')
    if activation == 'sigmoid' and classes != 1:
        raise ValueError('sigmoid activation can only be used when classes = 1')
    if repetitions is None:
        repetitions = [3, 4, 6, 3]
    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=32,
                                      min_size=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)
    # Sets the ROW_AXIS / COL_AXIS / CHANNEL_AXIS globals used by the helpers.
    _handle_dim_ordering()
    if len(input_shape) != 3:
        raise Exception("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")

    # Resolve `block` to a block-building function.
    if block == 'basic':
        block_fn = basic_block
    elif block == 'bottleneck':
        block_fn = bottleneck
    elif isinstance(block, six.string_types):
        block_fn = _string_to_function(block)
    else:
        block_fn = block

    # Resolve `residual_unit` to a unit-building function.
    if residual_unit == 'v2':
        residual_unit = _bn_relu_conv
    elif residual_unit == 'v1':
        residual_unit = _conv_bn_relu
    elif isinstance(residual_unit, six.string_types):
        residual_unit = _string_to_function(residual_unit)
    else:
        residual_unit = residual_unit

    # Permute dimension order if necessary
    if K.image_data_format() == 'channels_first':
        input_shape = (input_shape[1], input_shape[2], input_shape[0])
    # Determine proper input shape
    # NOTE(review): this repeats the validation done above -- confirm it is needed.
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=32,
                                      min_size=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)

    # Stem: conv -> BN -> relu, optionally followed by max pooling.
    img_input = Input(shape=input_shape, tensor=input_tensor)
    x = _conv_bn_relu(filters=initial_filters, kernel_size=initial_kernel_size,
                      strides=initial_strides)(img_input)
    if initial_pooling == 'max':
        x = MaxPooling2D(pool_size=(3, 3), strides=initial_strides, padding="same")(x)

    # From here on `block` holds the running feature tensor, not the argument.
    block = x
    filters = initial_filters
    for i, r in enumerate(repetitions):
        transition_dilation_rates = [transition_dilation_rate] * r
        transition_strides = [(1, 1)] * r
        if transition_dilation_rate == (1, 1):
            # Downsample in the first block of every stage.
            transition_strides[0] = (2, 2)
        block = _residual_block(block_fn, filters=filters,
                                stage=i, blocks=r,
                                is_first_layer=(i == 0),
                                dropout=dropout,
                                transition_dilation_rates=transition_dilation_rates,
                                transition_strides=transition_strides,
                                residual_unit=residual_unit)(block)
        filters *= 2

    # Last activation
    x = _bn_relu(block)

    ##############################
    ###        CUBS ARCHS      ###
    ##############################
    if cubs_arch == 1:
        # Parallel: both blocks see the same features; outputs are summed.
        x1 = cubs1(x)
        x2 = cubs2(x)
        x = add([x1,x2])
    elif cubs_arch == 2:
        # Sequential: cubs1 followed by cubs2.
        x = cubs1(x)
        x = cubs2(x)
    elif cubs_arch == 3:
        # Sequential: cubs2 followed by cubs1.
        x = cubs2(x)
        x = cubs1(x)

    # Classifier block
    if include_top and top == 'classification':
        x = GlobalAveragePooling2D()(x)
        # # New 512 embedding
        x = Dense(units=512, activation='relu',
                  kernel_initializer="he_normal")(x)
        # x = Dense(units=classes, activation=activation,
        #           kernel_initializer="he_normal")(x)
    elif include_top and top == 'segmentation':
        x = Conv2D(classes, (1, 1), activation='linear', padding='same')(x)

        if K.image_data_format() == 'channels_first':
            channel, row, col = input_shape
        else:
            row, col, channel = input_shape

        # Flatten the spatial grid so the activation runs over the class axis.
        x = Reshape((row * col, classes))(x)
        x = Activation(activation)(x)
        x = Reshape((row, col, classes))(x)
    elif final_pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif final_pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    model = Model(inputs=img_input, outputs=x)
    return model


def ResNet18_cubs_arch1(input_shape, classes):
    """ResNet18 (basic blocks, repetitions [2, 2, 2, 2]) with CUBS arch 1."""
    return ResNet(input_shape=input_shape, classes=classes, block=basic_block,
                  repetitions=[2, 2, 2, 2], cubs_arch=1)

def ResNet18_cubs_arch2(input_shape, classes):
    """ResNet18 (basic blocks, repetitions [2, 2, 2, 2]) with CUBS arch 2."""
    return ResNet(input_shape=input_shape, classes=classes, block=basic_block,
                  repetitions=[2, 2, 2, 2], cubs_arch=2)

def ResNet18_cubs_arch3(input_shape, classes):
    """ResNet18 with cubs arch 3 (cubs2 followed by cubs1).

    With ``cubs_arch=3`` the ``ResNet`` builder applies ``cubs2`` to the
    final feature map and then ``cubs1`` to that result.

    Args:
        input_shape: Forwarded to ``ResNet`` as its first positional argument.
        classes: Forwarded to ``ResNet`` as its second positional argument.

    Returns:
        The model object returned by ``ResNet``.
    """
    return ResNet(input_shape, classes, basic_block, repetitions=[2, 2, 2, 2], cubs_arch=3)

# Build the ResNet18 CUBS arch 2 model on the GPU device TensorFlow reports
# (the recorded run printed "/device:GPU:0").
device_name = tf.test.gpu_device_name()
print(device_name)
with tf.device(device_name):
    resnet_cubs_arch2 = ResNet18_cubs_arch2((64, 64, 3), 196)

# Training callbacks.
# The checkpoint keeps only the best full model (not just weights), where
# "best" means the highest validation recall_k8 seen so far.
checkpoint_path = 'cars_resnet18_cubs_arch2_embedding_loss'
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_recall_k8',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
)

# Early stopping watches the *training* loss, whereas the checkpoint above
# watches a validation metric.
es = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=5,
)

# NOTE(review): learning_rate=1 is unusually large for SGD with Nesterov
# momentum; confirm it is intentional for sim_loss.
sgd = keras.optimizers.SGD(
    learning_rate=1,
    decay=1e-6,
    momentum=0.9,
    nesterov=True,
)

# Alternative starting point (disabled): reload a previously saved model.
# resnet = keras.models.load_model('resnet18')

# Compile with the custom similarity loss and report recall at 1, 2, 4 and 8.
resnet_cubs_arch2.compile(
    loss=sim_loss,
    optimizer=sgd,
    metrics=[
        recall_k1,
        recall_k2,
        recall_k4,
        recall_k8,
    ],
)
resnet_cubs_arch2.summary()

/device:GPU:0
reshaping via a convolution...
reshaping via a convolution...
reshaping via a convolution...


2021-11-05 12:49:48.266195: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 12:49:48.267172: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 12:49:48.267635: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 12:49:48.268518: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:27:00.0/numa_node
Your kernel may have been built without NUMA support.
2021-11-05 12:49:48.268531: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1594] Could not ident

reshaping via a convolution...
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 64, 64, 3)]  0                                            
__________________________________________________________________________________________________
conv2d_23 (Conv2D)              (None, 64, 64, 64)   1792        input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_21 (BatchNo (None, 64, 64, 64)   256         conv2d_23[0][0]                  
__________________________________________________________________________________________________
activation_21 (Activation)      (None, 64, 64, 64)   0           batch_normalization_21[0][0]     
_____________________________________________________________

In [None]:
### Train w/ new embedding + custom loss & report validation recall at 1,2,4,8
# Runs for at most 20 epochs; cp_callback and es are passed as callbacks.
cars_embedding_history = resnet_cubs_arch2.fit(
    cars_train_generator,
    validation_data=cars_val_generator,
    epochs=20,
    callbacks=[cp_callback, es],
)



Epoch 1/20





2021-11-05 12:52:12.467971: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: cars_resnet18_cubs_arch2_embedding_loss/assets




Epoch 2/20
INFO:tensorflow:Assets written to: cars_resnet18_cubs_arch2_embedding_loss/assets




Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Show the per-epoch validation values of the recall_k8 metric recorded by fit().
# NOTE(review): in the recorded output the value is 0.28125 for every epoch
# after the first — worth confirming the model is still learning past epoch 2.
print(cars_embedding_history.history['val_recall_k8'])

[0.0824652761220932, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125, 0.28125]
