In [None]:
import tensorflow as tf
# List the physical devices TensorFlow can see (e.g. to check whether a GPU is visible).
tf.config.experimental.list_physical_devices()

In [None]:
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import argparse
import pandas as pd
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt 
import matplotlib.patches as patches

import numpy as np
import cv2
from glob import glob
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau, EarlyStopping, TensorBoard
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy, SparseCategoricalCrossentropy,CategoricalCrossentropy
from sklearn.preprocessing import LabelEncoder

from tqdm import tqdm
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import load_model, Model
# from tensorflow.keras import models
from tensorflow.keras.layers import *


def xml_to_csv(path):
    """Collect Pascal-VOC style XML annotations from a directory into a DataFrame.

    Every regular file in ``path`` whose name ends with ``.xml`` is parsed and
    one row is produced per ``<object>`` element.

    Fields are looked up by tag name (``filename``, ``size/width``,
    ``size/height``, ``object/name``, ``object/bndbox/{xmin,xmax,ymin,ymax}``)
    rather than by child position, so the result does not depend on the order
    in which the annotation tool wrote the tags.

    Args:
        path: Directory containing the ``.xml`` annotation files.

    Returns:
        pandas.DataFrame with columns ``filename, width, height, class, xmin,
        xmax, ymin, ymax`` (same column order as before). Empty when no
        objects are found.
    """
    xml_list = []
    for file in os.scandir(path):
        if not (file.is_file() and file.name.endswith('.xml')):
            continue

        root = ET.parse(os.path.join(path, file.name)).getroot()
        objects = root.findall('object')
        if not objects:
            # Nothing to emit for this file; do not touch the other tags.
            continue

        filename = root.find('filename').text
        size = root.find('size')
        img_width = int(size.find('width').text)
        img_height = int(size.find('height').text)

        for member in objects:
            bndbox = member.find('bndbox')
            xml_list.append((
                filename,
                img_width,
                img_height,
                member.find('name').text,
                int(bndbox.find('xmin').text),
                int(bndbox.find('xmax').text),
                int(bndbox.find('ymin').text),
                int(bndbox.find('ymax').text),
            ))

    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'xmax', 'ymin', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df


if __name__ == "__main__":
    # One annotation folder per split inside the Kaggle dataset.
    path_train = '/kaggle/input/custom-dataset-new/train'
    path_valid = '/kaggle/input/custom-dataset-new/valid'
    path_test = '/kaggle/input/custom-dataset-new/test'

    # Parse the XML annotations of each split into its own DataFrame (train, valid, test order).
    train, valid, test = (
        xml_to_csv(split_dir) for split_dir in (path_train, path_valid, path_test)
    )

    print('Successfully converted xml to csv.')

In [None]:
# Preview the first rows of the training annotations.
train.head()

In [None]:
# Fit the encoder ONCE on the training classes and reuse that mapping for every
# split. Re-fitting per split (fit_transform on valid/test) can assign different
# integers to the same class name whenever a split is missing a class.
label_encoder = LabelEncoder()
original_values = train['class'].unique()

label_encoder.fit(train['class'])
train['class_encoded'] = label_encoder.transform(train['class'])
# transform() raises ValueError if valid/test contain a class never seen in train.
valid['class_encoded'] = label_encoder.transform(valid['class'])
test['class_encoded'] = label_encoder.transform(test['class'])

In [None]:
# Build {class name: integer code} from the training split, ordered by code.
pre_dict = train[['class', 'class_encoded']].drop_duplicates()
class_dict = {
    class_name: class_code
    for class_name, class_code in sorted(
        zip(pre_dict['class'], pre_dict['class_encoded'].values.tolist()),
        key=lambda pair: pair[1],
    )
}
class_dict

In [None]:
def ShowImage(data, path, number=None):
    """Show the first ``number`` annotated images, without and with their box.

    Each row of the figure shows the raw image on the left and the same image
    with its bounding box drawn in red on the right; both are titled with the
    row's ``class`` value.

    Args:
        data: DataFrame with ``filename``, ``class``, ``xmin``, ``ymin``,
            ``xmax`` and ``ymax`` columns.
        path: Directory that contains the image files named in ``filename``.
        number: How many rows (taken by position from the top of ``data``) to
            display. ``None`` means every row; values larger than
            ``len(data)`` are clamped.
    """
    total = len(data)
    number = total if number is None else min(number, total)
    if number <= 0:
        return

    # squeeze=False keeps `axes` two-dimensional even when number == 1.
    # The height scales with the row count (number=6 gives the former 20x60).
    fig, axes = plt.subplots(number, 2, figsize=(20, 10 * number), squeeze=False)
    for i in range(number):
        # Positional access: works even when `data` does not have a 0..n-1 index.
        row = data.iloc[i]
        img = plt.imread(os.path.join(path, row['filename']))
        x, y = row['xmin'], row['ymin']
        box_w, box_h = row['xmax'] - row['xmin'], row['ymax'] - row['ymin']
        rect = patches.Rectangle((x, y),
                             box_w, box_h,
                             linewidth = 2,
                             edgecolor = 'r',
                             facecolor = 'none')
        axes[i, 0].imshow(img)
        axes[i, 1].imshow(img)
        axes[i, 1].add_patch(rect)
        # Title comes from the frame being displayed, not from the global `train`.
        axes[i, 0].set_title(row['class'])
        axes[i, 1].set_title(row['class'])
        axes[i, 0].axis("off")
        axes[i, 1].axis("off")
    plt.tight_layout()
    plt.show()

if __name__ == "__main__":
    # Preview the first six training annotations.
    ShowImage(train , path_train, number=6) 

In [None]:
# NOTE(review): `global` statements at module level have no effect in Python.
# These four names are assigned in the `if __name__ == "__main__":` block at the
# end of this cell and are read as module globals by the helper functions below.
global height
global width
global num_classes
global AUTOTUNE

def load_labels(data):
    """Return the distinct values of the ``class`` column of ``data`` as a list."""
    class_column = data['class']
    return class_column.unique().tolist()

def load_data(path, data, classes, p_data=None):
    """Turn an annotation DataFrame into parallel lists of paths, boxes and labels.

    Args:
        path: Dataset root directory; images are looked up in a sub-folder of it.
        data: DataFrame with ``filename``, ``xmin``, ``ymin``, ``xmax``,
            ``ymax`` and ``class_encoded`` columns.
        classes: Accepted for interface compatibility; not used here.
        p_data: ``'train'`` or ``'valid'`` selects that sub-folder; any other
            value (including ``None``) selects ``"test"``.

    Returns:
        ``(images, bboxes, labels)``: image paths, ``[x1, y1, x2, y2]`` integer
        boxes and integer labels, one entry per row of ``data``.
    """
    subdir = p_data if p_data in ('train', 'valid') else 'test'

    images, bboxes, labels = [], [], []
    for _, record in data.iterrows():
        images.append(os.path.join(path, subdir, record['filename']))
        bboxes.append([int(record[corner]) for corner in ('xmin', 'ymin', 'xmax', 'ymax')])
        labels.append(int(record['class_encoded']))

    return images, bboxes, labels

def read_image_bbox(path, bbox, label_index):
    """Load one sample: normalised image, normalised box and one-hot label.

    Reads the module globals ``width``, ``height`` and ``num_classes``.

    Args:
        path: Image path as ``bytes`` (decoded here before reading).
        bbox: ``[x1, y1, x2, y2]`` in pixels of the original image.
        label_index: Integer class index to set in the one-hot vector.

    Returns:
        Tuple of float32 arrays: the image resized to ``(height, width)`` and
        scaled to ``[-1, 1]``; the box divided by the original image width /
        height; and a one-hot vector of length ``num_classes``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image at ``path``.
    """
    path = path.decode()
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"cv2.imread could not read image: {path}")

    # Remember the original size: the box is expressed in original pixels.
    h, w, _ = image.shape
    image = cv2.resize(image, (width, height))
    image = ((image - 127.5) / 127.5).astype(np.float32)  # [-1, +1]

    x1, y1, x2, y2 = bbox
    norm_bbox = np.array([x1 / w, y1 / h, x2 / w, y2 / h], dtype=np.float32)

    class_label = np.zeros(num_classes, dtype=np.float32)
    class_label[int(label_index)] = 1

    return image, norm_bbox, class_label

def parse(image, bbox, label):
    """tf.data map function: run ``read_image_bbox`` and restore static shapes.

    Returns ``image, (bbox, label)`` so the dataset yields one input and a pair
    of targets.
    """
    float_outputs = [tf.float32, tf.float32, tf.float32]
    image, bbox, label = tf.numpy_function(read_image_bbox, [image, bbox, label], float_outputs)

    # tf.numpy_function drops shape information; set it again explicitly.
    image.set_shape([height, width, 3])
    bbox.set_shape([4])
    label.set_shape([num_classes])
    return image, (bbox, label)

def tf_dataset(images, bboxes, labels, batch_size, shuffle=False):
    """Build a cached, batched and prefetched ``tf.data.Dataset`` from sample lists.

    Elements are mapped through ``parse``; when ``shuffle`` is true a shuffle
    with a buffer of 1000 is applied after caching and before batching.
    """
    samples = tf.data.Dataset.from_tensor_slices((images, bboxes, labels))
    pipeline = samples.map(parse, num_parallel_calls=AUTOTUNE).cache()
    if shuffle:
        pipeline = pipeline.shuffle(1000)
    return pipeline.batch(batch_size).prefetch(buffer_size=AUTOTUNE)

def load_dataset(path, data_type, classes):
    """Run ``load_data`` on the train, valid and test frames.

    Args:
        path: Dataset root directory passed through to ``load_data``.
        data_type: Sequence whose first three items are the train, valid and
            test DataFrames, in that order.
        classes: Passed through to ``load_data``.

    Returns:
        Three ``(images, bboxes, labels)`` tuples, for train, valid and test.
    """
    frames = (data_type[0], data_type[1], data_type[2])
    split_names = ('train', 'valid', 'test')
    return tuple(
        load_data(path, frame, classes, split_name)
        for split_name, frame in zip(split_names, frames)
    )

if __name__ == "__main__":

    # Globals read by read_image_bbox / parse / tf_dataset.
    height, width = 320, 320
    batch_size = 32
    AUTOTUNE = tf.data.AUTOTUNE

    path = '/kaggle/input/custom-dataset-new'
    data_type = [train, valid, test]

    # The number of classes is taken from the training annotations.
    classes = load_labels(train)
    num_classes = len(classes)

    (
        (train_images, train_bboxes, train_labels),
        (valid_images, valid_bboxes, valid_labels),
        (test_images, test_bboxes, test_labels),
    ) = load_dataset(path, data_type, classes)

    print(f"Classes: {classes}")
    print(f"Total class : {num_classes}")
    print(f"Train: {len(train_images)} - {len(train_bboxes)} - {len(train_labels)}")
    print(f"Valid: {len(valid_images)} - {len(valid_bboxes)} - {len(valid_labels)}")
    print(f"Test : {len(test_images)} - {len(test_bboxes)} - {len(test_labels)}")

In [None]:
# Input pipelines: the training set is shuffled, the validation set keeps its order.
train_ds = tf_dataset(train_images, train_bboxes, train_labels, batch_size, shuffle=True)
valid_ds = tf_dataset(valid_images, valid_bboxes, valid_labels, batch_size )

In [None]:
# Sanity check of the input pipeline: draw every sample of one training batch
# with its box and decoded class name, one figure per sample.
# NOTE(review): this cell rebinds the notebook-level names `images`, `bboxes`,
# `labels`, `image`, `bbox` and `label`.
for batch in train_ds.take(1):  
    (images) ,(bboxes, labels) = batch
    for i in range(len(images)): 
        image = images[i].numpy()  
        # Undo the (x - 127.5) / 127.5 scaling applied in read_image_bbox.
        image = (image * 127.5 + 127.5).astype('uint8')  
        bbox = bboxes[i].numpy()  
        label = labels[i].numpy() 
        plt.figure(figsize=(8, 8))
        # Reverse the channel axis: the image was loaded with cv2 (BGR), matplotlib expects RGB.
        plt.imshow(image[...,::-1])
        h, w, _ = image.shape
        # Boxes are stored as fractions of width/height; scale back to pixels of the resized image.
        x1, y1, x2, y2 = bbox * [w, h, w, h]  
        plt.gca().add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='red', facecolor='none'))
        # The one-hot label is decoded to a name through class_dict (name -> index).
        class_id = label.argmax()
        text = [i for i in class_dict if class_dict[i]== int(class_id)]
        plt.title(f"Label: {text[0]}")
        plt.axis('off')
        plt.show()

In [None]:
# class Mobilenet_V2():
#     def __init__(self, *, inp_shape = (320,320,3), rho = 1.0 , alpha = 1.0, expansion = 6.0, classes = 2, droppout = 0.0):
#         assert alpha > 0 and alpha <= 1 ,'Error, my Mobilenet_V2 can only accept  alpha > 0 and alpha <= 1'
#         assert rho > 0 and rho <= 1 ,'Error, my Mobilenet_V2 can only accept  rho > 0 and rho <= 1'
#         self._inp_shape = inp_shape
#         self._rho = rho
#         self._alpha = alpha
#         self._expansion = expansion
#         self._classes = classes
#         self._droppout = droppout
#     def _depthwiseconv(self, *, strides: int):
#         return models.Sequential([
#             DepthwiseConv2D(kernel_size= (3,3), strides= strides, padding= 'same' if strides == 1 else 'valid', use_bias= False),
#             BatchNormalization(),
#             ReLU(max_value= 6.)
#         ])
#     def _pointwiseconv(self, *, filters: int, linear: bool):
#         layer = models.Sequential([
#             Conv2D(filters= int(filters * self._alpha), kernel_size= (1,1), strides= (1,1), padding= 'same', use_bias= False),
#             BatchNormalization(),
#         ])
#         if linear == False:
#             layer.add(ReLU(max_value= 6.))
#         return layer
#     def _standardconv(self):
#         return models.Sequential([
#             Conv2D(filters= 32, kernel_size= (3,3), strides= (2,2), use_bias= False),
#             BatchNormalization(),
#             ReLU(max_value= 6.)
#         ])
#     def _inverted_residual_block_(self, x, *, strides_depthwise: int, filter_pointwise: int, expansion: int):
#         filter = int(filter_pointwise * self._alpha)
#         fx = self._pointwiseconv(filters= filter * expansion, linear= False)(x)
#         fx = self._depthwiseconv(strides= strides_depthwise)(fx)
#         fx = self._pointwiseconv(filters= filter , linear= True)(fx)
#         if strides_depthwise == 1 and x.shape[-1] == filter_pointwise:
#             return add([fx,x])
#         else:
#             return fx
#     def _bottleneck_block_(self, x, *,  s: int, c: int, t: int, n: int):
#         x = self._inverted_residual_block_(x, strides_depthwise= s, filter_pointwise= c, expansion= t)
#         for i in range(n-1):
#             x = self._inverted_residual_block_(x, strides_depthwise= 1, filter_pointwise= c, expansion= t)
#         return x 
#     def build(self):
#         print("Loading model Mobilenetv2...")
#         feature_map = int(self._rho * self._inp_shape[0])
#         img_inp = Input(shape= (feature_map, feature_map,3))
#         x = self._standardconv()(img_inp)
#         x = self._bottleneck_block_(x, s= 1, c= 16, t= 1, n= 1)
#         x = self._bottleneck_block_(x, s= 2, c= 24, t= self._expansion, n= 2)
#         x = self._bottleneck_block_(x, s= 2, c= 32, t= self._expansion, n= 3)
#         x = self._bottleneck_block_(x, s= 2, c= 64, t= self._expansion, n= 4)
#         x = self._bottleneck_block_(x, s= 1, c= 96, t= self._expansion, n= 3)
#         x = self._bottleneck_block_(x, s= 2, c= 160, t= self._expansion, n= 3)
#         x = self._bottleneck_block_(x, s= 1, c= 320, t= self._expansion, n= 1)
#         x = self._pointwiseconv(filters= 1280, linear= False)(x)
#         x = GlobalAveragePooling2D()(x)
#         x = Dropout(self._droppout)(x)
        
#         bbox = Dense(4, activation="sigmoid", name="bbox")(x)
#         label = Dense(self._classes, activation='softmax', name="label")(x)
        
#         print("Success")
        
#         return models.Model(img_inp, outputs=[bbox, label])


In [None]:
# model = Mobilenet_V2(inp_shape=(height,width,3), rho=1.0, alpha=1.0, expansion=6.0, classes=2, droppout=0.5).build()

In [None]:
def InceptionV3(input_shape=(320, 320, 3), num_classes=4):
    """Build the Inception-style backbone with a box head and a class head.

    The network stacks the stem, three A blocks, reduction A, four B blocks,
    reduction B and two C blocks defined in this file, followed by global
    average pooling, a 2048-unit dense layer and dropout.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.
            Defaults to the previously hard-coded ``(320, 320, 3)``.
        num_classes: Number of units of the softmax ``label`` head. Defaults
            to the previously hard-coded ``4``; it must match the length of
            the one-hot labels fed during training.

    Returns:
        A Keras ``Model`` named ``'Inception-V3'`` with outputs
        ``[bbox, label]``: ``bbox`` is 4 sigmoid units and ``label`` is
        ``num_classes`` softmax units.
    """
    input_layer = Input(shape=input_shape)
    
    x = StemBlock(input_layer)
    
    x = InceptionBlock_A(prev_layer = x ,nbr_kernels = 32)
    x = InceptionBlock_A(prev_layer = x ,nbr_kernels = 64)
    x = InceptionBlock_A(prev_layer = x ,nbr_kernels = 64)
    
    x = ReductionBlock_A(prev_layer = x )
    
    x = InceptionBlock_B(prev_layer = x  , nbr_kernels = 128)
    x = InceptionBlock_B(prev_layer = x , nbr_kernels = 160)
    x = InceptionBlock_B(prev_layer = x , nbr_kernels = 160)
    x = InceptionBlock_B(prev_layer = x , nbr_kernels = 192)
    
    # The auxiliary classifier is intentionally left disabled.
#     Aux = auxiliary_classifier(prev_Layer = x)
    
    x = ReductionBlock_B(prev_layer = x)
    
    x = InceptionBlock_C(prev_layer = x)
    x = InceptionBlock_C(prev_layer = x)
    
    x = GlobalAveragePooling2D()(x)
    x = Dense(units=2048, activation='relu') (x)
    x = Dropout(rate = 0.2) (x)
    
    # Two heads on the shared features: box coordinates in [0, 1] and class probabilities.
    bbox = Dense(4, activation="sigmoid", name="bbox")(x)
    label = Dense(num_classes, activation='softmax', name="label")(x)
    
    
    model = Model(inputs = input_layer , outputs = [bbox , label] , name = 'Inception-V3')
    
    return model


def conv_with_Batch_Normalisation(prev_layer , nbr_kernels , filter_Size , strides =(1,1) , padding = 'same'):
    """Apply Conv2D, then BatchNormalization over axis 3, then ReLU to ``prev_layer``."""
    conv_out = Conv2D(
        filters=nbr_kernels,
        kernel_size=filter_Size,
        strides=strides,
        padding=padding,
    )(prev_layer)
    normalised = BatchNormalization(axis=3)(conv_out)
    return Activation(activation='relu')(normalised)


def StemBlock(prev_layer):
    """Stem: three 3x3 convs (the first with stride 2), max-pool, 1x1 and 3x3 convs, max-pool."""
    cbn = conv_with_Batch_Normalisation

    out = cbn(prev_layer, nbr_kernels=32, filter_Size=(3, 3), strides=(2, 2))
    for n_filters in (32, 64):
        out = cbn(out, nbr_kernels=n_filters, filter_Size=(3, 3))
    out = MaxPool2D(pool_size=(3, 3), strides=(2, 2))(out)

    out = cbn(out, nbr_kernels=80, filter_Size=(1, 1))
    out = cbn(out, nbr_kernels=192, filter_Size=(3, 3))
    return MaxPool2D(pool_size=(3, 3), strides=(2, 2))(out)
    

def InceptionBlock_A(prev_layer  , nbr_kernels):
    """Inception block A: four parallel branches concatenated on the channel axis.

    ``nbr_kernels`` sets the filter count of the 1x1 conv in the pooling branch.
    """
    cbn = conv_with_Batch_Normalisation

    # Branch 1: 1x1 followed by two 3x3 convolutions.
    double_3x3 = cbn(prev_layer, nbr_kernels=64, filter_Size=(1, 1))
    for _ in range(2):
        double_3x3 = cbn(double_3x3, nbr_kernels=96, filter_Size=(3, 3))

    # Branch 2: 1x1 followed by a single 3x3 convolution.
    single_3x3 = cbn(prev_layer, nbr_kernels=48, filter_Size=(1, 1))
    single_3x3 = cbn(single_3x3, nbr_kernels=64, filter_Size=(3, 3))

    # Branch 3: average pooling followed by a 1x1 projection.
    pooled = AveragePooling2D(pool_size=(3, 3), strides=(1, 1), padding='same')(prev_layer)
    pooled = cbn(pooled, nbr_kernels=nbr_kernels, filter_Size=(1, 1))

    # Branch 4: plain 1x1 convolution.
    pointwise = cbn(prev_layer, nbr_kernels=64, filter_Size=(1, 1))

    return concatenate([double_3x3, single_3x3, pooled, pointwise], axis=3)


def InceptionBlock_B(prev_layer , nbr_kernels):
    """Inception block B: factorised 7x7 branches plus pooling and 1x1 branches.

    ``nbr_kernels`` is the width of the intermediate convolutions; every
    branch ends with 192 filters before concatenation on the channel axis.
    """
    cbn = conv_with_Batch_Normalisation

    # Branch 1: 1x1, then (7,1)/(1,7) applied twice, widening to 192 at the end.
    deep_7x7 = prev_layer
    for n_filters, kernel in (
        (nbr_kernels, (1, 1)),
        (nbr_kernels, (7, 1)),
        (nbr_kernels, (1, 7)),
        (nbr_kernels, (7, 1)),
        (192, (1, 7)),
    ):
        deep_7x7 = cbn(deep_7x7, nbr_kernels=n_filters, filter_Size=kernel)

    # Branch 2: 1x1, then one (1,7)/(7,1) pair.
    shallow_7x7 = prev_layer
    for n_filters, kernel in (
        (nbr_kernels, (1, 1)),
        (nbr_kernels, (1, 7)),
        (192, (7, 1)),
    ):
        shallow_7x7 = cbn(shallow_7x7, nbr_kernels=n_filters, filter_Size=kernel)

    # Branch 3: average pooling followed by a 1x1 projection.
    pooled = AveragePooling2D(pool_size=(3, 3), strides=(1, 1), padding='same')(prev_layer)
    pooled = cbn(pooled, nbr_kernels=192, filter_Size=(1, 1))

    # Branch 4: plain 1x1 convolution.
    pointwise = cbn(prev_layer, nbr_kernels=192, filter_Size=(1, 1))

    return concatenate([deep_7x7, shallow_7x7, pooled, pointwise], axis=3)

    
def InceptionBlock_C(prev_layer):
    """Inception block C: two branches that fan out into (1,3)/(3,1) pairs, plus pooling and 1x1 branches."""
    cbn = conv_with_Batch_Normalisation

    # Branch 1: 1x1 -> 3x3, then split into parallel (1,3) and (3,1) convs.
    trunk_a = cbn(prev_layer, nbr_kernels=448, filter_Size=(1, 1))
    trunk_a = cbn(trunk_a, nbr_kernels=384, filter_Size=(3, 3))
    split_a = [
        cbn(trunk_a, nbr_kernels=384, filter_Size=(1, 3)),
        cbn(trunk_a, nbr_kernels=384, filter_Size=(3, 1)),
    ]
    fan_a = concatenate(split_a, axis=3)

    # Branch 2: 1x1, then split into parallel (1,3) and (3,1) convs.
    trunk_b = cbn(prev_layer, nbr_kernels=384, filter_Size=(1, 1))
    split_b = [
        cbn(trunk_b, nbr_kernels=384, filter_Size=(1, 3)),
        cbn(trunk_b, nbr_kernels=384, filter_Size=(3, 1)),
    ]
    fan_b = concatenate(split_b, axis=3)

    # Branch 3: average pooling followed by a 1x1 projection.
    pooled = AveragePooling2D(pool_size=(3, 3), strides=(1, 1), padding='same')(prev_layer)
    pooled = cbn(pooled, nbr_kernels=192, filter_Size=(1, 1))

    # Branch 4: plain 1x1 convolution.
    pointwise = cbn(prev_layer, nbr_kernels=320, filter_Size=(1, 1))

    return concatenate([fan_a, fan_b, pooled, pointwise], axis=3)


def ReductionBlock_A(prev_layer):
    """Reduction block A: three stride-2 branches ('same' padding) concatenated on the channel axis."""
    cbn = conv_with_Batch_Normalisation

    # Branch 1: 1x1 -> 3x3 -> strided 3x3.
    stacked = cbn(prev_layer, nbr_kernels=64, filter_Size=(1, 1))
    stacked = cbn(stacked, nbr_kernels=96, filter_Size=(3, 3))
    stacked = cbn(stacked, nbr_kernels=96, filter_Size=(3, 3), strides=(2, 2))

    # Branch 2: a single strided 3x3 convolution.
    strided = cbn(prev_layer, nbr_kernels=384, filter_Size=(3, 3), strides=(2, 2))

    # Branch 3: strided max pooling.
    pooled = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(prev_layer)

    return concatenate([stacked, strided, pooled], axis=3)


def ReductionBlock_B(prev_layer):
    """Reduction block B: three stride-2 branches ('valid' padding on the strided ops) concatenated on the channel axis."""
    cbn = conv_with_Batch_Normalisation

    # Branch 1: 1x1 -> (1,7) -> (7,1) -> strided 3x3.
    factorised = prev_layer
    for kernel in ((1, 1), (1, 7), (7, 1)):
        factorised = cbn(factorised, nbr_kernels=192, filter_Size=kernel)
    factorised = cbn(factorised, nbr_kernels=192, filter_Size=(3, 3), strides=(2, 2), padding='valid')

    # Branch 2: 1x1 -> strided 3x3.
    strided = cbn(prev_layer, nbr_kernels=192, filter_Size=(1, 1))
    strided = cbn(strided, nbr_kernels=320, filter_Size=(3, 3), strides=(2, 2), padding='valid')

    # Branch 3: strided max pooling.
    pooled = MaxPool2D(pool_size=(3, 3), strides=(2, 2))(prev_layer)

    return concatenate([factorised, strided, pooled], axis=3)


def auxiliary_classifier(prev_Layer):
    """Auxiliary head: average pool, 1x1 conv, flatten, dense 768, dropout, 4-way softmax."""
    pooled = AveragePooling2D(pool_size=(5, 5), strides=(3, 3))(prev_Layer)
    projected = conv_with_Batch_Normalisation(pooled, nbr_kernels=128, filter_Size=(1, 1))
    flat = Flatten()(projected)
    hidden = Dense(units=768, activation='relu')(flat)
    hidden = Dropout(rate=0.2)(hidden)
    return Dense(units=4, activation='softmax')(hidden)

In [None]:
# Build the two-headed (bbox + label) network defined above.
model = InceptionV3()

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
import math

def ciou_loss(target,  output):
    """Complete-IoU (CIoU) loss for boxes given as ``[x1, y1, x2, y2]``.

    Computes ``1 - IoU + d^2 / c^2 + alpha * v`` per box, where ``d`` is the
    distance between box centres, ``c`` the diagonal of the smallest box that
    encloses both, and ``v`` the aspect-ratio consistency term.

    Coordinates are treated as continuous values (the pipeline feeds boxes
    normalised to ``[0, 1]``), so areas are plain ``width * height``. The
    pixel-grid ``+ 1`` convention is not used: on normalised boxes it makes
    every pair of boxes look almost perfectly overlapping.

    Args:
        target: Ground-truth boxes, shape ``(batch, 4)``.
        output: Predicted boxes, shape ``(batch, 4)``.

    Returns:
        Tensor of shape ``(batch, 1)`` with the per-box loss.
    """
    eps = 1e-7
    target = tf.cast(target, tf.float32)
    # Kept from the original: predicted coordinates are zeroed wherever the
    # ground-truth coordinate is exactly 0, so those entries add no error.
    output = output * tf.cast(target != 0, tf.float32)

    x1g, y1g, x2g, y2g = tf.split(value=target, num_or_size_splits=4, axis=1)
    x1, y1, x2, y2 = tf.split(value=output, num_or_size_splits=4, axis=1)

    w_pred, h_pred = x2 - x1, y2 - y1
    w_gt, h_gt = x2g - x1g, y2g - y1g

    ### IoU term ###
    inter_w = tf.maximum(0.0, tf.minimum(x2g, x2) - tf.maximum(x1g, x1))
    inter_h = tf.maximum(0.0, tf.minimum(y2g, y2) - tf.maximum(y1g, y1))
    inter_area = inter_w * inter_h
    union_area = w_gt * h_gt + w_pred * h_pred - inter_area
    iou = inter_area / (union_area + eps)

    ### distance term: squared centre distance over squared enclosing diagonal ###
    enclose_w = tf.maximum(x2, x2g) - tf.minimum(x1, x1g)
    enclose_h = tf.maximum(y2, y2g) - tf.minimum(y1, y1g)
    diag_sq = enclose_w ** 2 + enclose_h ** 2 + eps
    centre_sq = ((x1 + x2) / 2 - (x1g + x2g) / 2) ** 2 + ((y1 + y2) / 2 - (y1g + y2g) / 2) ** 2
    distance = centre_sq / diag_sq

    ### aspect-ratio term ###
    atan_diff = tf.atan(w_gt / (h_gt + eps)) - tf.atan(w_pred / (h_pred + eps))
    v = (4 / (math.pi ** 2)) * tf.pow(atan_diff, 2)
    # alpha is a trade-off weight, not a quantity to optimise through.
    alpha = tf.stop_gradient(v / (1 - iou + v + eps))

    return 1 - iou + distance + alpha * v

In [None]:
# Training configuration: learning rate for Adam and the maximum number of epochs.
lr = 1e-4  
num_epochs = 200

# One loss per output head; the keys match the `name=` of the two Dense heads.
model.compile(
    loss = {
#         "bbox": BinaryCrossentropy(from_logits=False),
        "bbox": ciou_loss,
        "label": CategoricalCrossentropy(from_logits=False) ,
    },
    optimizer=Adam(lr),
    # NOTE(review): 'acc' on the continuous bbox output is unlikely to be a
    # meaningful metric, but the plotting cell reads 'bbox_acc' / 'val_bbox_acc'.
    metrics={
        "bbox": ['acc'], 
        "label": ['acc'] 
    }
)

callbacks = [
    # Keeps only the best model seen so far (no `monitor` given, so the Keras default applies).
    ModelCheckpoint('best_model.keras', verbose=1, save_best_only=True),
    # Multiply the learning rate by 0.1 after 20 epochs without val_loss improvement (floor 1e-6).
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=20, min_lr=1e-6, verbose=1),
    # append=True: re-running this cell adds rows to an existing log.csv.
    CSVLogger('log.csv', append=True),
    # Stop after 50 epochs without val_loss improvement; the best weights are NOT restored.
    EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=False, verbose=1),
]

history= model.fit(
    train_ds,
    epochs=num_epochs,
    validation_data=valid_ds,
    callbacks=callbacks
)
# Saves the weights of the last epoch run; the best checkpoint is in 'best_model.keras'.
model.save('final_model.keras')

In [None]:
plt.figure(figsize=(18,6))

# (title, y-axis label, training key, validation key) for each panel, left to right.
history_panels = [
    ('Model loss', 'loss', 'loss', 'val_loss'),
    ('Model bbox accuracy', 'accuracy', 'bbox_acc', 'val_bbox_acc'),
    ('Model label accuracy', 'accuracy', 'label_acc', 'val_label_acc'),
]

for panel_index, (panel_title, y_label, train_key, valid_key) in enumerate(history_panels, start=1):
    plt.subplot(1, 3, panel_index)
    plt.plot(history.history[train_key])
    plt.plot(history.history[valid_key])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('epochs')
    plt.legend(['train','valid'], loc='upper right')

plt.show()

In [None]:
def cal_iou(y_true, y_pred):
    """Intersection-over-union of two pixel boxes given as ``[x1, y1, x2, y2]``.

    Uses the inclusive pixel convention (a box from ``x1`` to ``x2`` is
    ``x2 - x1 + 1`` pixels wide). Boxes whose corners are inverted are treated
    as having zero area instead of a negative one, and a non-positive union
    yields ``0.0`` rather than raising ``ZeroDivisionError``.

    Args:
        y_true: Ground-truth box ``[x1, y1, x2, y2]``.
        y_pred: Predicted box ``[x1, y1, x2, y2]``.

    Returns:
        The IoU as a float in ``[0, 1]``.
    """
    def _area(box):
        # Clamp each side so an inverted box cannot contribute a negative area.
        return max(0, box[2] - box[0] + 1) * max(0, box[3] - box[1] + 1)

    x1 = max(y_true[0], y_pred[0])
    y1 = max(y_true[1], y_pred[1])
    x2 = min(y_true[2], y_pred[2])
    y2 = min(y_true[3], y_pred[3])

    intersection_area = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)
    union_area = _area(y_true) + _area(y_pred) - intersection_area
    if union_area <= 0:
        return 0.0

    return intersection_area / float(union_area)

if __name__ == "__main__":
    
    # Reload the best checkpoint written during training; the custom loss must be
    # supplied by name so the saved model can be deserialised.
    model = tf.keras.models.load_model('/kaggle/working/best_model.keras', custom_objects={"ciou_loss":ciou_loss})

    print(f"Test : {len(test_images)} - {len(test_bboxes)} - {len(test_labels)}")

    mean_iou = []      # one IoU per test sample
    pred_labels = []   # predicted class index per test sample
    images = []        # annotated images, shown by the next cell

    for image, true_bbox, true_labels in tqdm(zip(test_images, test_bboxes, test_labels), total=len(test_images)):
        # NOTE(review): `name` is not used below.
        name = image.split("/")[-1]

        # Same preprocessing as read_image_bbox: resize, then scale to [-1, 1].
        # NOTE(review): cv2.imread returns None for an unreadable file; not checked here.
        image = cv2.imread(image, cv2.IMREAD_COLOR)
        x = cv2.resize(image, (width, height))
        x = (x - 127.5) / 127.5
        x = np.expand_dims(x, axis=0)
        
        true_x1, true_y1, true_x2, true_y2 = true_bbox

        # The model returns [bbox, label]; take the single sample of the batch.
        pred_bbox, label = model.predict(x, verbose=0)
        pred_bbox = pred_bbox[0]
#         print(pred_bbox)
        label_index = np.argmax(label[0])
        pred_labels.append(label_index)
#         print(label_index)
#         print(true_labels)

        # Predicted coordinates are fractions of the image size; convert them
        # to pixels of the ORIGINAL image (shape[1] = width, shape[0] = height).
        pred_x1 = int(pred_bbox[0] * image.shape[1])
        pred_y1 = int(pred_bbox[1] * image.shape[0])
        pred_x2 = int(pred_bbox[2] * image.shape[1])
        pred_y2 = int(pred_bbox[3] * image.shape[0])

        iou = cal_iou(true_bbox, [pred_x1, pred_y1, pred_x2, pred_y2])
        mean_iou.append(iou)

        # Colours are BGR tuples because the image was loaded with OpenCV.
        image = cv2.rectangle(image, (true_x1, true_y1), (true_x2, true_y2), (255, 0, 0), 2) ## BLUE
        image = cv2.rectangle(image, (pred_x1, pred_y1), (pred_x2, pred_y2), (0, 0, 255), 2) ## RED
        
        # Write the predicted class, true class and IoU at fixed pixel positions.
        font_size = 1.5
        pred_class_name = [i for i in class_dict if class_dict[i]== int(label_index)]
        cv2.putText(image, str(pred_class_name[0]) + ' '+ '(pred)', (80, 300), cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 255), 2)

        font_size = 1.5
        true_class_name = [i for i in class_dict if class_dict[i]== int(true_labels)]

        cv2.putText(image, str(true_class_name[0])+ ' '+ '(true)', (80, 350), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255, 0, 0), 2)

        font_size = 1.5
        cv2.putText(image, f"IoU: {iou:.4f}", (80, 400), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255, 0, 0), 2)
        images.append(image)

    # Aggregate: mean IoU over all samples and classification accuracy.
    score = np.mean(mean_iou, axis=0)
    mean_acc = accuracy_score(test_labels, pred_labels)
    print(f"Mean IoU: {score:.4f} - Acc: {mean_acc:.4f}")

In [None]:
import matplotlib.pyplot as plt

# Show at most the first 100 annotated test images on a 10x10 grid.
plt.figure(figsize=(20, 20)) 
for i in range(len(images[:100])):
    ax = plt.subplot(10, 10, i + 1) 
    # The images were read and annotated with OpenCV (BGR); reverse the channel
    # axis so matplotlib (RGB) shows the true box in blue and the predicted box in red.
    plt.imshow(images[i][..., ::-1])
    plt.axis("off")

plt.subplots_adjust(wspace=0, hspace=0)
plt.show()