In [21]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import xml.etree.ElementTree as ET

## Function to Parse xml files

In [22]:
def parse_annotation(xml_file):
    print("XML File Path:", xml_path)
    tree= ET.parse(xml_file)
    root= tree.getroot()
    objects= root.findall('object')
    annotations= []

    for obj in objects:
        name= obj.find('name').text
        bbox= obj.find('bndbox')
        xmin= int(bbox.find('xmin').text)
        ymin= int(bbox.find('ymin').text)
        xmax= int(bbox.find('xmax').text)
        ymax= int(bbox.find('ymax').text)

        image_height= int(root.find('size').find('height').text)
        image_width= int(root.find('size').find('width').text)
        image_channels= 3


        annotations.append((xmin, ymin, xmax, ymax))

    return annotations

## Function to Preprocess Data: Read, Resize, Rescale, Parse annotations

In [23]:
def preprocess_data(image_path, xml_path, target_size= (224, 224)):
    img= cv2.imread(image_path)
    img= cv2.resize(img, target_size)
    img= img/(255.0)
    annotations= parse_annotation(xml_path)
    return img, annotations

## Function to Create Bounding Box for the Target Object

In [24]:
def create_bound_box(img,bbox):
    
    print(bbox)
    xmin, ymin, xmax, ymax= bbox
    
    cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 0 , 255), 3)
    
    return img

## Uploading Train and Test Data

In [25]:
folder_train= r'C:\Users\sushant.gupta\Desktop\Project Work\Image Dataset\Train'

In [26]:
folder_test= r'C:\Users\sushant.gupta\Desktop\Project Work\Image Dataset\Test'

## Image Augmentation

In [27]:
def H_flip_image_and_bbox(image, bbox):
    # Horizontally flip the image
    flipped_image = tf.keras.layers.RandomFlip(mode= "horizontal", seed= 42)(image)
    
    # Adjust bounding box coordinates
    image_width = tf.cast(tf.shape(image)[1], tf.float32)
    xmin, ymin, xmax, ymax = tf.unstack(bbox, axis=-1)
    ymin= tf.cast(ymin, tf.float32)
    ymax= tf.cast(ymax, tf.float32)
    flipped_xmin = image_width - tf.cast(xmax, tf.float32)
    flipped_xmax = image_width - tf.cast(xmin, tf.float32)
    flipped_bbox = tf.stack([flipped_xmin, ymin, flipped_xmax, ymax], axis=-1)
    
    return flipped_image, flipped_bbox

In [28]:
def V_flip_image_and_bbox(image, bbox):
    # Horizontally flip the image
    flipped_image = tf.keras.layers.RandomFlip(mode= "vertical", seed= 42)(image)
    
    # Adjust bounding box coordinates
    image_height = tf.cast(tf.shape(image)[0], tf.float32)
    xmin, ymin, xmax, ymax = tf.unstack(bbox, axis=-1)
    xmin= tf.cast(xmin, tf.float32)
    xmax= tf.cast(xmax, tf.float32)
    flipped_ymin = image_height - tf.cast(ymax, tf.float32)
    flipped_ymax = image_height - tf.cast(ymin, tf.float32)
    flipped_bbox = tf.stack([xmin, flipped_ymin, xmax, flipped_ymax], axis=-1)
    
    return flipped_image, flipped_bbox

## Loading Training Images and Annotations:

In [29]:
train_images= []
train_annotations= []

In [30]:
for filename in os.listdir(folder_train):
    if filename.endswith('.jpg'):
        image_path= os.path.join(folder_train, filename)
        xml_filename= filename[: -4] + '.xml'
        xml_path= os.path.join(folder_train, xml_filename)

        if os.path.exists(xml_path):
            img, annotations= preprocess_data(image_path, xml_path)
            train_images.append(img)
            train_annotations.append(annotations)
            h_img, h_bbx= H_flip_image_and_bbox(img, annotations)
            train_images.append(h_img)
            train_annotations.append(h_bbx)
            v_img, v_bbx= V_flip_image_and_bbox(img, annotations)
            train_images.append(v_img)
            train_annotations.append(v_bbx)
            contrast_img= tf.keras.layers.RandomContrast(factor= 0.2, seed= 42)(img)
            train_images.append(img)
            train_annotations.append(annotations)
            
        else:
            print(f"There exists no xml_file for the png_file {filename}.")

XML File Path: C:\Users\sushant.gupta\Desktop\Project Work\Image Dataset\Train\FLIR0277.xml
XML File Path: C:\Users\sushant.gupta\Desktop\Project Work\Image Dataset\Train\FLIR0278.xml
XML File Path: C:\Users\sushant.gupta\Desktop\Project Work\Image Dataset\Train\FLIR0279.xml
XML File Path: C:\Users\sushant.gupta\Desktop\Project Work\Image Dataset\Train\FLIR0280.xml
XML File Path: C:\Users\sushant.gupta\Desktop\Project Work\Image Dataset\Train\FLIR0281.xml
XML File Path: C:\Users\sushant.gupta\Desktop\Project Work\Image Dataset\Train\FLIR0282.xml
XML File Path: C:\Users\sushant.gupta\Desktop\Project Work\Image Dataset\Train\FLIR0288.xml
XML File Path: C:\Users\sushant.gupta\Desktop\Project Work\Image Dataset\Train\FLIR0289.xml
XML File Path: C:\Users\sushant.gupta\Desktop\Project Work\Image Dataset\Train\FLIR0290.xml
XML File Path: C:\Users\sushant.gupta\Desktop\Project Work\Image Dataset\Train\FLIR0291.xml
There exists no xml_file for the png_file FLIR0315.jpg.
XML File Path: C:\Users\

### Converting Training Dataset into Array Format

In [34]:
x_train= np.array(train_images)
y_train= np.array(train_annotations)

MemoryError: Unable to allocate 1.25 GiB for an array with shape (1112, 224, 224, 3) and data type float64

In [None]:
x_train.shape

In [None]:
y_train.shape

## Custom Intersection over Union Metric

In [None]:
def iou_metric(y_ground, y_predn):

    y_ground= tf.cast(y_ground, tf.float32)
    y_predn= tf.cast(y_predn, tf.float32)

    # Coordinates of Intersection
    x1= tf.maximum(y_ground[..., 0], y_predn[..., 0])
    y1= tf.maximum(y_ground[..., 1], y_predn[..., 1])
    x2= tf.minimum(y_ground[..., 2], y_predn[..., 2])
    y2= tf.minimum(y_ground[..., 3], y_predn[..., 3])

    # Area of Intersection
    intrstn_area= tf.maximum(0.0, x2-x1)*tf.maximum(0.0, y2-y1)

    # Area of union of boxes
    ground_area= (y_ground[..., 2] - y_ground[..., 0])*(y_ground[...,3] - y_ground[..., 1])
    predn_area= (y_predn[..., 2] - y_predn[..., 0])*(y_predn[...,3] - y_predn[..., 1])
    un_area= ground_area + predn_area - intrstn_area
    
    int_over_un= intrstn_area/un_area

    return tf.reduce_mean(int_over_un)

## Pretrained Model for Transfer Learning

In [None]:
base_model= tf.keras.applications.xception.Xception(weights= 'imagenet', include_top= False)
avg= tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
loc_output= tf.keras.layers.Dense(4)(avg)
model= tf.keras.Model(inputs= base_model.input, outputs= loc_output)
model.compile(loss= 'mse', optimizer='adam', metrics=[iou_metric])

In [None]:
model.fit(x_train, y_train, epochs= 10)

## Unfreezing Lower Layers for Learning

In [None]:
for layer in base_model.layers[56:]:
    layer.trainable= True

In [None]:
model.compile(loss= 'mse', optimizer='adam', metrics=[iou_metric])

In [None]:
# Display the summary of the base model to view its architecture and input shape
model.fit(x_train, y_train, epochs= 15)

In [None]:
train_annotations[:6]

## Preparing Test Data

In [None]:
test_images= []
test_annotations= []

In [None]:
for filename in os.listdir(folder_test):
    if filename.endswith('.jpg'):
        img_path= os.path.join(folder_test, filename)
        xml_path= os.path.join(folder_test, filename[:-4] + '.xml')

        if os.path.exists(xml_path):
            img, annotations= preprocess_data(img_path, xml_path)
            test_images.append(img)
            test_annotations.append(annotations)
        else:
            print(f"There exists no xml_file for the png_file {filename}.")

In [None]:
x_test= np.array(test_images)
y_test= np.array(test_annotations)

In [None]:
predictions= model.predict(x_test)

In [None]:
predictions

In [None]:
fin_predictions= np.round(predictions).astype(int)

In [None]:
for i in range(len(fin_predictions)):
    image_with_boxes = create_bound_box(test_images[i].copy(), fin_predictions[i])
    cv2.imshow("Image with Bounding Boxes", image_with_boxes)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    