In [10]:
# Install a global "ignore" filter: warnings issued through the `warnings`
# module are suppressed from here on (this also hides deprecation notices).
import warnings
warnings.filterwarnings("ignore")

In [None]:
!pip install opencv-python==4.8.0.76


In [None]:
import os
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Dataset locations: class sub-folders with images, and the annotation CSV files.
IMAGE_DIR = '../dataset/images'
ANNOTATIONS_DIR = '../dataset/annotations'

# os.listdir() returns entries in arbitrary order. Sorting makes the class index
# assigned to each name identical on every run/machine (needed to decode
# predictions later) and keeps class_names[i] / csv_files[i] aligned when each
# CSV is named after its class folder.
class_names = sorted(
    d for d in os.listdir(IMAGE_DIR) if os.path.isdir(os.path.join(IMAGE_DIR, d))
)
csv_files = sorted(f for f in os.listdir(ANNOTATIONS_DIR) if f.endswith('.csv'))

In [3]:
class_names

['airplane', 'face', 'motorcycle']

In [4]:
csv_files

['airplane.csv', 'face.csv', 'motorcycle.csv']

In [None]:
# Load every image together with its class index and normalised bounding box.
images = []
labels = []
annotations = []
skipped = []  # (class, file, reason) for entries that could not be used

# Pair each class with its CSV by file stem rather than by list position, so the
# pairing does not depend on the order in which the two directories were listed.
csv_by_class = {os.path.splitext(f)[0]: f for f in csv_files}

for class_idx, class_name in enumerate(class_names):
    class_dir = os.path.join(IMAGE_DIR, class_name)

    if class_name not in csv_by_class:
        raise FileNotFoundError(
            f"No annotation CSV named '{class_name}.csv' found in {ANNOTATIONS_DIR}"
        )
    csv_path = os.path.join(ANNOTATIONS_DIR, csv_by_class[class_name])
    df = pd.read_csv(csv_path)

    # Sorted for a reproducible sample order.
    for image_name in sorted(os.listdir(class_dir)):
        image_path = os.path.join(class_dir, image_name)

        # cv2.imread returns None (no exception) for files it cannot decode,
        # e.g. stray non-image files inside the class folder.
        image = cv2.imread(image_path)
        if image is None:
            skipped.append((class_name, image_name, 'unreadable image'))
            continue
        h, w = image.shape[:2]

        rows = df[df['image_name'] == image_name]
        if rows.empty:
            skipped.append((class_name, image_name, 'no annotation row'))
            continue
        # Every column after the first one of the first matching row.
        ann = rows.iloc[0, 1:].tolist()

        # Normalise the first four values by the ORIGINAL image size
        # (values 0 and 2 by width, 1 and 3 by height) so they survive the resize.
        for k, scale in enumerate((w, h, w, h)):
            ann[k] = ann[k] / scale

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (224, 224))

        images.append(image)
        labels.append([class_idx])
        # Kept as a single-row nested list: the preview cell reads annotations[i][0].
        annotations.append([ann])

if skipped:
    print(f"Skipped {len(skipped)} file(s); first few: {skipped[:5]}")


In [None]:
# Turn the three per-sample Python lists into NumPy arrays (same names, same order).
images, labels, annotations = (
    np.array(samples) for samples in (images, labels, annotations)
)

In [None]:
images.shape

In [None]:
# Sanity check: draw the stored box of one sample on top of its image.
i = 0

img = images[i]
# Stored values are fractions of the image size; scale them to the 224x224 grid.
bbox = (224 * annotations[i][0]).astype(int)
# First two values are used as one corner, last two as the opposite corner.
corner_a, corner_b = bbox[:2], bbox[2:]
img_ann = cv2.rectangle(
    img.copy(),  # draw on a copy so images[i] itself stays clean
    corner_a,
    corner_b,
    color=(0, 0, 255),
    thickness=2,
)
plt.imshow(img_ann)

In [None]:
import keras
import tensorflow as tf
from tensorflow.keras.layers import Flatten, Dropout, Dense, Input, MaxPool2D, Conv2D
from tensorflow.keras.models import Model

In [None]:
# Network input: 224x224 images with 3 channels.
input_model = Input(shape=(224, 224, 3), name='input_layer')

# Shared convolutional trunk: five (3x3 same-padded ReLU conv -> 2x2 max-pool)
# stages, doubling the number of filters at every stage.
conv = input_model
for n_filters in (32, 64, 128, 256, 512):
    conv = Conv2D(
        filters=n_filters,
        kernel_size=(3, 3),
        strides=1,
        padding='same',
        activation='relu',
    )(conv)
    conv = MaxPool2D()(conv)

# Flatten the feature maps so both dense heads can consume them.
flatten = Flatten()(conv)


def _dense_head(features, n_outputs, activation, name):
    """Dense(128, relu) -> Dropout(0.5) -> Dense(n_outputs) head on `features`.

    `name` becomes the output layer's name.
    """
    hidden = Dense(128, activation="relu")(features)
    hidden = Dropout(0.5)(hidden)
    return Dense(n_outputs, activation=activation, name=name)(hidden)


# Classification head: softmax over the 3 classes.
clf = _dense_head(flatten, 3, "softmax", "class")

# Bounding-box head: 4 sigmoid outputs, i.e. values in [0, 1].
bbox = _dense_head(flatten, 4, "sigmoid", "bbox")

# Two-output model: (class probabilities, box coordinates).
model = Model(inputs=input_model, outputs=(clf, bbox))

In [None]:
# Save the layer-by-layer summary as a text file.
# open(..., "w") does not create missing parent directories, so make sure the
# output folder exists before writing into it.
os.makedirs("../models", exist_ok=True)
with open("../models/model_summary.txt", "w", encoding="utf-8") as f:
    # Keras hands each piece of the summary to print_fn; write it out as a line.
    model.summary(print_fn=lambda x: f.write(x + "\n"))

In [None]:

# Per-output settings; the keys are the names of the two output layers.
loss = {
    'class': tf.losses.SparseCategoricalCrossentropy(),  # integer class ids
    'bbox': tf.losses.MeanSquaredError(),                # box regression
}
loss_weights = {'class': 1.0, 'bbox': 1.0}  # both tasks contribute equally
metrics = {
    'class': tf.metrics.SparseCategoricalAccuracy(),
    'bbox': tf.metrics.MeanSquaredError(),
}

opt = tf.optimizers.Adam(learning_rate=0.001)

model.compile(
    optimizer=opt,
    loss=loss,
    metrics=metrics,
    loss_weights=loss_weights,
)

In [None]:
callbacks = [
    # save_best_only=True is required for the file to hold the best model:
    # without it the checkpoint is rewritten after every epoch and `monitor`
    # has no effect, so 'best_model.keras' would simply be the last epoch.
    keras.callbacks.ModelCheckpoint(
        filepath='../models/best_model.keras',
        monitor='val_loss',
        save_best_only=True,
    ),
    # Cut the learning rate by 10x after 12 epochs without val_loss improvement.
    keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.1, patience=12),
]

In [None]:
# The samples were loaded class by class, and Keras' validation_split takes the
# LAST fraction of the arrays before any shuffling. Without shuffling first, the
# validation set would come almost entirely from the last class. A fixed seed
# keeps the train/validation split reproducible.
shuffle_idx = np.random.default_rng(42).permutation(len(images))

# Scale pixels to [0, 1]; float32 halves the memory of the default float64 result.
X_train = images[shuffle_idx].astype("float32") / 255.0

# Box targets must have the same (N, 4) shape as the 'bbox' output. A leftover
# singleton axis, (N, 1, 4), can broadcast against (N, 4) inside the MSE loss
# instead of being compared row by row.
y_train = [labels[shuffle_idx], annotations[shuffle_idx].reshape(-1, 4)]

In [None]:
# Train both heads jointly; 15% of the samples are held out for validation.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=100,
    validation_split=0.15,
    verbose=1,
    callbacks=callbacks,
)

In [None]:
# Replace the in-memory model with the checkpoint written during training.
model = keras.saving.load_model(
    '../models/best_model.keras'
)

In [None]:
image_path = '../test/images.jpg'
image = cv2.imread(image_path)
# cv2.imread signals failure by returning None; fail here with a clear message
# instead of an obscure error inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read test image: {image_path}")

# Same preprocessing as the training images: RGB, 224x224, pixels scaled by 1/255.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (224, 224))

# Add the leading batch axis expected by model.predict.
X = tf.expand_dims(image, axis=0) / 255

In [None]:
plt.imshow(image)
plt.show()

In [None]:
# The model returns one array per output head: class scores and box values.
class_pred, bbox_pred = model.predict(X)
# Index of the highest score, mapped back to its name through class_names.
best_class_idx = class_pred.argmax()
class_pred_label = class_names[best_class_idx]

In [None]:
# Keep bbox_pred untouched and draw on a copy of the image, so re-running this
# cell always produces the same picture (no repeated x224 scaling, no boxes
# accumulating on `image`).
bbox_px = (bbox_pred * 224).astype('int')

# Plain Python int tuples are the point format OpenCV accepts in every version.
cord1 = tuple(int(v) for v in bbox_px[0][:2])
cord2 = tuple(int(v) for v in bbox_px[0][2:])

img_test_bbox = cv2.rectangle(image.copy(), cord1, cord2, color=(0, 0, 255), thickness=2)
# Label text is anchored at the first corner of the box.
img_test_bbox = cv2.putText(
    img_test_bbox,
    class_pred_label,
    cord1,
    cv2.FONT_HERSHEY_COMPLEX,
    0.6,
    color=(0, 0, 255),
)

In [None]:
plt.imshow(img_test_bbox)