# Custom CNN to show how YOLOv5 works 

In [None]:
import numpy as np
import cv2
import os
import imageio
import keras
import random
import shutil
import matplotlib.pyplot as plt

from tensorflow.keras import backend as K
import tensorflow as tf

from IPython.display import display, Image

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder

from keras import regularizers
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Activation, Dropout
from keras.utils import to_categorical

In [None]:
"""
Shuffle the images and labels
"""


# img_files = [f for f in os.listdir("data/images/") if f.endswith(".jpg") or f.endswith(".png")]
# label_files = [f.replace(".jpg", ".txt").replace(".png", ".txt") for f in img_files]

# img_label_pairs = list(zip(img_files, label_files))

# random.shuffle(img_label_pairs)

# if not os.path.exists("shuffled_data/images"):
#     os.makedirs("shuffled_data/images")
# if not os.path.exists("shuffled_data/labels"):
#     os.makedirs("shuffled_data/labels")

# for i, (img, label) in enumerate(img_label_pairs):
#     shutil.copy2("data/images/" + img, "shuffled_data/images/img_{}.jpg".format(i))
#     shutil.copy2("data/labels/" + label, "shuffled_data/labels/img_{}.txt".format(i))

In [None]:
"""
Split the shuffled dataset into training and validation images
"""


# images = os.listdir("shuffled_data/images")
# labels = os.listdir("shuffled_data/labels")

# images_train, images_val, labels_train, labels_val = train_test_split(images, labels, test_size=0.2, random_state=42)

# # Create the training and validation folders if they don't already exist
# if not os.path.exists("shuffled_data/train"):
#     os.makedirs("shuffled_data/train/images")
#     os.makedirs("shuffled_data/train/labels")

# if not os.path.exists("shuffled_data/valid"):
#     os.makedirs("shuffled_data/valid/images")
#     os.makedirs("shuffled_data/valid/labels")

# # Copy the training images and labels to the training folder
# for image, label in zip(images_train, labels_train):
#     shutil.copy2(f"shuffled_data/images/{image}", "shuffled_data/train/images/")
#     shutil.copy2(f"shuffled_data/labels/{label}", "shuffled_data/train/labels/")

# # Copy the validation images and labels to the validation folder
# for image, label in zip(images_val, labels_val):
#     shutil.copy2(f"shuffled_data/images/{image}", "shuffled_data/valid/images/")
#     shutil.copy2(f"shuffled_data/labels/{label}", "shuffled_data/valid/labels/")

In [None]:
"""
Count the training and validation images to confirm the split sizes
Expected output: (747, 187)
"""

# Image directories for the training and validation splits.
train_path =  "shuffled_data/train/images"
valid_path =  "shuffled_data/valid/images"

def get_shape(train_path, valid_path):
    """Count the ``.jpg`` files in the training and validation directories.

    Args:
        train_path: Directory containing the training images.
        valid_path: Directory containing the validation images.

    Returns:
        A ``(train_size, val_size)`` tuple of file counts.
    """
    def _count_jpgs(directory):
        # Only names ending in '.jpg' are counted; everything else is ignored.
        return sum(1 for entry in os.listdir(directory) if entry.endswith('.jpg'))

    train_size = _count_jpgs(train_path)
    val_size = _count_jpgs(valid_path)
    return train_size, val_size

# Left as a bare expression so the notebook echoes the (train_size, val_size) tuple.
get_shape(train_path, valid_path)

In [None]:
"""
Visualise the labels on random images
"""

# Convert normalized (0-1) coords to pixel locations. Label files are in the format (name, xc, yc, w, h)
def denormalize_coords(xc, yc, w, h, image_width, image_height):
    """Turn a normalized centre/size box into integer pixel corner coordinates.

    Args:
        xc, yc: Box centre as fractions of the image width and height.
        w, h: Box width and height as fractions of the image width and height.
        image_width, image_height: Image size in pixels.

    Returns:
        ``(x1, y1, x2, y2)``: the top-left and bottom-right corners, each
        truncated to an int.
    """
    half_w = w / 2
    half_h = h / 2

    left = (xc - half_w) * image_width
    top = (yc - half_h) * image_height
    right = (xc + half_w) * image_width
    bottom = (yc + half_h) * image_height

    return int(left), int(top), int(right), int(bottom)

# Show the bounding box by plotting a cv2 rectangle at the denormalized coords
def display_bbox(image_names):
    """Draw the labelled bounding boxes on training images and display them.

    For every label file name in ``image_names`` the matching ``.jpg`` is read
    from ``shuffled_data/train/images``, each box listed in the label file is
    drawn together with its class name, and the annotated image is written to
    ``output_images/`` and shown inline.

    Args:
        image_names: Iterable of label file names (``*.txt``) located in
            ``shuffled_data/train/labels``.

    Raises:
        FileNotFoundError: If the image matching a label file cannot be read.
    """
    for image_name in image_names:
        image_path = "shuffled_data/train/images/{}".format(image_name.replace('.txt', '.jpg'))
        label_path = "shuffled_data/train/labels/{}".format(image_name)

        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError("Could not read image: {}".format(image_path))
        image_height, image_width = image.shape[:2]

        with open(label_path, "r") as file:
            lines = file.readlines()

        for line in lines:
            # split() with no argument tolerates tabs and repeated spaces.
            fields = line.split()
            if not fields:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            class_name, xc, yc, w, h = fields

            x1, y1, x2, y2 = denormalize_coords(float(xc), float(yc), float(w), float(h), image_width, image_height)
            cv2.rectangle(image, (x1, y1), (x2, y2), (150, 150, 0), 2)
            cv2.putText(image, class_name, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (150, 150, 0), 2, cv2.LINE_AA)
            print("Normalized points: ", xc, yc, w, h)
            print("Denormalized points: ", x1, y1, x2, y2)

        # Save output images to output_images/. cv2.imwrite does not create
        # missing directories, so make sure the folder exists first.
        os.makedirs("output_images", exist_ok=True)
        # NOTE: image_name still carries its ".txt" suffix, so files are
        # written as "<name>.txt.jpg".
        output_path = "output_images/{}.jpg".format(image_name)
        cv2.imwrite(output_path, image)
        display(Image(output_path))


# Collect every training label file, then visualise a random handful of them.
image_names = [f for f in os.listdir("shuffled_data/train/labels/") if f.endswith(".txt")]
# random.sample raises ValueError when asked for more items than exist, so cap
# the sample size at the number of available label files.
random_image_names = random.sample(image_names, min(3, len(image_names)))

display_bbox(random_image_names)
print(random_image_names)

In [None]:
"""
Load the images and labels as an np.array to be used in the model
    Expected train size: (747, 416, 416, 3) 
    Expected valid size: (187, 416, 416, 3)
    Expected names size: 24
"""

def get_images(path):
    """Load every ``.jpg`` in ``path`` into a single NumPy array.

    Files are read in sorted name order. ``os.listdir`` returns entries in
    arbitrary order, so sorting is what keeps the i-th image aligned with the
    i-th label returned by ``get_labels`` for the matching label directory.

    Args:
        path: Directory containing the images.

    Returns:
        ``np.array`` built from the list of decoded images (one entry per
        ``.jpg`` file, in sorted file-name order).
    """
    filenames = sorted(f for f in os.listdir(path) if f.endswith(".jpg"))
    return np.array([imageio.imread(os.path.join(path, f)) for f in filenames])

def get_labels(path):
    """Read the class of every ``.txt`` label file in ``path`` as one-hot rows.

    Files are read in sorted name order so the rows line up with the images
    returned by ``get_images``. The class is the first whitespace-separated
    token of the file (the same token ``get_names`` reads), so files that
    describe more than one box are labelled with the class of their first box.

    The classes are indexed by a ``LabelEncoder`` fitted on the labels found
    in ``path`` only, using the class tokens as strings.

    Args:
        path: Directory containing the label files.

    Returns:
        One-hot encoded labels, one row per ``.txt`` file.

    Raises:
        ValueError: If a label file is empty.
    """
    labels = []
    for filename in sorted(os.listdir(path)):
        if not filename.endswith(".txt"):
            continue
        with open(os.path.join(path, filename), 'r') as f:
            fields = f.read().split()
        if not fields:
            raise ValueError("Label file has no annotations: {}".format(filename))
        labels.append(fields[0])
    label_encoder = LabelEncoder()
    label_encoder.fit(labels)
    one_hot_labels = to_categorical(label_encoder.transform(labels))
    return one_hot_labels

def get_names(path):
    """Count the distinct class ids that appear in the label files of ``path``.

    Only the first whitespace-separated token of each ``.txt`` file is read
    and it is parsed as an integer.

    Args:
        path: Directory containing the label files.

    Returns:
        Number of unique class ids found.
    """
    seen_ids = set()
    for entry in os.listdir(path):
        if not entry.endswith(".txt"):
            continue
        with open(os.path.join(path, entry), 'r') as handle:
            first_token = handle.read().strip().split()[0]
        seen_ids.add(int(first_token))
    return len(seen_ids)


# Locations of the image and label files for each split.
train_images_path = "shuffled_data/train/images"
train_labels_path = "shuffled_data/train/labels"
valid_images_path = "shuffled_data/valid/images"
valid_labels_path = "shuffled_data/valid/labels"

# Load both splits; the class count is taken from the training labels.
x_train = get_images(train_images_path)
y_train = get_labels(train_labels_path)
x_val = get_images(valid_images_path)
y_val = get_labels(valid_labels_path)
num_classes = get_names(train_labels_path)

# Report the label arrays' own shapes (not the image shapes) next to the images.
print("Train image size: ", x_train.shape, "\nTrain label size: ", y_train.shape)
print("\n\nValid image size: ", x_val.shape, "\nValid label size: ", y_val.shape)
print("\n\nClasses in dataset set: ", num_classes)

In [None]:
"""
Four layer CNN
Adam Optimizer
Binary Crossentropy with Logits Loss Function
Swish activation function
    Swish = x * sigmoid(x)
    Sigmoid = 1 / (1 + exp(-x))
"""

# Hyperparameters. ``lr`` is the single source of truth for the optimizer's
# learning rate (0.001, the value training has always used); ``rr``,
# ``dropout`` and ``init_loss`` are defined here but not used in this cell.
num_of_epochs, rr, dropout, lr, init_loss = 20, 0.1, 0.5, 0.001, 1
opt = keras.optimizers.Adam(learning_rate=lr)

# from_logits=True: the loss applies the sigmoid itself, so the network's
# output layer must emit raw scores.
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)


def swish(x):
    """Swish activation: ``x * sigmoid(x)``."""
    return x * keras.activations.sigmoid(x)

model = Sequential()

# Add the first convolutional layer (also fixes the expected input size)
model.add(Conv2D(32, (3,3), input_shape=(416, 416, 3)))
model.add(Activation(swish))
model.add(MaxPooling2D(pool_size=(2,2)))

# Add the second convolutional layer
model.add(Conv2D(64, (3,3)))
model.add(Activation(swish))
model.add(MaxPooling2D(pool_size=(2,2)))

# Add the third convolutional layer
model.add(Conv2D(128, (3,3)))
model.add(Activation(swish))
model.add(MaxPooling2D(pool_size=(2,2)))

# Add the fourth convolutional layer
model.add(Conv2D(128, (3,3)))
model.add(Activation(swish))
model.add(MaxPooling2D(pool_size=(2,2)))

# Flatten the output from the convolutional layers
model.add(Flatten())

# Add the fully connected layer
model.add(Dense(128))
model.add(Activation(swish))

# Add the output layer: one raw score (logit) per class, no activation,
# matching the from_logits=True loss above
model.add(Dense(num_classes, activation=None))


# Compile the model
model.compile(loss=loss, optimizer=opt, metrics=['accuracy'])

# Train the model; the image arrays are passed exactly as loaded (no pixel
# scaling is applied in this cell)
history = model.fit(x_train, y_train, batch_size=16, epochs=num_of_epochs, validation_data=(x_val, y_val))

In [None]:
"""
To display the number of layers, parameters, and gradients of the model
Counts are accumulated as Python ints so large values cannot overflow
"""
import warnings

def display_model_info(model):
    """Print a one-line summary of ``model``.

    The summary lists the number of layers, the total parameter count and a
    "gradients" figure computed as
    ``trainable parameters * num_classes * num_of_epochs``.
    ``num_classes`` and ``num_of_epochs`` are read from module-level
    variables.

    Args:
        model: A built Keras model.
    """
    num_layers = len(model.layers)
    num_params = model.count_params()
    # Python ints have arbitrary precision, so neither the sum nor the product
    # below can wrap around the way a fixed-width NumPy integer can; no
    # warning suppression is needed.
    trainable_params = sum(int(K.count_params(w)) for w in model.trainable_weights)
    num_gradients = trainable_params * num_classes * num_of_epochs
    print(f"Model Summary: {num_layers} layers, {num_params} parameters, {num_gradients} gradients")

# Print the summary line for the model defined and trained above.
display_model_info(model)


In [None]:
"""
Generates a confusion matrix based off the model
"""
# Generate predictions on the validation data. The output layer has no
# activation, so these are raw per-class scores (logits), not probabilities.
y_pred = model.predict(x_val)

# Convert the scores into class indices by taking the highest-scoring class.
# Rounding the scores first would collapse distinct values into ties and make
# argmax pick the wrong class.
y_pred_class = y_pred.argmax(axis=1)

# Calculate the confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_val.argmax(axis=1), y_pred_class)

# Plot the confusion matrix as a heatmap
plt.imshow(cm, cmap='Blues')
plt.colorbar()
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.show()

In [None]:
"""
Plots the model's accuracy and loss
"""

def plot_loss_accuracy(history):
    """Show training/validation loss and accuracy curves side by side.

    Args:
        history: The object returned by ``model.fit``; its ``history`` dict
            must contain ``loss``, ``val_loss``, ``accuracy`` and
            ``val_accuracy``.
    """
    recorded = history.history

    # One entry per subplot: y-axis label, then (series, legend label) pairs.
    panels = (
        ('Loss', (
            (recorded['loss'], 'Training Loss'),
            (recorded['val_loss'], 'Validation Loss'),
        )),
        ('Accuracy', (
            (recorded['accuracy'], 'Training Accuracy'),
            (recorded['val_accuracy'], 'Validation Accuracy'),
        )),
    )

    _, axes = plt.subplots(1, 2, figsize=(10, 5))

    for axis, (y_label, curves) in zip(axes, panels):
        for series, legend_label in curves:
            axis.plot(series, label=legend_label)
        axis.set_xlabel('Epoch')
        axis.set_ylabel(y_label)
        axis.legend()

    plt.show()

# Plot the curves recorded by model.fit during training.
plot_loss_accuracy(history)

In [None]:
test_path = "shuffled_data/train/images/img_921.jpg"

image = cv2.imread(test_path)
# cv2.imread returns None (rather than raising) when the file cannot be read.
if image is None:
    raise FileNotFoundError(f"Could not read test image: {test_path}")

# cv2 loads pixels in BGR order, while the training arrays were read with
# imageio (RGB); convert so the channels match what the model was fitted on.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (416, 416))
image = np.array(image)

# No /255 scaling here: the training arrays were fed to the model as raw pixel
# values, so the test image must stay on the same scale.

# Add the batch dimension expected by model.predict
image = np.expand_dims(image, axis=0)

# Make a prediction
prediction = model.predict(image)

# Print the predicted class. This is an index into the one-hot label columns
# built by the label encoder, which is not necessarily the class id written in
# the label files.
predicted_class = np.argmax(prediction)
print(f"The predicted class is {predicted_class}.")