# Multi-class object detection 

### This is a multi-class object detection problem. Let's solve it using state-of-the-art YOLOv8

In [1]:
# Importing necessary libraries
import os
import shutil
import cv2
import random
import matplotlib.pyplot as plt
import numpy as np
import copy
import wandb

In [None]:
# Installing ultralytics (the package that the YOLO class is imported from in the next cell).
# NOTE(review): the version is not pinned, so a re-run may install a different release;
# `%pip` is the kernel-aware alternative to `!pip` - consider switching.
!pip install ultralytics

[0m

In [None]:
from ultralytics import YOLO

You will need a unique API key to log in to Weights & Biases. 

1. If you don't have a Weights & Biases account, you can go to https://wandb.ai/site and create a FREE account.
2. Access your API key: https://wandb.ai/authorize.

There are two ways you can login using a Kaggle kernel:

1. Run a cell with `wandb.login()`. It will ask for the API key, which you can copy + paste in.
2. You can also use Kaggle secrets to store your API key and use the code snippet below to login. Check out this [discussion post](https://www.kaggle.com/product-feedback/114053) to learn more about Kaggle secrets. 



In [None]:
# Fetch the Weights & Biases API key from Kaggle secrets so that it is never hardcoded here.
# The key is looked up under the secret label "wandb_api".
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("wandb_api")

In [None]:
# Log in to Weights & Biases with the key fetched from Kaggle secrets
wandb.login(key=secret_value_0)

In [None]:
# Defining data path and listing its contents.
# The copy loop further down treats every entry of this folder as one class folder
# and uses the folder name as a prefix for the copied files.
ip_datapath='/kaggle/input/african-wildlife'
os.listdir(ip_datapath)

In [None]:
# Defining standard image size for YOLO v8.
# Every image is resized to image_size x image_size when it is copied to the working
# directory, and the bounding-box helper scales the label coordinates by this value.
image_size=640

# Creating working directories

In [None]:
# Creating paths for separate images and labels.
# Layout: <cwd>/images/{train,validation,test} and <cwd>/labels/{train,validation,test}
curr_path=os.getcwd()
imgtrainpath = os.path.join(curr_path,'images','train')
imgvalpath=os.path.join(curr_path,'images','validation')
imgtestpath=os.path.join(curr_path,'images','test')

labeltrainpath=os.path.join(curr_path,'labels','train')
labelvalpath=os.path.join(curr_path,'labels','validation')
labeltestpath=os.path.join(curr_path,'labels','test')

# Creating directories for all paths defined.
# exist_ok=True keeps the cell re-runnable: without it a second execution raises
# FileExistsError because the folders are already there.
for dataset_dir in (imgtrainpath, imgvalpath, imgtestpath,
                    labeltrainpath, labelvalpath, labeltestpath):
    os.makedirs(dataset_dir, exist_ok=True)

Let's copy the images and labels of all the classes into the working directory and prefix each file name with its class, which can be buffalo, elephant, rhino or zebra.

In [None]:
# Copy every label and image into the flat training folders. Each file is written
# directly under its final name, <class folder>_<original file name>, so that files
# with the same name in different class folders can never overwrite each other.
for dirname in sorted(os.listdir(ip_datapath)):
    dirpath=os.path.join(ip_datapath, dirname)
    if not os.path.isdir(dirpath): # ignore stray files lying next to the class folders
        continue
    for file in sorted(os.listdir(dirpath)):
        filepath=os.path.join(dirpath, file)
        newname=dirname+'_'+file
        if file.endswith(".txt"): # if label file, copy it to label train path
            shutil.copy(filepath, os.path.join(labeltrainpath, newname))
        elif file.endswith((".jpg", ".JPG")): # if image file, resize and write it to image train path
            img=cv2.imread(filepath)
            if img is None: # cv2.imread returns None instead of raising for unreadable files
                print("Skipping unreadable image:", filepath)
                continue
            img_resized=cv2.resize(img, (image_size, image_size))
            cv2.imwrite(os.path.join(imgtrainpath, newname), img_resized)
        # Any other file type is ignored. Previously such a file reached os.rename with
        # the path of the preceding file (or with no path at all) and crashed the loop.
                

In [None]:
# Number of image files and label files gathered for all classes
tuple(len(os.listdir(folder)) for folder in (imgtrainpath, labeltrainpath))

# Extracting and visualising test images

Let's create a difficult test set to see how good our model can be.

1. Of all the images, we will include those images in test set which have objects of different classes. So all test images will have at least 2 objects of different classes. For example, a buffalo and a zebra in a single image is qualified to be part of the test data set. There can be multiple objects too. So 2 elephants and 1 buffalo in a single image is also perfectly valid.

2. For the training and validation set, we will have images of only one object class. For example, any image in training and validation set can have one type of animal only, say rhino. Although there can be multiple rhinos in the same image.



In [None]:
# helper telling whether a list holds one single repeated value
def are_all_elements_same(lst):
    """Return True when every element of ``lst`` equals its first element.

    An empty list counts as uniform and therefore also yields True.
    """
    if not lst:
        return True

    reference = lst[0]
    # stops at the first element that differs from the reference
    return not any(item != reference for item in lst[1:])

Let's extract all the images with different types of animals present in the same image

In [None]:
# Move every sample whose label file mentions more than one class to the test folders.
for file in sorted(os.listdir(labeltrainpath)):
    filepath=os.path.join(labeltrainpath, file)
    classes_list=[]
    with open(filepath, "r") as f:
        for line in f:
            fields=line.split() # split() copes with tabs, repeated spaces and blank lines
            if fields:
                classes_list.append(fields[0]) # class id of every object in the image (with repeats)

    # Checking if different types of animals are present in image
    if not are_all_elements_same(classes_list):
        basename=os.path.splitext(file)[0]
        # The copy step accepts both '.jpg' and '.JPG', so look for either extension
        imgfilename=None
        for ext in ('.jpg', '.JPG'):
            if os.path.exists(os.path.join(imgtrainpath, basename+ext)):
                imgfilename=basename+ext
                break
        if imgfilename is None:
            # Leave the label where it is rather than splitting the image/label pair
            print("No image found for", file, "- keeping it in the training set")
            continue
        newpath=os.path.join(labeltestpath, file)
        shutil.move(filepath, newpath) # moving label file to test path
        oldimgfilepath=os.path.join(imgtrainpath, imgfilename)
        newimgfilepath=os.path.join(imgtestpath, imgfilename)
        shutil.move(oldimgfilepath, newimgfilepath) # moving image to test path
        print(basename) # printing the image name

    

The images printed above are all those that contain more than one type of animal, even though each file name suggests that there is just one. Let's check a few of these images.

In [None]:
# Show up to six of the images that were moved to the test set
plt.figure(figsize=(30,30))
test_files=sorted(os.listdir(imgtestpath)) # list once; os.listdir order is arbitrary
for i, test_file in enumerate(test_files[:6]): # slicing avoids an IndexError with fewer than six images
    test_image=os.path.join(imgtestpath, test_file)
    ax=plt.subplot(3,2,i+1)

    # Display actual image. cv2.imread returns BGR while matplotlib expects RGB,
    # so convert first; otherwise red and blue are swapped on screen.
    plt.imshow(cv2.cvtColor(cv2.imread(test_image), cv2.COLOR_BGR2RGB))
    plt.xticks([])
    plt.yticks([])

We can see how the above images have different types of animals in the same image

In [None]:
# Size of the test dataset as (number of images, number of labels)
tuple(len(os.listdir(folder)) for folder in (imgtestpath, labeltestpath))

In [None]:
# Size of the training(+validation) dataset as (number of images, number of labels)
tuple(len(os.listdir(folder)) for folder in (imgtrainpath, labeltrainpath))

# Separating training and validation dataset

Let's now partition the training dataset into training and validation dataset

In [None]:
# moving 20% of data to validation

factor=0.2
split_seed=42 # fixed seed so that the same images end up in validation on every fresh run

# Sort before sampling: os.listdir returns entries in arbitrary order, which would
# make the sample differ between runs even with a fixed seed.
train_images=sorted(os.listdir(imgtrainpath))
val_images=random.Random(split_seed).sample(train_images, int(len(train_images)*factor))

for file in val_images:
    basename=os.path.splitext(file)[0]
    textfilename=basename+'.txt' # label file shares the image's base name
    labelfilepath=os.path.join(labeltrainpath, textfilename)
    labeldestpath=os.path.join(labelvalpath, textfilename)
    imgfilepath=os.path.join(imgtrainpath, file)
    imgdestpath=os.path.join(imgvalpath, file)
    shutil.move(imgfilepath, imgdestpath)
    shutil.move(labelfilepath, labeldestpath)

In [None]:
# Size of the training dataset as (number of images, number of labels)
tuple(len(os.listdir(folder)) for folder in (imgtrainpath, labeltrainpath))

In [None]:
# Size of the validation dataset as (number of images, number of labels)
tuple(len(os.listdir(folder)) for folder in (imgvalpath, labelvalpath))

# Image visualisation with bounding box

It's time to write a function to obtain the bounding box coordinates from the label files. Each line of a label file describes one object: its class id, followed by the bounding box centre coordinates and the bounding box width and height, all expressed as fractions of the image size.

In [None]:
# function to obtain bounding box coordinates from text label files
def get_bbox_from_label(text_file_path, img_size=None):
    """Read a label file and return the pixel corners of every bounding box.

    Each non-empty line must hold five whitespace-separated fields:
    ``class_id x_centre y_centre width height``, where the last four are
    fractions of the image size.

    Parameters
    ----------
    text_file_path : str
        Path of the label file.
    img_size : int or float, optional
        Side length in pixels of the (square) image the boxes are scaled to.
        Defaults to the notebook-level ``image_size``.

    Returns
    -------
    tuple of numpy.ndarray
        One (4, 2) integer array per box with the corners ordered top-left,
        top-right, bottom-right, bottom-left, which is the form passed to
        cv2.drawContours in the visualisation cell.

    Raises
    ------
    ValueError
        If a non-empty line does not have exactly five fields or a coordinate
        is not a number.
    """
    if img_size is None:
        img_size=image_size
    bbox_list=[]
    with open(text_file_path, "r") as file:
        for line in file:
            fields=line.split() # split() tolerates tabs and repeated spaces
            if not fields: # skip blank lines, e.g. a trailing newline at end of file
                continue
            class_id,x_centre,y_centre,width,height=fields
            x1=(float(x_centre)+(float(width)/2))*img_size
            x0=(float(x_centre)-(float(width)/2))*img_size
            y1=(float(y_centre)+(float(height)/2))*img_size
            y0=(float(y_centre)-(float(height)/2))*img_size

            vertices=np.array([[int(x0), int(y0)], [int(x1), int(y0)],
                               [int(x1),int(y1)], [int(x0),int(y1)]])
            bbox_list.append(vertices)
    return tuple(bbox_list)

In [None]:
# defining red color in RGB to draw bounding box
# (used as the colour argument of cv2.drawContours in the visualisation cell)
red=(255,0,0) 

In [None]:
# Drawing bounding box for random images in training data
plt.figure(figsize=(30,30))
train_images=sorted(os.listdir(imgtrainpath)) # list the folder once instead of three times per image
for i in range(1,8,2):
    img_name=random.choice(train_images)
    img_path=os.path.join(imgtrainpath, img_name)
    # Find the label through the shared base name; pairing by position in two
    # separately sorted listings silently breaks once the folders get out of step.
    label_path=os.path.join(labeltrainpath, os.path.splitext(img_name)[0]+'.txt')
    bbox=get_bbox_from_label(label_path) # extracting bounding box coordinates
    # cv2.imread returns BGR; convert so that matplotlib shows the true colours
    image=cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
    image_copy=image.copy()
    ax=plt.subplot(4, 2, i)
    plt.imshow(image) # displaying image
    plt.xticks([])
    plt.yticks([])
    cv2.drawContours(image_copy, bbox, -1, red, 2) # drawing bounding box on copy of image
    ax=plt.subplot(4, 2, i+1)
    plt.imshow(image_copy) # displaying image with bounding box
    plt.xticks([])
    plt.yticks([])

# Creating config file

In [None]:
# defining newline variable for config file (appended to the lines of config.yaml below)
newline='\n'

Let's declare the contents of configuration file

In [None]:
# The config file opens with a comment line
ln_1=f"# Train/val/test sets{newline}"

# Locations of the train, val and test images, each wrapped in single quotes
ln_2=f"train: '{imgtrainpath}'{newline}"
ln_3=f"val: '{imgvalpath}'{newline}"
ln_4=f"test: '{imgtestpath}'{newline}"
ln_5=newline
ln_6=f"# Classes{newline}"

# Mapping from class id to class name
ln_7=f"names:{newline}"
ln_8=f"  0: buffalo{newline}"
ln_9=f"  1: elephant{newline}"
ln_10=f"  2: rhino{newline}"
ln_11='  3: zebra'

# All lines in the order in which they are written to the file
config_lines=[
    ln_1, ln_2, ln_3, ln_4, ln_5, ln_6,
    ln_7, ln_8, ln_9, ln_10, ln_11,
]

In [None]:
# Creating path for config file (config.yaml inside the current working directory);
# the bare expression on the last line echoes the path as the cell output
config_path=os.path.join(curr_path, 'config.yaml')
config_path

In [None]:
# Writing config file: join the prepared lines and write them in one go
with open(config_path, 'w') as f:
    f.write(''.join(config_lines))

# Model training

In [None]:
# Using YOLO's pretrained model architecture and weights for training:
# the model is built from 'yolov8m.yaml' and the weights are then loaded from 'yolov8m.pt'
model=YOLO('yolov8m.yaml').load('yolov8m.pt')

In [None]:
# Training the model for 100 epochs on the dataset described by the config file.
# NOTE(review): iou and conf look like thresholds for validation/prediction rather than
# training hyper-parameters - confirm their effect here against the Ultralytics docs.
results=model.train(data=config_path, epochs=100, iou=0.5, conf=0.01)

Results can be converted to a zip file using the following command which is commented right now. This zip file can be downloaded later if results are to be analysed locally

In [None]:
# !zip -r results.zip /kaggle/working/runs/detect/train

mAP50 is the mean average precision (the average precision averaged over all classes) obtained by the model at an IoU threshold of 50%. It is one of the standard metrics reported by YOLOv8 for object detection tasks.

Let's see how the training progressed with epochs by visualizing the plots

In [None]:
# Show the training curves stored as results.png in the training run folder
plt.figure(figsize=(30,30))
trainingresult_path=os.path.join(curr_path, 'runs', 'detect', 'train')
results_png=cv2.imread(os.path.join(trainingresult_path,'results.png'))
if results_png is None:
    # cv2.imread returns None instead of raising when the file is missing or unreadable
    raise FileNotFoundError("results.png not found in " + trainingresult_path)
# cv2.imread returns BGR while matplotlib expects RGB; without the conversion
# the curve colours in the figure are swapped.
results_png=cv2.cvtColor(results_png, cv2.COLOR_BGR2RGB)
plt.imshow(results_png)

#### All losses- Box loss, class loss, dfl loss are decreasing with epochs.
#### All metrics- Precision, Recall, mAP50 and mAP50-95 are increasing with epochs

# Model Performance 

 Let's write functions for evaluating model metrics and displaying plots

In [None]:
# helper that validates a model on one split and reports its mAP50
def evaluate_map50(trainedmodel, data_path, dataset='val'):
    """Validate ``trainedmodel`` on one split and return the metrics with the rounded mAP50.

    Parameters
    ----------
    trainedmodel : object exposing ``val(data=..., split=...)``
    data_path : path of the dataset config file handed to ``val`` as ``data``
    dataset : name of the split handed to ``val`` as ``split`` (default 'val')

    Returns
    -------
    tuple
        ``(metrics, map50)`` where ``metrics`` is whatever ``val`` returned and
        ``map50`` is ``metrics.box.map50`` rounded to three decimals.
    """
    metrics = trainedmodel.val(data=data_path, split=dataset)
    map50 = round(metrics.box.map50, 3)
    print(f"The mAP of model for all images on {dataset} dataset is {map50}")
    return metrics, map50

In [None]:
# function for displaying plots created by YOLO
def display_curves(root_path):
    """Show the evaluation plots stored in ``root_path`` stacked in one figure.

    The plots are read in this order: precision curve, recall curve,
    precision-recall curve, F1 curve and confusion matrix.

    Parameters
    ----------
    root_path : str
        Folder that holds the PNG files written by a validation run.

    A file that cannot be read is reported and skipped instead of aborting the
    whole figure.
    """
    plot_files=('P_curve.png', 'R_curve.png', 'PR_curve.png',
                'F1_curve.png', 'confusion_matrix.png')
    plt.figure(figsize=(50,50))

    for position, plot_file in enumerate(plot_files, start=1):
        plot_path=os.path.join(root_path, plot_file)
        plot_img=cv2.imread(plot_path)
        if plot_img is None:
            # cv2.imread returns None instead of raising for a missing/unreadable file
            # (NOTE(review): plot file names presumably differ between Ultralytics versions - confirm)
            print("Plot not found, skipping:", plot_path)
            continue
        ax=plt.subplot(len(plot_files),1,position)
        # cv2.imread returns BGR while matplotlib expects RGB; convert to keep the plot colours
        plt.imshow(cv2.cvtColor(plot_img, cv2.COLOR_BGR2RGB))
    

In [None]:
# Evaluating train metrics (dataset='train' is forwarded to the validator as the split to use)
train_metrics, train_map50=evaluate_map50(model, config_path, dataset='train')

Here we see class 'all' in addition to the 4 classes. 'all' here aggregates the true positives, false positives and false negatives across all classes to calculate a single precision and recall value for the entire dataset.

Let me explain how P and R are defined for 'all':

Let's say I have the following counts for each class:

1. True Positives for Buffalo: TP_b

2. False Positives for Buffalo: FP_b

3. False Negatives for Buffalo: FN_b

Following the same for other classes as well: elephant, rhino, and zebra.

Now, I calculate the total true positives, total false positives, and total false negatives across all classes:

- Total True Positives (TP_t): TP_b + TP_z + TP_r + TP_e

- Total False Positives (FP_t): FP_b + FP_z + FP_r + FP_e

- Total False Negatives (FN_t): FN_b + FN_z + FN_r + FN_e

Now,

* P_all = TP_t / (TP_t + FP_t)
* R_all = TP_t / (TP_t + FN_t)

In [None]:
# Path storing model's performance on training dataset
# NOTE(review): the folder name is hard-coded; it presumably only matches when this is the
# first validation run in the working directory (later runs appear to get val2, val3, ...) - confirm.
train_path=os.path.join(curr_path, 'runs', 'detect', 'val') #val is a misnomer, it is actually measuring validation on training dataset

In [None]:
# Display plots on training data (P, R, PR and F1 curves plus the confusion matrix)
display_curves(train_path)

In [None]:
# Evaluating val metrics (dataset='val' is forwarded to the validator as the split to use)
val_metrics, val_map50=evaluate_map50(model, config_path, dataset='val')

In [None]:
# Path storing model's performance on validation dataset
# NOTE(review): 'val2' is hard-coded and presumably only correct when this is the second
# validation run in the working directory - confirm after re-running cells.
val_path=os.path.join(curr_path, 'runs', 'detect', 'val2') 

In [None]:
# Display plots on validation data (P, R, PR and F1 curves plus the confusion matrix)
display_curves(val_path)

In [None]:
# Evaluating test metrics (dataset='test' is forwarded to the validator as the split to use)
test_metrics, test_map50=evaluate_map50(model, config_path, dataset='test')

#### Obtained quite good mAP on a challenging test dataset

In [None]:
# Path storing model's performance on test dataset
# NOTE(review): 'val3' is hard-coded and presumably only correct when this is the third
# validation run in the working directory - confirm after re-running cells.
test_path=os.path.join(curr_path, 'runs', 'detect', 'val3') #val3 is a misnomer, it is actually measuring validation on test dataset

In [None]:
# Display plots on test data (P, R, PR and F1 curves plus the confusion matrix)
display_curves(test_path) 

The above plots are not smooth because we have very little data (only 18 images) in the test dataset.

# Visualizing model's performance on random test images

In [None]:
# Show three random test images next to the model's predictions for them
plt.figure(figsize=(30,30))
test_files=sorted(os.listdir(imgtestpath))
# Sample distinct images from the actual folder content. The previous version assumed
# exactly 18 test images and walked m, m+1, m+2 from a random start, which raised an
# IndexError whenever the start was one of the last two indices.
chosen_files=random.sample(test_files, min(3, len(test_files)))
for i, test_file in zip(range(1,6,2), chosen_files):
    test_image=os.path.join(imgtestpath, test_file)
    ax=plt.subplot(3,2,i)

    # Display actual image (cv2.imread returns BGR, matplotlib expects RGB)
    plt.imshow(cv2.cvtColor(cv2.imread(test_image), cv2.COLOR_BGR2RGB))
    plt.xticks([])
    plt.yticks([])
    plt.title("Actual image", fontsize = 40)

    # Predict
    res = model(test_image)
    res_plotted = res[0].plot() # annotated image as a BGR array
    ax=plt.subplot(3,2,i+1)

    # Display image with predictions, converted to RGB for matplotlib
    plt.imshow(cv2.cvtColor(res_plotted, cv2.COLOR_BGR2RGB))
    plt.title("Image with predictions", fontsize = 40)
    plt.xticks([])
    plt.yticks([])

# Concluding remarks

Model's performance on a challenging test dataset is quite good as it is able to detect different types of wild animals present in the same image 