[![Fixel Algorithms](https://fixelalgorithms.co/images/CCExt.png)](https://fixelalgorithms.gitlab.io)

# AI for System Engineers and Project Managers

## Deep Learning - Computer Vision - Object Detection of Ships / Vessels

Training a YOLO Model for Ships / Vessels detection in aerial images.

> Notebook by:
> - Royi Avital RoyiAvital@fixelalgorithms.com

## Revision History

| Version | Date       | User        |Content / Changes                                                   |
|---------|------------|-------------|--------------------------------------------------------------------|
| 1.0.000 | 01/03/2025 | Royi Avital | First version                                                      |

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/FixelAlgorithmsTeam/FixelCourses/blob/master/AIProgram/2024_02/0037FeaturesTransform.ipynb)

In [None]:
# Import Packages

# General Tools
import numpy as np
import scipy as sp
import pandas as pd

# Machine Learning

# Deep Learning
from ultralytics import YOLO
from ultralytics import settings as dYoloSettings

# Image Processing
from PIL import Image, ImageOps
import skimage as ski

# Miscellaneous
import math
import os
import pickle
from platform import python_version
import random
import onedrivedownloader #<! https://github.com/loribonna/onedrivedownloader

# Typing
from typing import Callable, Dict, List, Optional, Self, Set, Tuple, Union

# Visualization
import matplotlib as mpl
from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
import seaborn as sns

# Jupyter
from IPython import get_ipython

## Notations

* <font color='red'>(**?**)</font> Question to answer interactively.
* <font color='blue'>(**!**)</font> Simple task to add code for the notebook.
* <font color='green'>(**@**)</font> Optional / Extra self practice.
* <font color='brown'>(**#**)</font> Note / Useful resource / Food for thought.

Code Notations:

```python
someVar    = 2; #<! Notation for a variable
vVector    = np.random.rand(4) #<! Notation for 1D array
mMatrix    = np.random.rand(4, 3) #<! Notation for 2D array
tTensor    = np.random.rand(4, 3, 2, 3) #<! Notation for nD array (Tensor)
tuTuple    = (1, 2, 3) #<! Notation for a tuple
lList      = [1, 2, 3] #<! Notation for a list
dDict      = {1: 3, 2: 2, 3: 1} #<! Notation for a dictionary
oObj       = MyClass() #<! Notation for an object
dfData     = pd.DataFrame() #<! Notation for a data frame
dsData     = pd.Series() #<! Notation for a series
hObj       = plt.Axes() #<! Notation for an object / handler / function handler
```

### Code Exercise

 - Single line fill

 ```python
 valToFill = ???
 ```

 - Multi Line to Fill (At least one)

```python
# You need to start writing
?????
```

 - Section to Fill

```python
#===========================Fill This===========================#
# 1. Explanation about what to do.
# !! Remarks to follow / take under consideration.
mX = ???

?????
#===============================================================#
```

In [None]:
# Configuration
# %matplotlib inline

# Seed both NumPy's and Python's random generators with the same value for repeatable runs
seedNum = 512
np.random.seed(seedNum)
random.seed(seedNum)

# Matplotlib default color palette
lMatPltLibclr = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
# sns.set_theme() #<! Apply SeaBorn theme

# `True` when the string representation of the active IPython shell mentions `google.colab`
runInGoogleColab = 'google.colab' in str(get_ipython())

In [None]:
# Constants

# Default plotting properties
FIG_SIZE_DEF    = (8, 8)
ELM_SIZE_DEF    = 50
CLASS_COLOR     = ('b', 'r')
EDGE_COLOR      = 'k'
MARKER_SIZE_DEF = 10
LINE_WIDTH_DEF  = 2

# Project layout
PROJECT_NAME     = 'FixelCourses'
DATA_FOLDER_PATH = 'DataSets'

# Base folder: The working directory truncated right after the last (case insensitive) occurrence of the project name.
# The search runs on the reversed strings, hence the first match is the last occurrence in the original path.
# When there is no match `find()` returns -1 and the slice keeps the whole working directory.
currDir     = os.getcwd()
revPrjIdx   = currDir[::-1].lower().find(PROJECT_NAME.lower()[::-1])
BASE_FOLDER = currDir[:len(currDir) - revPrjIdx]

# File extensions treated as images
L_IMG_EXT = ['.png', '.jpeg', '.jpg']

In [None]:
# Courses Packages



In [None]:
# General Auxiliary Functions

class YoloImageSet():
    """
    Indexable set of images and their YOLO style labels.

    Expects `folderPath` to hold an images sub folder and a labels sub folder.
    An image is part of the set only if it is a file, its extension is listed in
    `lImgExt` (Case insensitive) and a `.txt` file with the same base name exists
    in the labels sub folder.
    Each non empty line of a label file is parsed as `class x y w h`.

    Indexing returns `(tXi, mYi)`: The image as a `float32` array (As converted by
    `ski.util.img_as_float32()`) and a `(numBoxes, 5)` `float32` array with the
    rows `[class, x, y, w, h]`.
    """
    def __init__(self: Self, folderPath: str, *, lImgExt: List[str] = L_IMG_EXT, imgFolderName: str = 'images', lblFolderName: str = 'labels') -> None:
        """
        Scans the folders and loads all labels into memory (Images are read lazily).

        Args:
            folderPath: Root folder of the set.
            lImgExt: Valid image extensions, including the leading dot.
            imgFolderName: Name of the images sub folder.
            lblFolderName: Name of the labels sub folder.

        Raises:
            ValueError: A label line has less than 5 values or a non numeric value.
        """

        imgFolderPath = os.path.join(folderPath, imgFolderName)
        lblFolderPath = os.path.join(folderPath, lblFolderName)
        # Extensions are compared in lower case so files such as `IMG.JPG` are not silently dropped
        tuImgExt = tuple(imgExt.lower() for imgExt in lImgExt)

        # Valid file: Is a file, has a valid image extension, and has a corresponding TXT file
        lFiles = []
        for itmName in sorted(os.listdir(imgFolderPath)):
            fileName, fileExt = os.path.splitext(itmName)
            if fileExt.lower() not in tuImgExt:
                continue
            if not os.path.isfile(os.path.join(imgFolderPath, itmName)):
                continue
            if not os.path.isfile(os.path.join(lblFolderPath, fileName + '.txt')):
                continue
            lFiles.append(itmName)

        # Build the labels data: Class, Bounding Box
        lCls = [] #<! Class on all images
        lY   = [] #<! Per image array of [class, x, y, w, h]
        for itmName in lFiles:
            fileName = os.path.splitext(itmName)[0]
            mYi      = self._ParseLabelFile(os.path.join(lblFolderPath, fileName + '.txt'))
            lCls.extend(int(clsVal) for clsVal in mYi[:, 0])
            lY.append(mYi)

        self._folderPath     = folderPath
        self._imgFolderPath  = imgFolderPath
        self._lblFolderPath  = lblFolderPath
        self._lFiles         = lFiles
        self._lY             = lY
        self._lCls           = lCls
        self._numSamples     = len(lFiles)

    @staticmethod
    def _ParseLabelFile( filePath: str ) -> np.ndarray:
        # Parses a label file into a (numBoxes, 5) float32 array of [class, x, y, w, h]

        lRows = []
        with open(filePath, 'r') as hFile:
            for lineIdx, lineStr in enumerate(hFile, start = 1):
                lVal = lineStr.split() #<! Any whitespace run is a separator
                if not lVal:
                    continue #<! Blank lines (Such as a trailing empty line) carry no box
                if len(lVal) < 5:
                    raise ValueError(f'{filePath}: Line {lineIdx} has {len(lVal)} values, expected at least 5')
                lRows.append([float(valStr) for valStr in lVal[:5]])

        # The reshape keeps a well defined (0, 5) shape for files with no boxes
        return np.array(lRows, dtype = np.float32).reshape(-1, 5)

    def __len__( self: Self ) -> int:
        """Returns the number of valid images in the set."""

        return self._numSamples

    def __getitem__( self: Self, idx: int ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Loads the image at index `idx` from disk and returns it with its labels.

        Returns:
            `(tXi, mYi)`: The EXIF transposed image as `float32` and its
            `(numBoxes, 5)` labels array.
        """

        # The context manager releases the file handle deterministically
        with Image.open(os.path.join(self._imgFolderPath, self._lFiles[idx])) as oImgFile:
            oI = ImageOps.exif_transpose(oImgFile) #<! Apply the EXIF orientation
            mI = np.array(oI)
        tXi = ski.util.img_as_float32(mI)
        # tXi = np.transpose(tXi, (2, 0, 1))
        mYi = self._lY[idx]

        return tXi, mYi


def PlotBox( mI: np.ndarray, vLabel: Union[int, np.ndarray], mBox: np.ndarray, *, hA: Optional[plt.Axes] = None, dLabelText: Optional[Dict[int, str]] = None ) -> plt.Axes:
    """
    Shows an image and draws its bounding boxes over it.

    The image is displayed on a unit extent (`[0, 1, 1, 0]`), so the boxes are
    drawn in normalized coordinates.
    Boxes are in YOLO format: `[x, y, w, h]` (Center x, center y, width, height).

    Args:
        mI: The image to display.
        vLabel: The label of each box (A scalar for a single box).
        mBox: The boxes, a single box or one box per row.
        hA: Axes to draw on. If `None` a new figure is created, sized by the image at 72 DPI.
        dLabelText: Optional map from a label to its legend text. If `None` the text is `'_'`.

    Returns:
        The axes drawn on.
    """

    if hA is None:
        dpi = 72
        numRows, numCols = mI.shape[:2]
        figWidth  = int(np.ceil(numCols / dpi) + 1)
        figHeight = int(np.ceil(numRows / dpi) + 1)
        _, hA = plt.subplots(figsize = (figWidth, figHeight))

    hA.imshow(mI, extent = [0, 1, 1, 0]) #<! "Normalized Image"
    hA.grid(False)

    # Promote a single box / scalar label into the multi box layout
    mBox   = np.atleast_2d(mBox)
    vLabel = np.atleast_1d(vLabel)
    for ii, vBox in enumerate(mBox):
        boxLabel  = vLabel[ii]
        labelText = '_' if dLabelText is None else dLabelText[boxLabel]
        PlotBBox(hA, boxLabel, vBox, labelText)

    return hA

def PlotBBox( hA: plt.Axes, boxLabel: int, vBox: np.ndarray, labelText: str = '_' ) -> plt.Axes:
    """
    Draws a single bounding box: A rectangle, its label at the corner and an `x` at the center.

    The box is in YOLO format: `[x, y, w, h]` (Center x, center y, width, height).
    Legend Text: https://stackoverflow.com/questions/24680981

    Args:
        hA: Axes to draw on.
        boxLabel: The label, drawn as text at the rectangle anchor corner.
        vBox: The box values.
        labelText: The `label` property of the rectangle patch.

    Returns:
        The axes drawn on.
    """

    boxColor = hA._get_lines.get_next_color() #<! Next color of the axes color cycle

    xCenter, yCenter = vBox[0], vBox[1]
    boxWidth, boxHeight = vBox[2], vBox[3]
    # Anchor corner of the rectangle: Center shifted by half the size
    xCorner = xCenter - (boxWidth / 2)
    yCorner = yCenter - (boxHeight / 2)

    # Transparent face requires the alpha component in the face color
    hA.add_patch(Rectangle((xCorner, yCorner), boxWidth, boxHeight, linewidth = 2, edgecolor = boxColor, facecolor = (0, 0, 0, 0), label = labelText))
    hA.text(xCorner, yCorner, s = boxLabel, color = 'w', verticalalignment = 'bottom', bbox = {'color': boxColor}, fontdict = {'size': 16})
    hA.plot(xCenter, yCenter, 'x', mew = 5, ms = 10, color = boxColor)

    return hA

def PlotCollage( oYoloSet: YoloImageSet, numRows: int, numCols: int, dLabelText: Dict[int, str] ) -> plt.Figure:
    """
    Plots a grid of randomly chosen images of the set with their bounding boxes.

    Images are drawn with replacement (`random.choices()`), so an image may repeat.

    Args:
        oYoloSet: The image set to sample from.
        numRows: Number of rows of the grid.
        numCols: Number of columns of the grid.
        dLabelText: Map from a class index to its legend text.

    Returns:
        The figure holding the collage.
    """

    numSamples = len(oYoloSet)

    # `figsize` is (width, height): Columns set the width, rows set the height.
    # `squeeze = False` always yields a 2D array of axes, so a 1 x 1 grid works as well.
    hF, mHa = plt.subplots(nrows = numRows, ncols = numCols, figsize = (numCols * 3, numRows * 3), squeeze = False)
    lImgIdx = random.choices(range(numSamples), k = numRows * numCols)

    for imgIdx, hA in zip(lImgIdx, mHa.flat):
        tI, mY = oYoloSet[imgIdx]
        vLabel = mY[:, 0].astype(np.int32) #<! First column is the class
        mBox   = mY[:, 1:] #<! The rest is the box
        hA     = PlotBox(tI, vLabel, mBox, hA = hA, dLabelText = dLabelText)
        hA.set_title(f'File #: {imgIdx:04d}')

    return hF

## Object Detection

Object Detection was one of the first tasks to be "_solved_" by Deep Learning models.  
There are 2 main approaches to Object Detection:

1. A Two Phases Model   
   The model basically tries to "segment" areas of high probability and lets another model extract the class and the bounding boxes.  
   In the early days it was considered to be the more accurate approach, though slower.
2. A Single Phase Model  
   The model combines the detection and regression of the box into a single model.
   Pioneered by the SSD and YOLO Models.


### YOLO Style Detection Tensor

![](https://i.imgur.com/CE1Ef7g.png)


* <font color='brown'>(**#**)</font> There are known datasets for object detection: [COCO Dataset](https://cocodataset.org), [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/).   
  They also define standards for the labeling system.  
  Training on them is on the scale of days.
* <font color='brown'>(**#**)</font> [Object Detection Annotation Formats](https://albumentations.ai/docs/getting_started/bounding_boxes_augmentation).
* <font color='brown'>(**#**)</font> Review of Object Detection approaches is given by Lilian Weng: [Part 1: Gradient Vector, HOG, and SS](https://lilianweng.github.io/posts/2017-10-29-object-recognition-part-1), [Part 2: CNN, DPM and Overfeat](https://lilianweng.github.io/posts/2017-12-15-object-recognition-part-2), [Part 3: R-CNN Family](https://lilianweng.github.io/posts/2017-12-31-object-recognition-part-3), [Part 4: Fast Detection Models](https://lilianweng.github.io/posts/2018-12-27-object-recognition-part-4).
* <font color='brown'>(**#**)</font> A different approach by the SSD Architecture: [SSD object detection: Single Shot MultiBox Detector for real-time processing](https://scribe.rip/9bd8deac0e06), [Review: SSD — Single Shot Detector (Object Detection)](https://scribe.rip/851a94607d11).

In [None]:
# Parameters

# Data
datasetName = 'ShipsAerialImages' #<! Name of the data set folder (Also the base name of the downloaded ZIP file)
datasetUrl  = 'https://technionmail-my.sharepoint.com/:u:/g/personal/royia_technion_ac_il/EaWzsh8MduNKt-V2DuAoipgBC0sVzyTBoOFBIxxBP3M6iA' #<! Share link the data set is downloaded from
dLabels     = {0: 'Ship'} #<! Class index -> Class name
dataConfig  = 'Data.yaml' #<! Data configuration file, resolved relative to the data set folder at training

# Pre Processing

# Model

# Training
numEpoch = 100 #<! Number of training epochs

# Data Visualization


## Generate / Load Data


In [None]:
# Verify Data is Available

# The data set is expected at `<BASE_FOLDER>/<DATA_FOLDER_PATH>/<datasetName>`
dataSetPath = os.path.join(BASE_FOLDER, DATA_FOLDER_PATH, datasetName)

# Download only when the data set folder is missing
if not os.path.isdir(dataSetPath):
    # Download, unzip and remove ZIP file
    # NOTE(review): Presumably the archive extracts into a `datasetName` folder next to the ZIP file - verify against `onedrivedownloader`
    onedrivedownloader.download(datasetUrl, os.path.join(BASE_FOLDER, DATA_FOLDER_PATH, datasetName + '.zip'), unzip = True, clean = True)

In [None]:
# Generate Data

# One image set per split, each built from a sub folder of the data set folder
oYoloTrainSet = YoloImageSet(os.path.join(dataSetPath, 'train'))
oYoloValSet   = YoloImageSet(os.path.join(dataSetPath, 'validation'))
oYoloTestSet  = YoloImageSet(os.path.join(dataSetPath, 'test'))

# Number of valid images found per split
print(f'Number of Samples (Train)     : {len(oYoloTrainSet)}')
print(f'Number of Samples (Validation): {len(oYoloValSet)}')
print(f'Number of Samples (Test)      : {len(oYoloTestSet)}')


### Plot Data

In [None]:
# Plot the Data

# 3 x 3 collage of random training images with their annotated boxes
# NOTE(review): `PlotCollage()` is annotated as returning a `plt.Figure`, the `hA` name follows the axes convention
hA = PlotCollage(oYoloTrainSet, 3, 3, dLabelText = dLabels)

* <font color='brown'>(**#**)</font> Some of the images are not well annotated.

## Train a YOLO Model

The most popular training framework for YOLO models is given by [Ultralytics](https://github.com/ultralytics/ultralytics).

* <font color='brown'>(**#**)</font> While the models are usually MIT licensed, using the Ultralytics framework in a commercial context requires a license.

In [None]:
# YOLO Settings

print(dYoloSettings) #<! Settings before the update

# Point the framework at the data set folder and disable the external integrations.
# A single `update()` call applies all keys together instead of one call per key.
dYoloSettings.update({
    'datasets_dir': dataSetPath,
    'clearml': False,
    'comet': False,
    'dvc': False,
    'hub': False,
    'mlflow': False,
    'neptune': False,
    'raytune': False,
})

print(dYoloSettings) #<! Settings after the update

In [None]:
# Yolo Model

# Build a detection model from the `yolo11n.pt` weights
oModel = YOLO('yolo11n.pt', task = 'detect')
# Train on the data described by the configuration file in the data set folder for `numEpoch` epochs
dTrainResult = oModel.train(data = os.path.join(dataSetPath, dataConfig), epochs = numEpoch)

In [None]:
# Analysis

# Get training path
trainPath = str(dTrainResult.save_dir) #<! Folder the training run reports as its output folder

# Save the output
# The training result object is pickled into the run folder
with open(os.path.join(trainPath, 'dTrainResult.pkl'), 'wb') as hFile:
    pickle.dump(dTrainResult, hFile)

# DataFrame of the training process
# Read from the `results.csv` file in the run folder
dfResults = pd.read_csv(os.path.join(trainPath, 'results.csv'))
dfResults #<! Displayed as the cell output




* <font color='brown'>(**#**)</font> The [Distribution Focal Loss](https://arxiv.org/abs/2006.04388) (DFL) is an adaptation of the Focal Loss to better deal with imbalanced data as in most Object Detection tasks.  
See [Review - Generalized Focal Loss: Learning Qualified and Distributed Bounding Boxes for Dense Object Detection](https://patrick-llgc.github.io/Learning-Deep-Learning/paper_notes/gfocal.html), [StackOverflow - The DFL Loss in YOLO](https://stackoverflow.com/questions/75950283), [Distribution Focal Loss for Dense Object Detection](https://www.linkedin.com/pulse/distribution-focal-loss-dense-object-detection-dsaisolutions-wek4f).

In [None]:
# Run the trained model on a single image of the test split, without showing or saving the prediction
# NOTE(review): `visualize = True` presumably enables the feature maps visualization of Ultralytics - confirm it is intended
lResults = oModel.predict(os.path.join(dataSetPath, 'test', 'images', 'd0011_png.rf.5f4c5ec53b76258a7bc2af23efb4a908.jpg'), 
               visualize = True, show = False, save = False)

In [None]:
# Render each prediction result and save it as `TestResult<index>.png` in the training run folder
# NOTE(review): `tIBgr` / `tI` are not used by `oRes.save()` in this cell - presumably kept for interactive display
for ii, oRes in enumerate(lResults):
    tIBgr = oRes.plot(show = False)  #<! BGR numpy array
    tI    = Image.fromarray(tIBgr[..., ::-1])  #<! RGB PIL image (Channels order reversed)
    oRes.save(filename = os.path.join(trainPath, f'TestResult{ii:04}.png'))