In [None]:
%%capture
%%bash
# Download the assignment dataset and unpack it into datasets/.
# The cell is safe to re-run: the archive is always written to the same file
# (-O) and unzip replaces already-extracted files without prompting (-o).
set -euo pipefail

# Original course dataset, kept for reference:
# wget https://github.com/nyp-sit/iti121-2025s2/raw/refs/heads/main/L6/data/goldfish_v1.zip
wget -q -O assignment2.zip https://github.com/EN-303/CapyFlower/raw/refs/heads/main/assignment2.zip

mkdir -p datasets
unzip -o -q assignment2.zip -d datasets/

In [None]:
%%capture
!pip install ultralytics

## Training the Model

YOLOv11 comes with pretrained models of different sizes: yolo11n, yolo11s, .... They differ in size, inference speed and mean average precision:

<img src="https://github.com/nyp-sit/iti121-2025s2/blob/main/L6/assets/yolo11-models.png?raw=true" width="70%"/>


We will use the small pretrained model yolo11s and finetune it on our custom dataset.


### Setup the logging

Ultralytics supports logging to `wandb`, `comet.ml`, `tensorboard` and `mlflow` out of the box. Here we only enable wandb.

You need to create an account at [`wandb`](https://wandb.ai) and get the API key from https://wandb.ai/authorize.

*For mlflow users, you can refer to Ultralytics's mlflow integration here: https://docs.ultralytics.com/integrations/mlflow/*


In [None]:
from ultralytics import settings

# Choose which experiment loggers Ultralytics should use:
# Weights & Biases stays on, ClearML and Comet are switched off.
logger_flags = {
    "wandb": True,
    "clearml": False,
    "comet": False,
}
settings.update(logger_flags)

### Training

For a complete listing of train settings, you can see [here](https://docs.ultralytics.com/modes/train/#train-settings).

You can also specify the type of data [augmentation](https://docs.ultralytics.com/modes/train/#augmentation-settings-and-hyperparameters)  you want as part of the train pipeline.

You can monitor your training progress at wandb (the link is given in the train output below)


In [None]:
import torch
# Release cached GPU memory that PyTorch is holding but not using
# (presumably so the training cell below starts with as much free memory as possible).
torch.cuda.empty_cache()

In [None]:
!ls -la datasets/assignment2/train/images | wc -l

In [None]:
from numpy import flipud
from ultralytics import YOLO
from ultralytics import settings

# Every experiment fine-tunes the same pretrained checkpoint on the same data;
# only the schedule and augmentation settings differ between runs.
PRETRAINED_WEIGHTS = "yolo11s.pt"

# Arguments shared by all experiments.
COMMON_TRAIN_ARGS = dict(
    data="datasets/assignment2/data.yaml",
    save_period=1,  # save a checkpoint after every epoch
    device=0,       # first GPU
    fliplr=0.5,
    project="assignment2_v2",
    plots=True,
)

# Experiment log (v2-1 ... v2-5), keyed by the run directory created under
# assignment2_v2/. The directory names assume each experiment was run once, in
# order, with Ultralytics' default auto-incremented names (train, train2, ...).
# hsv_h / hsv_s / hsv_v control the hue / saturation / brightness augmentation.
EXPERIMENTS = {
    # v2-1
    "train": dict(epochs=30, batch=64, mosaic=1.0, mixup=0.2, scale=0.7,
                  hsv_s=0.8, hsv_v=0.5),
    # v2-2
    "train2": dict(epochs=50, batch=64, mosaic=0.5, mixup=0.0, scale=0.5,
                   hsv_s=0.5, hsv_v=0.3),
    # v2-3
    "train3": dict(epochs=50, batch=64, mosaic=0.5, close_mosaic=10, scale=0.5,
                   hsv_h=0.5, hsv_s=0.5, hsv_v=0.3),
    # v2-4
    "train4": dict(epochs=80, batch=16, mosaic=0.5, close_mosaic=10, scale=0.5,
                   hsv_s=0.4, hsv_v=0.3),
    # v2-5
    "train5": dict(epochs=80, batch=64, mosaic=0.5, close_mosaic=10, scale=0.5,
                   hsv_s=0.4, hsv_v=0.3),
}

# Experiment to run now. Later cells load weights from assignment2_v2/train4 and
# assignment2_v2/train5, so set this to "train4" as well (and re-run the cell)
# to reproduce both on a fresh runtime.
RUN_NAME = "train5"

model = YOLO(PRETRAINED_WEIGHTS)  # Load a pre-trained YOLO model

# `name` pins the output directory to assignment2_v2/<RUN_NAME> instead of an
# auto-incremented folder that depends on how many runs came before, and
# `exist_ok=True` lets a re-run reuse that directory.
result = model.train(name=RUN_NAME,
                     exist_ok=True,
                     **COMMON_TRAIN_ARGS,
                     **EXPERIMENTS[RUN_NAME])

We can run the best model (using the best checkpoint) against the validation dataset to see the overall model performance on validation set.  

You should see around `0.95` for `mAP50`, and `0.45` for `mAP50-95`.

In [None]:
from ultralytics import YOLO

# Score the best checkpoint of run "train4" on the dataset described by data.yaml.
train4_weights = "assignment2_v2/train4/weights/best.pt"
model = YOLO(train4_weights)
validation_results = model.val(
    data="datasets/assignment2/data.yaml",
    device="0",
)

In [None]:
from ultralytics import YOLO

# Score the best checkpoint of run "train5" on the dataset described by data.yaml.
train5_weights = "assignment2_v2/train5/weights/best.pt"
model = YOLO(train5_weights)
validation_results = model.val(
    data="datasets/assignment2/data.yaml",
    device="0",
)

## Export and Deployment

Your model is in PyTorch format (.pt). You can export the model to various formats, e.g. TorchScript, ONNX, OpenVINO, TensorRT, etc., depending on your use case and deployment platform (e.g. CPU or GPU).

You can see the list of [supported formats](https://docs.ultralytics.com/modes/export/#export-formats)  and the option they support in terms of further optimization (such as imagesize, int8, half-precision, etc) in the ultralytics site.

Ultralytics provides a utility function to benchmark your model across the supported formats automatically. You can uncomment the code in the following code cell to see the benchmark result. If you are benchmarking on CPU only, then change `device=0` to `device='cpu'`.  

**Beware: it will take quite a while to complete the benchmark**

In [None]:
# from ultralytics.utils.benchmarks import benchmark

# # Benchmark on GPU (device=0 means the 1st GPU device); use device='cpu' for a CPU-only benchmark.
# benchmark(model="assignment2_v2/train4/weights/best.pt", data="datasets/assignment2/data.yaml", imgsz=640, half=False, device=0)

Export Train 4 model

In [None]:
# Export the best "train4" checkpoint to OpenVINO with INT8 quantization.
# INT8 export calibrates on a dataset; `data` is passed explicitly so the
# calibration images come from this assignment's dataset rather than from the
# exporter's default dataset.
model = YOLO("assignment2_v2/train4/weights/best.pt")
exported_path = model.export(
    format="openvino",
    int8=True,
    data="datasets/assignment2/data.yaml",
)

## Inference

Let's test our model on some sample pictures. You can optionally specify the confidence threshold (e.g. `conf=0.5`) and the IoU threshold for NMS (e.g. `iou=0.6`). The model only outputs detections whose confidence exceeds the confidence threshold; among overlapping boxes whose IoU exceeds the IoU threshold, NMS keeps only the highest-scoring one.  

In [None]:
import ultralytics
from ultralytics import YOLO
from PIL import Image

# Sample image to run detection on.
# Original course sample, kept for reference:
# source = 'https://raw.githubusercontent.com/nyp-sit/iti121-2025S2/refs/heads/main/L6/samples/goldfish_sample.jpg'
source = 'https://github.com/EN-303/CapyFlower/raw/refs/heads/main/sample/combine_002.png'

# To try the exported OpenVINO model instead, use:
# model = YOLO("assignment2_v2/train4/weights/best_int8_openvino_model", task='detect')
model = YOLO("assignment2_v2/train4/weights/best.pt", task='detect')

# conf: confidence threshold for detections; iou: IoU threshold used by NMS.
result = model(source, conf=0.5, iou=0.6)

# Visualize the results (one entry per input image)
for i, r in enumerate(result):
    print(r)

    # Show the annotated image on screen (in supported environments)
    r.show()

    # Save the annotated image to disk
    r.save(filename=f"results{i}.jpg")

## Download the Model

If you are training your model on Google Colab, you will download the exported OpenVINO model to a local PC. If you are training your model locally, then the exported model should already be on your local PC.

Run the following code to zip up the OpenVINO folder and download to local PC.

*Note: If you encountered error message "NotImplementedError: A UTF-8 locale is required. Got ANSI_X3.4-1968", uncomment the following cell and run it.*


In [None]:
# import locale
# locale.getpreferredencoding = lambda: "UTF-8"

In [None]:
%%bash
# Package the exported OpenVINO model for download.
# The model folder is copied (not moved) next to the notebook, so the export
# under weights/ stays in place and this cell can be re-run.
set -euo pipefail

SRC=assignment2_v2/train4/weights/best_int8_openvino_model
DEST=best_int8_openvino_model
ARCHIVE=assignment2_train4_openvino_model.zip

# Refresh the local copy when the export exists; otherwise fall back to a copy
# left behind by an earlier run.
if [ -d "$SRC" ]; then
    rm -rf "$DEST"
    cp -r "$SRC" "$DEST"
fi

# Rebuild the archive from scratch so it never contains stale entries.
rm -f "$ARCHIVE"
zip -r "$ARCHIVE" "$DEST"

# Now download assignment2_train4_openvino_model.zip from the file browser.

## Streaming

We can also do real-time detection on a video or camera stream.

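The streaming cell below reads frames with the OpenCV library and displays them inline with `cv2_imshow` from `google.colab.patches`, so as written it runs on Google Colab. To run it locally, display the frames in a window with `cv2.imshow` instead.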




### Video File

You need `OpenCV` to run the following code.  In your conda environment, install `opencv` for python using the following command:

```
pip3 install opencv-python
```
or
```
conda install opencv
```

Let's download the sample video file.

In [None]:
# !wget https://github.com/EN-303/CapyFlower/raw/refs/heads/main/sample/capyflower.mp4

### Streaming and display video

In [None]:
from ultralytics import YOLO
import cv2
from google.colab.patches import cv2_imshow

model = YOLO("assignment2_v2/train4/weights/best.pt", task='detect')

video_path = "capyflower.mp4"  # Local video file
cap = cv2.VideoCapture(video_path)

# Fail loudly instead of silently showing nothing when the file is missing
# or cannot be decoded.
if not cap.isOpened():
    raise FileNotFoundError(f"Could not open video file: {video_path}")

try:
    while True:
        success, frame = cap.read()
        if not success:
            # No more frames to read.
            break

        # Run detection on the CPU and draw the detections on the frame.
        results = model(frame, device="cpu")
        annotated_frame = results[0].plot()

        # Display frame in Colab
        cv2_imshow(annotated_frame)
finally:
    # Release the capture even if inference or display fails part-way.
    cap.release()


### Detect and write to a video file

In [None]:
from ultralytics import YOLO
import cv2
# from tqdm import tqdm
from tqdm.auto import tqdm

def write_video(video_in_filepath, video_out_filepath, model, conf=0.6, device=0):
    """Run detection on every frame of a video and save the annotated copy.

    Args:
        video_in_filepath: Path of the video to read.
        video_out_filepath: Path of the annotated video to write. It is encoded
            with the 'mp4v' codec at the input's frame rate and frame size.
        model: Detector called as ``model(frame, conf=..., device=...)``; the
            first item of its return value must provide ``plot()``, which
            returns the annotated frame (e.g. an Ultralytics ``YOLO`` model).
        conf: Confidence threshold forwarded to ``model``. Defaults to 0.6.
        device: Inference device forwarded to ``model``. Defaults to 0 (the
            first GPU); pass ``"cpu"`` on machines without a GPU.

    Raises:
        IOError: If the input video cannot be opened.
    """
    video_reader = cv2.VideoCapture(video_in_filepath)
    if not video_reader.isOpened():
        raise IOError(f"Could not open input video: {video_in_filepath}")

    video_writer = None
    try:
        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        fps = video_reader.get(cv2.CAP_PROP_FPS)

        video_writer = cv2.VideoWriter(video_out_filepath,
                                       cv2.VideoWriter_fourcc(*'mp4v'),
                                       fps,
                                       (frame_w, frame_h))

        # The reported frame count can be inaccurate, so it only sizes the
        # progress bar; reading continues until the video runs out of frames.
        with tqdm(total=nb_frames if nb_frames > 0 else None) as progress:
            while True:
                success, frame = video_reader.read()
                if not success:
                    break

                # Run inference on the frame and draw the detections on it.
                results = model(frame, conf=conf, device=device)
                annotated_frame = results[0].plot()

                video_writer.write(annotated_frame)
                progress.update(1)
    finally:
        # Always release the reader and writer, even if inference fails, so the
        # output file is finalized and file handles are not leaked.
        # No OpenCV windows are opened here, so no GUI cleanup calls are made
        # (they can raise on headless OpenCV builds).
        video_reader.release()
        if video_writer is not None:
            video_writer.release()


In [None]:
from pathlib import Path
import os

# Input clip; the annotated copy is named after it with a "_train4_detected" suffix.
# Alternative course sample clip:
# video_in_file = "goldfish_480p_10s.mp4"
video_in_file = "capyflower.mp4"

basename = Path(video_in_file).stem
video_out_file = f"{basename}_train4_detected.mp4"

# Detector used for annotation. To use the exported OpenVINO model instead:
# model = YOLO("best_int8_openvino_model", task="detect")
model = YOLO("assignment2_v2/train4/weights/best.pt", task='detect')

write_video(video_in_file, video_out_file, model)