# Initialization

## Show GPU + CPU

In [None]:
# List the GPUs that nvidia-smi reports for this runtime.
!nvidia-smi -L
print("\nCPU:")
# Show the "model name" entries from /proc/cpuinfo.
!grep "model name" /proc/cpuinfo


## YOLOv8 setup

In [None]:
# Install the Ultralytics package into the kernel's environment (-q: quiet pip output).
%pip install -q ultralytics

import ultralytics
# Run the checks shipped with the Ultralytics package to confirm the installation is usable.
ultralytics.checks()

## Upload dataset from Roboflow

If you are not sure how to export your annotated dataset, check the [Roboflow docs](https://docs.roboflow.com/exporting-data).

> Alternatively you can upload your dataset ([YOLOv8 format](https://roboflow.com/formats/yolov8-pytorch-txt)) from [**Google Drive**](#scrollTo=RxOnnOadc5vR) or from your [**local file system**](#scrollTo=qKTCWdtkOUw7) in the next steps.

In [None]:
# Install the Roboflow client used by the download cell below (-q: quiet pip output).
%pip install -q roboflow

**Copy only the lines of your Download Code that follow `from roboflow import Roboflow` (starting with `rf = Roboflow(...)`) and insert them in the next code cell:**

In [None]:
from pathlib import Path
from roboflow import Roboflow

%cd /content

### Paste your Download Code here:
rf = Roboflow(api_key="mTbnFiFXOk5y1ddXOYvv")
project = rf.workspace("mb-f9pmt").project("pass-kqgak")
version = project.version(1)
dataset = version.download("yolov8")

###

dataset_location = dataset.location

print(f"\nLocation of dataset: {dataset_location}")
print(f"\nTotal number of images: {len(list(Path(dataset_location).glob('**/*.jpg')))}")

if Path(f"{dataset_location}/train/images").exists():
  print(f"\nNumber of training images: {len(list(Path(f'{dataset_location}/train/images').glob('*.jpg')))}")
if Path(f"{dataset_location}/valid/images").exists():
  print(f"Number of validation images: {len(list(Path(f'{dataset_location}/valid/images').glob('*.jpg')))}")
if Path(f"{dataset_location}/test/images").exists():
  print(f"Number of test images: {len(list(Path(f'{dataset_location}/test/images').glob('*.jpg')))}")
print("\nContent of data.yaml file:")
%cat {dataset_location}/data.yaml

## Recommended: Connect to Google Drive

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the cells below can read from / write to it.
drive.mount("/content/drive")

In [None]:
#@title ## Upload dataset from Google Drive {display-mode: "form"}

#@markdown ### Google Drive path to your (zipped) dataset folder:
dataset_path = "/content/drive/MyDrive/yolov8_dataset.zip" #@param {type: "string"}
#@markdown - Please make sure to compress your dataset folder to **.zip** file for much faster upload speed!
#@markdown - Dataset has to be in [YOLOv8 format](https://roboflow.com/formats/yolov8-pytorch-txt).

from pathlib import Path

dataset_location = f"/content/{Path(dataset_path).stem}"

print("Uploading dataset from Google Drive...\n")
!rsync -ah --info=progress2 --no-i-r {dataset_path} /content
if Path(dataset_path).suffix == ".zip":
  import zipfile
  zip_path = f"/content/{Path(dataset_path).stem}.zip"
  if len(list(zipfile.Path(zip_path).iterdir())) > 1:
    !unzip -uq {zip_path} -d {dataset_location}
  else:
    !unzip -uq {zip_path} -d /content
  %rm {zip_path}
print("\nDataset was successfully uploaded!")

print(f"\nLocation of dataset: {dataset_location}")
print(f"\nTotal number of images: {len(list(Path(dataset_location).glob('**/*.jpg')))}")

if Path(f"{dataset_location}/train/images").exists():
  print(f"\nNumber of training images: {len(list(Path(f'{dataset_location}/train/images').glob('*.jpg')))}")
if Path(f"{dataset_location}/valid/images").exists():
  print(f"Number of validation images: {len(list(Path(f'{dataset_location}/valid/images').glob('*.jpg')))}")
if Path(f"{dataset_location}/test/images").exists():
  print(f"Number of test images: {len(list(Path(f'{dataset_location}/test/images').glob('*.jpg')))}")
print("\nContent of data.yaml file:")
%cat {dataset_location}/data.yaml

In [None]:
#@title ## Upload dataset from your local file system {display-mode: "form"}

#@markdown ### Name of your zipped dataset folder:
dataset_name = "yolov8_dataset" #@param {type: "string"}
#@markdown - Please make sure to compress your dataset folder to **.zip** file before uploading!
#@markdown - The name of the .zip file should be the same as for the dataset folder.
#@markdown - Dataset has to be in [YOLOv8 format](https://roboflow.com/formats/yolov8-pytorch-txt).

from pathlib import Path
import zipfile
from google.colab import files

dataset_location = f"/content/{dataset_name}"

uploaded = files.upload()

if len(list(zipfile.Path(f"{dataset_name}.zip").iterdir())) > 1:
  !unzip -uq {dataset_name}.zip -d {dataset_location}
else:
  !unzip -uq {dataset_name}.zip -d /content
%rm {dataset_name}.zip

print(f"\nLocation of dataset: {dataset_location}")
print(f"\nTotal number of images: {len(list(Path(dataset_location).glob('**/*.jpg')))}")

if Path(f"{dataset_location}/train/images").exists():
  print(f"\nNumber of training images: {len(list(Path(f'{dataset_location}/train/images').glob('*.jpg')))}")
if Path(f"{dataset_location}/valid/images").exists():
  print(f"Number of validation images: {len(list(Path(f'{dataset_location}/valid/images').glob('*.jpg')))}")
if Path(f"{dataset_location}/test/images").exists():
  print(f"Number of test images: {len(list(Path(f'{dataset_location}/test/images').glob('*.jpg')))}")
print("\nContent of data.yaml file:")
%cat {dataset_location}/data.yaml

## Edit `data.yaml`

Check the `data.yaml` file in your dataset folder to make sure the paths to the train, valid and test folders are correct.

- Open your dataset folder in the File Explorer (Folder symbol on the left side bar).
- Double-click on the `data.yaml` file, it will open in the editor to the right.

  Make sure that the paths to the train, valid and test folders are as follows:

  ``` yaml
  train: train/images
  val: valid/images
  test: test/images
  ```

- Save your changes with **Ctrl + S** and close the editor.

# Model training

In [None]:
#@title ## Optional: Select external logger {display-mode: "form"}

logger = "Weights&Biases" #@param ["Weights&Biases", "Comet", "ClearML"]

#@markdown > More info: [YOLOv8 logging](https://docs.ultralytics.com/modes/train/#logging)

# Install only the package for the selected logger, then start its login/initialization.
# The imports are placed after the respective %pip install on purpose, because the
# package is not installed before that line has run.
if logger == "Weights&Biases":
  %pip install -q wandb
  import wandb
  wandb.login()
elif logger == "Comet":
  %pip install -q comet_ml
  import comet_ml
  comet_ml.init()
elif logger == "ClearML":
  %pip install -q clearml
  import clearml
  clearml.browser_login()

## Train YOLOv8 detection model

- `name` name of the training run
- `imgsz` input image size (recommended: same size as for inference)
- `batch` specify batch size (recommended: 32)
- `epochs` set the number of training [epochs](https://machine-learning.paperspace.com/wiki/epoch) (recommended: 100-300+)
- `data` path to `data.yaml` file
- `model` specify the [pretrained model weights](https://github.com/ultralytics/ultralytics#models)
> `model=yolov8n.pt` YOLOv8n model (recommended)  
  `model=yolov8s.pt` YOLOv8s model
- `cache` cache images in RAM for faster training
- `patience` epochs to wait for no observable improvement for early stopping of training (default: 50)

> More information on YOLOv8 [model training](https://docs.ultralytics.com/modes/train/) 🚀

In [None]:
training_run_name = "YOLOv8n_320_batch32_epochs200" #@param {type: "string"}
#@markdown Add UTC timestamp in front of training run name:
add_timestamp = True #@param {type:"boolean"}
#@markdown ---

image_size = 640 #@param {type: "integer"}
batch_size = 32 #@param {type:"slider", min:32, max:128, step:32}
number_epochs = 100 #@param {type:"slider", min:10, max:500, step:10}
model = "yolov8n.pt" #@param ["yolov8n.pt", "yolov8s.pt"]

if add_timestamp:
  from datetime import datetime
  utc_timestamp = datetime.now().strftime("%Y%m%d_%H-%M")
  train_run_name = f"{utc_timestamp}_{training_run_name}"
else:
  train_run_name = training_run_name

%cd /content

!yolo detect train \
name={train_run_name} \
imgsz={image_size} \
batch={batch_size} \
epochs={number_epochs} \
data=/content/pass-1/data.yaml \
model={model} \
cache=True \
#patience=0 # disable EarlyStopping (default: 50)

## Tensorboard logger

> If you are using Firefox, **disable Enhanced Tracking Protection** for this website (click on the shield to the left of the address bar) for the Tensorboard logger to work correctly!

In [None]:
# Load the TensorBoard notebook extension and open it on the YOLO run directory.
%load_ext tensorboard
%tensorboard --logdir /content/runs/detect

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (same mount point as in the earlier "Connect to Google Drive" cell);
# presumably repeated here so the export cells work if that earlier cell was skipped.
drive.mount('/content/drive')

In [None]:
#@title ## Export to Google Drive or Download training results {display-mode: "form"}

training_results = "Export_Google_Drive" #@param ["Export_Google_Drive", "Download"]
#@markdown ---

#@markdown ### Path for saving training results in Google Drive:
GDrive_save_path = "/content/drive/MyDrive/Training_results/YOLOv8" #@param {type: "string"}

if training_results == "Export_Google_Drive":
  print("Exporting training results to Google Drive...\n")
  !rsync -ah --mkpath --info=progress2 --no-i-r /content/runs/detect/{train_run_name} {GDrive_save_path}
  print("\nTraining results were successfully exported!")
elif training_results == "Download":
  from google.colab import files
  %cd /content/runs/detect
  !zip -rq {train_run_name}.zip {train_run_name}
  %cd -
  files.download(f"/content/runs/detect/{train_run_name}.zip")

# Model validation

Test the performance of your model on the validation and/or test dataset.

> Copy the validation results (cell output) and save them to a .txt file, as they will not be saved automatically.

In [None]:
task = "val" #@param ["val", "test"]
#@markdown > Use `task: test` to validate on the dataset test split.

val_run_name = f"{train_run_name}_validate_{task}"

%cd /content

!yolo detect val \
name={val_run_name} \
model=/content/runs/detect/{train_run_name}/weights/best.pt \
data=/content/pass-1/data.yaml \
imgsz={image_size} \
split={task}

In [None]:
#@title ## Export to Google Drive or Download validation results {display-mode: "form"}

validation_results = "Export_Google_Drive" #@param ["Export_Google_Drive", "Download"]
#@markdown ---

#@markdown ### Path for saving validation results in Google Drive:
GDrive_save_path = "/content/drive/MyDrive/Training_results/YOLOv8" #@param {type: "string"}

if validation_results == "Export_Google_Drive":
  print("Exporting validation results to Google Drive...\n")
  !rsync -ah --mkpath --info=progress2 --no-i-r /content/runs/detect/{val_run_name} {GDrive_save_path}/{train_run_name}
  print("\nValidation results were successfully exported!")
elif validation_results == "Download":
  from google.colab import files
  %cd /content/runs/detect
  !zip -rq {val_run_name}.zip {val_run_name}
  %cd -
  files.download(f"/content/runs/detect/{val_run_name}.zip")

# Model inference

Use your model to detect insects on images in the dataset test split.

In [None]:
#@markdown #### Decrease confidence threshold to detect objects with lower confidence score:
confidence_threshold = 0.5 #@param {type:"slider", min:0.1, max:1, step:0.1}
#@markdown #### Increase IoU threshold if the same object is detected multiple times:
iou_threshold = 0.5 #@param {type:"slider", min:0.1, max:1, step:0.1}

det_run_name = f"{train_run_name}_detect"

%cd /content

!yolo detect predict \
name={det_run_name} \
model=/content/runs/detect/{train_run_name}/weights/best.pt \
source=/content/pass-1/test/images \
imgsz={image_size} \
conf={confidence_threshold} \
iou={iou_threshold} \
save=True \
line_width=1 # bounding box line thickness and label size (default: 3)

In [None]:
#@title ## Export to Google Drive or Download inference results {display-mode: "form"}

inference_results = "Export_Google_Drive" #@param ["Export_Google_Drive", "Download"]
#@markdown ---

#@markdown ### Path for saving inference results in Google Drive:
GDrive_save_path = "/content/drive/MyDrive/Training_results/YOLOv8" #@param {type: "string"}

%cd /content/runs/detect
!zip -rq {det_run_name}.zip {det_run_name}
%cd -

if inference_results == "Export_Google_Drive":
  print("\nExporting inference results to Google Drive...\n")
  !rsync -ah --mkpath --info=progress2 --no-i-r /content/runs/detect/{det_run_name}.zip {GDrive_save_path}/{train_run_name}
  print("\nInference results were successfully exported!")
elif inference_results == "Download":
  from google.colab import files
  files.download(f"/content/runs/detect/{det_run_name}.zip")

## Show inference results on test images

In [None]:
from pathlib import Path
from IPython.display import Image, display

# Folder with the annotated images written by the inference run
image_path = Path(f"/content/runs/detect/{det_run_name}")

# Upper bound on the number of images shown, to keep the cell output small
max_images = 20

# Sort the file list for a reproducible selection (glob order is not guaranteed),
# then display at most `max_images` images.
for img in sorted(image_path.glob("*.jpg"))[:max_images]:
    display(Image(filename=str(img)))
    print("\n")


In [None]:
from pathlib import Path
from PIL import Image as PILImage
from IPython.display import display

# Define the path to the single image
single_image_path = "/content/pass-1/valid/images/000009_jpg.rf.6dc5106a69be6483a98f38c8acfb9dda.jpg"  # Replace with the path to your image

# Update the detection run name
det_run_name = f"{train_run_name}_detect"

# Change directory to /content
%cd /content

# Run YOLO detection on a single image
!yolo detect predict \
name={det_run_name} \
model=/content/runs/detect/{train_run_name}/weights/best.pt \
source={single_image_path} \
imgsz={image_size} \
conf={confidence_threshold} \
iou={iou_threshold} \
save=True \
line_width=1

# Path to the output image after detection
output_image_path = Path(f"/content/runs/detect/20240807_07-20_YOLOv8n_320_batch32_epochs200_detect3/000009_jpg.rf.6dc5106a69be6483a98f38c8acfb9dda.jpg")  # Adjust if necessary

# Open and display the output image
result_image = PILImage.open(output_image_path)
display(result_image)
