In [None]:
#!/usr/bin/env python3


# Farm Tech Solutions - Computer Vision Project

This notebook implements a YOLOv5-based object detection workflow for agricultural applications.<br>
It generates the dataset configuration and builds the training, validation, and test commands for a YOLOv5 model on the prepared dataset.<br>
Author: Gabriel Ribeiro (RM560173)<br>
Date: 2025-04-19<br>


## Imports

In [None]:
import argparse
import logging
import os
import shlex
import shutil
import subprocess
import sys
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch
import yaml

Configure logging

In [None]:
# Send every record at INFO level or above to two places: the notebook output
# (stdout) and a log file in the current working directory.
# mode="w" means yolo_training.log is truncated whenever this handler is created.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    handlers=[
        logging.StreamHandler(stream=sys.stdout),
        logging.FileHandler(filename="yolo_training.log", mode="w"),
    ],
)

## Configuration

Default parameters, mirroring the command-line (argparse) options of the original script.

In [None]:
# Tunable settings shared by the cells below.
epochs: int = 30  # forwarded to train.py as --epochs
batch_size: int = 16  # forwarded to train.py / val.py as --batch
img_size: int = 640  # forwarded to the YOLOv5 scripts as --img
weights: str = "yolov5s.pt"  # initial weights, forwarded to train.py as --weights
data_path: str = "./data"  # dataset directory handed to generate_dataset_config
save_dir: str = "./results"  # output directory, forwarded as --project
compare: bool = False  # Whether to compare models with different epoch settings

In [None]:
# Echo the active settings so the run is self-describing in the notebook output.
print(
    "\n".join(
        [
            "Configuration:",
            f"- Epochs: {epochs}",
            f"- Batch Size: {batch_size}",
            f"- Image Size: {img_size}",
            f"- Weights: {weights}",
            f"- Data Path: {data_path}",
            f"- Save Directory: {save_dir}",
        ]
    )
)

## Set up YOLOv5

In [None]:
def setup_yolov5():
    """Make sure a local YOLOv5 checkout exists and return its path.

    If no ``yolov5`` entry exists in the current working directory, the
    Ultralytics YOLOv5 repository is cloned there and its requirements are
    installed with the interpreter that is running this notebook. When the
    entry already exists nothing is executed.

    Returns:
        str: Path of the checkout relative to the current working directory
        (``"yolov5"``).

    Raises:
        RuntimeError: If ``git`` cannot be started or ``git clone`` exits with
            a non-zero status. Without a checkout none of the later steps can
            work, so this fails loudly instead of logging a false success.
    """
    yolov5_path = Path("yolov5")
    if yolov5_path.exists():
        logging.info("YOLOv5 repository already exists.")
        return str(yolov5_path)

    logging.info("Cloning YOLOv5 repository...")
    try:
        clone = subprocess.run(
            ["git", "clone", "https://github.com/ultralytics/yolov5.git", str(yolov5_path)]
        )
    except OSError as exc:  # raised when the git executable cannot be launched
        raise RuntimeError(f"Could not run git to clone YOLOv5: {exc}") from exc
    if clone.returncode != 0:
        raise RuntimeError(
            f"git clone of YOLOv5 failed with exit status {clone.returncode}"
        )

    # Use ``sys.executable -m pip`` so packages land in the kernel's environment
    # rather than in whichever ``pip`` happens to be first on PATH.
    requirements = yolov5_path / "requirements.txt"
    install = subprocess.run(
        [sys.executable, "-m", "pip", "install", "-r", str(requirements)]
    )
    if install.returncode != 0:
        # Kept best-effort: the checkout exists, so only warn about the install.
        logging.warning(
            f"pip install of {requirements} exited with status "
            f"{install.returncode}; YOLOv5 dependencies may be missing."
        )
    else:
        logging.info("YOLOv5 repository cloned and dependencies installed.")
    return str(yolov5_path)

Run the setup

In [None]:
# Keep the checkout path in a notebook-level variable: the training,
# validation, testing and comparison cells below all pass it on.
yolov5_path = setup_yolov5()

## Dataset configuration

In [None]:
def generate_dataset_config(data_path):
    """Write ``dataset.yaml`` into the data directory and return its path.

    Args:
        data_path: Dataset directory. An absolute path is used as given
            (normalised); a relative path is resolved against the *parent of
            the current working directory*, not against the working directory.

    Returns:
        str: Path of the generated ``dataset.yaml``.

    Raises:
        FileNotFoundError: If the resolved data directory does not exist.
    """
    # NOTE: "__file__" is a plain string here, so abspath() resolves it against
    # the current working directory; the two dirname() calls then yield the
    # parent of the working directory, which is used as the project root.
    base_dir = os.path.abspath(
        os.path.dirname(os.path.dirname(os.path.abspath("__file__")))
    )

    if os.path.isabs(data_path):
        data_root = os.path.normpath(data_path)
    else:
        data_root = os.path.normpath(os.path.join(base_dir, data_path))

    if not os.path.exists(data_root):
        missing_msg = f"Data path {data_root} does not exist."
        logging.error(missing_msg)
        raise FileNotFoundError(missing_msg)

    # One "<split>/images" directory per dataset split, as absolute paths.
    image_dirs = {
        split: os.path.normpath(os.path.join(data_root, split, "images"))
        for split in ("train", "val", "test")
    }

    # A missing split directory is only reported; the file is still written.
    display_names = {"train": "train", "val": "validation", "test": "test"}
    for split, directory in image_dirs.items():
        if os.path.exists(directory):
            continue
        logging.warning(
            f"{display_names[split].capitalize()} images directory {directory} does not exist."
        )

    dataset_config = {
        "path": data_root,
        **image_dirs,
        "nc": 2,  # Number of classes
        "names": ["A_Cat", "B_Dog"],  # Class names
    }

    config_path = os.path.join(data_root, "dataset.yaml")
    with open(config_path, "w") as yaml_file:
        yaml.dump(dataset_config, yaml_file, default_flow_style=False)

    logging.info(f"Dataset configuration generated at {config_path}")
    logging.info(
        f"Using the following paths:"
        f"\n  - Data path: {data_root}"
        f"\n  - Train images: {image_dirs['train']}"
        f"\n  - Validation images: {image_dirs['val']}"
        f"\n  - Test images: {image_dirs['test']}"
    )
    return config_path

Generate dataset configuration

In [None]:
try:
    config_path = generate_dataset_config(data_path)
    print("\nDataset Configuration:")
    # Echo the generated YAML back so it can be inspected in the notebook
    if not os.path.exists(config_path):
        print(f"Config file {config_path} does not exist.")
    else:
        print(Path(config_path).read_text())
except Exception as err:
    print(f"Error generating dataset config: {err}")
    # Deliberate fallback for demo purposes: point at the expected location so
    # the later cells can still build their commands.
    config_path = os.path.join(data_path, "dataset.yaml")

## Training

In [None]:
def train_model(
    yolov5_path, config_path, epochs, batch_size, img_size, weights, save_dir
):
    """Build, log and print the YOLOv5 training command for one run.

    The command itself is not executed here: the ``!{train_cmd}`` line is left
    commented out, so this function only reports the command and the locations
    where a run with that command would store its results.

    Args:
        yolov5_path: Directory of the YOLOv5 checkout (the command is meant to
            be run from there).
        config_path: Path to the dataset YAML file, passed as ``--data``.
        epochs: Value for ``--epochs``.
        batch_size: Value for ``--batch``.
        img_size: Value for ``--img``.
        weights: Initial weights, passed as ``--weights``; a relative value is
            resolved against the current working directory.
        save_dir: Output directory, passed as ``--project``.

    Returns:
        dict: ``run_name``, ``results_path`` (``save_dir``/``run_name``) and
        ``best_weights`` (``results_path``/``weights/best.pt``).
    """
    cwd = os.getcwd()

    # Resolve every path against the current directory *before* changing into
    # the YOLOv5 checkout, otherwise relative paths would change meaning.
    abs_config_path = os.path.abspath(config_path)
    abs_save_dir = os.path.abspath(save_dir)
    abs_weights = weights if os.path.isabs(weights) else os.path.join(cwd, weights)

    run_name = (
        f"train_e{epochs}_bs{batch_size}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    )

    # Assemble the command from separate arguments and quote each one, so paths
    # containing spaces or shell metacharacters survive `!{train_cmd}` intact.
    # (shlex.quote targets POSIX shells.)
    train_args = [
        "python", "train.py",
        "--img", img_size,
        "--batch", batch_size,
        "--epochs", epochs,
        "--data", abs_config_path,
        "--weights", abs_weights,
        "--project", abs_save_dir,
        "--name", run_name,
    ]
    train_cmd = " ".join(shlex.quote(str(arg)) for arg in train_args)

    os.chdir(yolov5_path)
    try:
        logging.info(
            f"Starting training with {epochs} epochs and batch size {batch_size}..."
        )
        logging.info(f"Running: {train_cmd}")

        # In Jupyter, we'll use !command to run shell commands
        print(f"Running training command: {train_cmd}")
        # Uncomment to actually run the training (takes time)
        # !{train_cmd}
    finally:
        # Always return to the original directory, even if the command fails,
        # so later cells keep resolving relative paths as before.
        os.chdir(cwd)

    results_path = os.path.join(abs_save_dir, run_name)
    best_weights = os.path.join(results_path, "weights/best.pt")
    logging.info(f"Training completed. Results saved to {results_path}")
    return {
        "run_name": run_name,
        "results_path": results_path,
        "best_weights": best_weights,
    }

Create save directory

In [None]:
# Create the output directory (and any missing parents); an existing one is fine.
Path(save_dir).mkdir(parents=True, exist_ok=True)

Run the training function

In [None]:
# Keyword arguments make the mapping from notebook settings to parameters explicit.
train_results = train_model(
    yolov5_path=yolov5_path,
    config_path=config_path,
    epochs=epochs,
    batch_size=batch_size,
    img_size=img_size,
    weights=weights,
    save_dir=save_dir,
)

In [None]:
# Summarise where the training run stores its artefacts.
print(
    "\n".join(
        [
            "\nTraining Results:",
            f"- Run name: {train_results['run_name']}",
            f"- Results path: {train_results['results_path']}",
            f"- Best weights: {train_results['best_weights']}",
        ]
    )
)

## Validation

In [None]:
def validate_model(
    yolov5_path, config_path, best_weights, img_size, batch_size, save_dir
):
    """Build, log and print the YOLOv5 validation command for a set of weights.

    The command itself is not executed here: the ``!{val_cmd}`` line is left
    commented out, so this function only reports the command and the directory
    where a run with that command would store its results.

    Args:
        yolov5_path: Directory of the YOLOv5 checkout (the command is meant to
            be run from there).
        config_path: Path to the dataset YAML file, passed as ``--data``.
        best_weights: Path to the weights file, passed as ``--weights``; its
            file stem becomes part of the run name.
        img_size: Value for ``--img``.
        batch_size: Value for ``--batch``.
        save_dir: Output directory, passed as ``--project``.

    Returns:
        dict: ``run_name`` and ``results_path`` (``save_dir``/``run_name``).
    """
    cwd = os.getcwd()

    # Resolve every path against the current directory *before* changing into
    # the YOLOv5 checkout, otherwise relative paths would change meaning.
    abs_config_path = os.path.abspath(config_path)
    abs_best_weights = os.path.abspath(best_weights)
    abs_save_dir = os.path.abspath(save_dir)

    run_name = (
        f"val_{Path(best_weights).stem}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    )

    # Assemble the command from separate arguments and quote each one, so paths
    # containing spaces or shell metacharacters survive `!{val_cmd}` intact.
    # (shlex.quote targets POSIX shells.)
    val_args = [
        "python", "val.py",
        "--img", img_size,
        "--batch", batch_size,
        "--data", abs_config_path,
        "--weights", abs_best_weights,
        "--project", abs_save_dir,
        "--name", run_name,
        "--task", "val",
    ]
    val_cmd = " ".join(shlex.quote(str(arg)) for arg in val_args)

    os.chdir(yolov5_path)
    try:
        logging.info(f"Starting validation with weights {best_weights}...")
        logging.info(f"Running: {val_cmd}")
        print(f"Running validation command: {val_cmd}")
        # Uncomment to actually run the validation (takes time)
        # !{val_cmd}
    finally:
        # Always return to the original directory, even if the command fails,
        # so later cells keep resolving relative paths as before.
        os.chdir(cwd)

    results_path = os.path.join(abs_save_dir, run_name)
    logging.info(f"Validation completed. Results saved to {results_path}")
    return {"run_name": run_name, "results_path": results_path}

Run the validation function

In [None]:
# Validate using the weights location reported by the training step.
val_results = validate_model(
    yolov5_path=yolov5_path,
    config_path=config_path,
    best_weights=train_results["best_weights"],
    img_size=img_size,
    batch_size=batch_size,
    save_dir=save_dir,
)

In [None]:
# Summarise where the validation run stores its artefacts.
print(
    "\n".join(
        [
            "\nValidation Results:",
            f"- Run name: {val_results['run_name']}",
            f"- Results path: {val_results['results_path']}",
        ]
    )
)

## Testing

In [None]:
def test_model(yolov5_path, config_path, best_weights, img_size, batch_size, save_dir):
    """Test the trained model on the test set."""
    cwd = os.getcwd()

    # Convert paths to absolute before changing directory
    abs_config_path = os.path.abspath(config_path)
    abs_best_weights = os.path.abspath(best_weights)
    abs_save_dir = os.path.abspath(save_dir)

    # Path to test images relative to YOLOv5 directory
    abs_test_images = os.path.join(os.path.dirname(abs_config_path), "test/images")

    # Change to YOLOv5 directory
    os.chdir(yolov5_path)
    run_name = f"test_best_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    # Use absolute path to test images instead of relative path
    test_cmd = f"python detect.py --img {img_size} --source {abs_test_images} \
              --weights {abs_best_weights} --project {abs_save_dir} --name {run_name} --save-txt --save-conf"
    logging.info(f"Starting testing with weights {best_weights}...")
    logging.info(f"Running: {test_cmd}")
    print(f"Running test command: {test_cmd}")
    # Uncomment to actually run the testing (takes time)
    # !{test_cmd}

    # Return to original directory
    os.chdir(cwd)
    results_path = os.path.join(abs_save_dir, run_name)
    logging.info(f"Testing completed. Results saved to {results_path}")
    return {
        "run_name": run_name,
        "results_path": results_path,
    }

Run the test function

In [None]:
# Run detection on the test images using the weights location reported by training.
test_results = test_model(
    yolov5_path=yolov5_path,
    config_path=config_path,
    best_weights=train_results["best_weights"],
    img_size=img_size,
    batch_size=batch_size,
    save_dir=save_dir,
)

In [None]:
# Summarise where the test run stores its artefacts.
print(
    "\n".join(
        [
            "\nTest Results:",
            f"- Run name: {test_results['run_name']}",
            f"- Results path: {test_results['results_path']}",
        ]
    )
)

## Report

In [None]:
def generate_report(train_results, val_results, test_results, save_dir):
    """Write a Markdown summary of the training, validation and test steps.

    The report lists where each step stores its results and adds fixed
    "Metrics" and "Suggestions for Improvement" sections.

    Args:
        train_results: Mapping with ``results_path`` and ``best_weights``.
        val_results: Mapping with ``results_path``.
        test_results: Mapping with ``results_path``.
        save_dir: Directory that receives ``model_report.md``; it is created
            if it does not exist yet.

    Returns:
        str: Path of the written report (``save_dir``/``model_report.md``).

    Raises:
        KeyError: If one of the mappings lacks a key listed above.
    """
    report_lines = [
        "# YOLO Model Training and Evaluation Report\n\n",
        f"**Date:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n",
        "## Training Results\n\n",
        f"- Results directory: {train_results['results_path']}\n",
        f"- Best weights: {train_results['best_weights']}\n\n",
        "## Validation Results\n\n",
        f"- Results directory: {val_results['results_path']}\n\n",
        "## Test Results\n\n",
        f"- Results directory: {test_results['results_path']}\n\n",
        "## Performance Analysis\n\n",
        "### Metrics\n\n",
        "- Precision, Recall, and mAP scores can be found in the validation results directory.\n",
        "- Inference examples can be found in the test results directory.\n\n",
        "### Suggestions for Improvement\n\n",
        "1. More accurate image labels for better detection\n",
        "2. Data augmentation techniques for better model generalization\n",
        "3. Testing different model architectures (YOLOv5s, YOLOv5m, YOLOv5l, etc.)\n",
    ]

    # Do not rely on another cell having created the directory already.
    # An empty save_dir means "current directory", which needs no creation.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    report_path = os.path.join(save_dir, "model_report.md")
    with open(report_path, "w") as f:
        f.writelines(report_lines)
    print(f"Report generated at {report_path}")
    return report_path

Generate the report

In [None]:
# Combine the three result dictionaries into one Markdown report.
report_path = generate_report(
    train_results=train_results,
    val_results=val_results,
    test_results=test_results,
    save_dir=save_dir,
)

In [None]:
print("\nReport Preview:")
# Show only the first 15 lines of the report
try:
    with open(report_path, "r") as report_file:
        for line_number, line in enumerate(report_file):
            if line_number >= 15:
                break
            print(line.rstrip())
except Exception as err:
    print(f"Error reading report: {err}")

## Model comparison (optional)

In [None]:
def compare_models(yolov5_path, config_path, img_size, batch_size, weights, save_dir):
    """Prepare the (placeholder) comparison of models with different epoch settings.

    No model is trained here; the function only creates the comparison
    directory and prints what a full comparison would do.

    NOTE: whether anything happens is controlled by the notebook-level
    ``compare`` flag, which is read as a global rather than passed in.

    Args:
        yolov5_path: Not used by the placeholder implementation.
        config_path: Not used by the placeholder implementation.
        img_size: Not used by the placeholder implementation.
        batch_size: Not used by the placeholder implementation.
        weights: Not used by the placeholder implementation.
        save_dir: Base output directory; ``comparison`` is created inside it.

    Returns:
        dict | None: ``{"comparison_dir": <absolute path>}`` when ``compare``
        is truthy, otherwise ``None``.
    """
    if not compare:  # Skip this section if compare flag is not set
        print("Skipping model comparison (not enabled)")
        return None
    print("Starting model comparison with different epoch settings...")

    comparison_dir = os.path.join(os.path.abspath(save_dir), "comparison")
    os.makedirs(comparison_dir, exist_ok=True)

    # NOTE: In an actual notebook, this would train models with different epochs
    # For brevity, we'll just print what would happen
    print("Would train with 30 epochs and 60 epochs, then compare results")
    print(f"Results would be saved to: {comparison_dir}")
    return {"comparison_dir": comparison_dir}

Run the comparison if enabled

In [None]:
# Only call the comparison when the notebook-level flag enables it.
if compare:
    comparison_results = compare_models(
        yolov5_path=yolov5_path,
        config_path=config_path,
        img_size=img_size,
        batch_size=batch_size,
        weights=weights,
        save_dir=save_dir,
    )