### Main code

Run the cells below to execute the full pipeline.

First a grid search is run, and its best parameters are later used in the model. This model is trained only on the original data, so that we can later check whether transfer training indeed had a positive effect on model performance.

In [None]:
# Normal training (without transfer learning)
import utils_convertion
import json
import utils_models
import utils_support

import random
import numpy as np
import torch
# Seed every random number generator used here (random, numpy, torch)
# with the same value.
seed = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(seed)

# Data preparation: create the coco and yolo formats for the scarecrow dataset
utils_convertion.process_all_subdirs('datasets/scarecrow_dataset')

# Write the yaml file holding the data config. If this helper does not work,
# the path inside the yaml file has to be adjusted by hand.
utils_support.generate_yaml('scarecrow_dataset', 'scarecrow.yaml')

# Remove all caches before training (they can lead to corrupted images otherwise)
utils_support.delete_cache()

# Build the yolo model wrapper from the pretrained weights
trainer = utils_models.YOLOModel(
    device='cuda',
    model_path='models/pretrained/yolo11n.pt',
)

# Step 1: grid search on the scarecrow data; results go to grid_results.json
trainer.grid_search(
    result_file='grid_results.json',
    seed=seed,
    epochs=25,  # Keep small for speed; increase for actual tuning
    data_yaml='scarecrow.yaml',
)

# Step 2: read the grid-search results back and pick the entry with the
# highest mAP_50
with open('grid_results.json') as results_file:
    results = json.loads(results_file.read())

best_result = max(results, key=lambda entry: entry['mAP_50'])
best_params = best_result['params']
print(best_params)


In [None]:
# Transfer training
import utils_convertion
import json
import utils_support

import random
import numpy as np
import torch
# Seed every random number generator used here (random, numpy, torch)
# with the same value.
seed = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(seed)

# Create the coco and yolo formats for both datasets
for _dataset_dir in ('datasets/bird-detection-farm', 'datasets/scarecrow_dataset'):
    utils_convertion.process_all_subdirs(_dataset_dir)

# Write one yaml data config per dataset. If these helpers do not work,
# the paths inside the yaml files have to be adjusted by hand.
for _folder, _yaml_name in (
    ('bird-detection-farm', 'bird.yaml'),
    ('scarecrow_dataset', 'scarecrow.yaml'),
):
    utils_support.generate_yaml(_folder, _yaml_name)

# Remove all caches before training (they can lead to corrupted images otherwise)
utils_support.delete_cache()

# Build the yolo model wrapper from the pretrained weights
from utils_models import YOLOModel

trainer = YOLOModel(
    device='cuda',
    model_path='models/pretrained/yolo11n.pt',
)

# Phase 1: pre-train on the bird dataset (bird.yaml)
# Step 1: grid search for phase 1
trainer.grid_search(
    data_yaml='bird.yaml',
    epochs=25,  # Quick run; increase for real search
    seed=seed,
    result_file='grid_bird_results.json'
)

with open('grid_bird_results.json', 'r') as f:
    bird_results = json.load(f)

# NOTE: the grid-search results are loaded but the hyperparameters below are
# still set by hand. To use the best combination instead, uncomment this line
# and pass **best_bird_params to trainer.train():
#best_bird_params = max(bird_results, key=lambda r: r['mAP_50'])['params']

# Step 2: train on bird.yaml and save the phase-1 weights.
# The original cell ran the scarecrow training first and the bird training
# second, which contradicts the transfer-learning plan described in these
# comments; the two blocks are now in bird -> scarecrow order, each keeping
# its own hyperparameters and output file.
trainer.train(
    data_yaml='bird.yaml',
    epochs=100,
    seed=seed,
    iou=0.3,
    conf=0.1,
    imgsz=896,
    cls=0.05,
    batch_size=20
)
trainer.save('models/phase1_bird.pt')


# Phase 2: fine-tune on scarecrow.yaml, continuing with the same trainer object
# NOTE(review): presumably train() continues from the weights left in `trainer`
# by phase 1 - confirm against utils_models.YOLOModel.
# Step 1: grid search for phase 2
# TODO: no grid search is run for phase 2 yet; the values below are hand-picked.

# Step 2: train on scarecrow.yaml and save the phase-2 weights
trainer.train(
    data_yaml='scarecrow.yaml',
    epochs=100,  # 100 is enough epochs for this small dataset
    seed=seed,
    iou=0.3,
    conf=0.3,
    imgsz=896,
    cls=0.05,
    batch_size=20
)
trainer.save('models/phase2_scarecrow.pt')
# Step 3: Evaluate the saved checkpoints on the test split of their own dataset.
# The original cell loaded 'models/phase1_scarecrow.pt' (never saved by this
# notebook) and validated against 'scarecrows.yaml' (the generated config is
# 'scarecrow.yaml'); both names are corrected here.
from ultralytics import YOLO

# Checkpoint saved after training on bird.yaml -> bird test set
bird_model = YOLO('models/phase1_bird.pt')
bird_model.val(data='bird.yaml', split='test')

# Checkpoint saved after training on scarecrow.yaml -> scarecrow test set
model = YOLO('models/phase2_scarecrow.pt')
model.val(data='scarecrow.yaml', split='test')



### Converters

In [None]:
from PIL import Image
import os

def validate_images(image_dir):
    """Verify every file in ``image_dir`` with Pillow and report the results.

    Each regular file in the directory is opened and passed through
    ``Image.verify()``. A line is printed per file saying whether it is valid
    or corrupt. Sub-directories and other non-file entries are skipped rather
    than being reported as corrupt images.

    Args:
        image_dir: Path of the directory whose files should be checked.

    Returns:
        list[str]: Paths of the files that failed verification, in sorted
        filename order. Empty when every file verified successfully (or the
        directory contains no files).

    Raises:
        OSError: If ``image_dir`` itself cannot be listed (e.g. it does not
            exist).
    """
    corrupt_paths = []
    # Sort the listing so the printed report has a deterministic order.
    for image_filename in sorted(os.listdir(image_dir)):
        image_path = os.path.join(image_dir, image_filename)
        if not os.path.isfile(image_path):
            # A directory is not an image; opening it would only produce a
            # misleading "Corrupt image" line.
            continue
        try:
            # The context manager closes the file handle that Image.open()
            # would otherwise keep open until garbage collection.
            with Image.open(image_path) as img:
                img.verify()  # Verify that it is a valid image
            print(f"Valid image: {image_path}")
        except (IOError, SyntaxError) as e:
            print(f"Corrupt image: {image_path} - Error: {e}")
            corrupt_paths.append(image_path)
    return corrupt_paths

# Example: check the training images of the scarecrow dataset
image_dir = 'datasets/scarecrow_dataset/train/images'
validate_images(image_dir=image_dir)

### Model loops

In [None]:
import utils_models
import utils_support

# Optional: (re)create the yaml data config before training
#utils_support.generate_yaml(folder_name='bird-detection-farm', yaml_filename='birdfarm.yaml')

# Short training run in debug mode, using the wrapper's default constructor arguments
trainer = utils_models.YOLOModel()
trainer.train(
    data_yaml='data.yaml',
    epochs=10,
    imgsz=640,
    batch_size=32,
    debug_mode=True,
)

In [None]:
import utils_support

# Draw the yolo annotations of one test image; the only class name is 'Bird'
utils_support.visualize_yolo_annotations(
    'datasets/scarecrow_dataset/test/images/test_original_2.png',
    'datasets/scarecrow_dataset/test/labels/test_original_2.txt',
    class_names=['Bird'],
)

In [None]:
import utils_models

# Run a prediction on a single test image with a default-constructed model wrapper
trainer = utils_models.YOLOModel()
trainer.predict(
    'datasets/scarecrow_dataset/test/images/test_original_2.png'
)

In [None]:
import utils_support

# Generate a data-config yaml named 'test.yaml'. No dataset folder is passed,
# so the helper's default is used (its value is not visible in this notebook).
utils_support.generate_yaml(yaml_filename='test.yaml')

In [None]:
import utils_support

# Clear the caches via the project helper. Per the note in the training cells,
# leftover caches can lead to corrupted images.
utils_support.delete_cache()