### Main file for running the code
Please use this file to run the code. The file has two major components:
1. **Main code**: This is the main code for running the model. It includes conversion of the data to the required format, training the model, and saving the model.
2. **Tooling**: This is a secondary module that shows some of the tooling that is used in the code.

### Main code

#### Normal training

In [None]:
# Normal training (without transfer learning)
import utils_convertion
import json

import utils_models
import utils_support

import random
import numpy as np
import torch

# Seed the Python, NumPy and PyTorch RNGs with the same value
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Prepare the data (create coco and yolo formats)
utils_convertion.process_all_subdirs('datasets/scarecrow_dataset')

# Create yaml file for data configs - If these functions don't work you will have to manually adjust the path in the yaml files
utils_support.generate_yaml('scarecrow_dataset', 'scarecrow.yaml')

# Delete all caches (stale caches can lead to corrupted images otherwise)
utils_support.delete_cache()

# Initialize yolo model
trainer = utils_models.YOLOModel(model_path='models/pretrained/yolo11n.pt', device='cpu')

# Single name for the grid-search output so the write and the read below cannot drift apart
result_file = 'grid_results.json'

# Step 1: run the grid search
trainer.grid_search(
    data_yaml='scarecrow.yaml',
    epochs=1,  # Keep small for speed; increase for actual tuning
    seed=seed,
    result_file=result_file
)

# Step 2: load best combination from gridsearch
with open(result_file, 'r', encoding='utf-8') as f:
    results = json.load(f)

# Fail with a clear message instead of the opaque "max() arg is an empty sequence"
if not results:
    raise ValueError(f"No grid-search results found in '{result_file}'; cannot select best parameters")

# The entry with the highest mAP_50 wins; its 'params' dict feeds the final training run
best_result = max(results, key=lambda r: r['mAP_50'])
best_params = best_result['params']

# Step 3: train the final model with the best parameters
trainer.train(
    data_yaml='scarecrow.yaml',
    epochs=1,  # Keep small for testing; increase for actual training
    seed=seed,
    **best_params  # Automatically injects imgsz, batch_size, iou, conf, cls
)

#### Transfer learning

In [None]:
# Transfer training
import utils_convertion
import json
import utils_support

import random
import numpy as np
import torch

def _load_grid_results(result_file):
    """Load the grid-search results stored as JSON in *result_file*.

    Args:
        result_file: Path of the JSON file to read.

    Returns:
        The decoded JSON content. The code below treats it as a collection
        of entries that each have an 'mAP_50' score and a 'params' mapping.

    Raises:
        ValueError: If the file decodes to an empty result set, so the
            caller does not hit an opaque "max() arg is an empty sequence".
    """
    with open(result_file, 'r', encoding='utf-8') as f:
        results = json.load(f)
    if not results:
        raise ValueError(f"No grid-search results found in '{result_file}'; cannot select best parameters")
    return results


# Seed the Python, NumPy and PyTorch RNGs with the same value
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Create coco and yolo formats
utils_convertion.process_all_subdirs('datasets/bird-detection-farm')
utils_convertion.process_all_subdirs('datasets/scarecrow_dataset')

# Create yaml file for data configs - If these functions don't work you will have to manually adjust the path in the yaml files
utils_support.generate_yaml('bird-detection-farm', 'bird.yaml')
utils_support.generate_yaml('scarecrow_dataset', 'scarecrow.yaml')

# Delete all caches (stale caches can lead to corrupted images otherwise)
utils_support.delete_cache()

# Initialize yolo model
from utils_models import YOLOModel

trainer = YOLOModel(model_path='models/pretrained/yolo11n.pt', device='cpu')

# Phase 1: Train on bird.yaml
# Step 1: grid search for phase 1
trainer.grid_search(
    data_yaml='bird.yaml',
    epochs=2,  # Quick run; increase for real search
    seed=seed,
    result_file='grid_bird_results.json'
)

# Pick the parameter set of the entry with the highest mAP_50
bird_results = _load_grid_results('grid_bird_results.json')
best_bird_params = max(bird_results, key=lambda r: r['mAP_50'])['params']

# Step 2: train on bird.yaml with best parameters and save weights
trainer.train(
    data_yaml='bird.yaml',
    epochs=1,  # Keep small for testing; increase for actual training
    seed=seed,
    **best_bird_params
)
trainer.save('models/phase1_bird.pt')

# Phase 2: Fine-tune on scarecrow.yaml using best weights from Phase 1
# Step 1: grid search for phase 2
trainer.load('models/phase1_bird.pt')  # Load phase 1 weights

trainer.grid_search(
    data_yaml='scarecrow.yaml',
    epochs=2,  # Quick run; increase for real search
    seed=seed,
    result_file='grid_scarecrow_results.json'
)

# Pick the parameter set of the entry with the highest mAP_50
scarecrow_results = _load_grid_results('grid_scarecrow_results.json')
best_scarecrow_params = max(scarecrow_results, key=lambda r: r['mAP_50'])['params']

# Step 2: train on scarecrow.yaml with best parameters
trainer.train(
    data_yaml='scarecrow.yaml',
    epochs=1,  # Keep small for testing; increase for actual training
    seed=seed,
    **best_scarecrow_params
)
trainer.save('models/phase2_scarecrow.pt')

### Tooling
Test whether the YOLO annotations are formatted correctly.
There is also tooling for dataset conversion and YAML creation; these can be found in the main code blocks above.

In [None]:
# For testing if the yolo annotations work
import utils_support

# Visual sanity check on one test sample: image path, matching label file, class names
utils_support.visualize_yolo_annotations(
    'datasets/scarecrow_dataset/test/images/test_original_2.png',
    'datasets/scarecrow_dataset/test/labels/test_original_2.txt',
    class_names=['Bird'],
)