In [None]:
import os
import shutil
from ultralytics import YOLO
import numpy as np
import cv2
import random
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import itertools
import csv


In [None]:
# Split dataset
# Pairs every YOLO label file in `yolo_folder` with its image in
# `images_folder`, then divides the pairs 70% / 20% / 10% into
# train / val / test lists for the copy step that follows.
yolo_folder = '/home/student/Desktop/current_model_ai/obj_train_data'
images_folder = '/home/student/Desktop/current_model_ai/ImagesTotal'
output_folder = '/home/student/Desktop/current_model_ai'

image_extensions = ('.png', '.jpg', '.jpeg')
split_seed = 42  # fixed seed so a fresh "Restart & Run All" reproduces the same split

# === Prepare output directories and clear the previous split ===
for subset in ['train', 'val', 'test']:
    images_path = os.path.join(output_folder, subset, 'images')
    labels_path = os.path.join(output_folder, subset, 'labels')

    os.makedirs(images_path, exist_ok=True)
    os.makedirs(labels_path, exist_ok=True)

    # Clear existing label files
    for file in os.listdir(labels_path):
        if file.endswith(".txt"):
            os.remove(os.path.join(labels_path, file))

    # Clear existing images as well. Removing only the labels would leave the
    # images of an earlier split behind, so a re-run could end up with the same
    # image in several subsets, or with images that no longer have a label.
    for file in os.listdir(images_path):
        if file.lower().endswith(image_extensions):
            os.remove(os.path.join(images_path, file))

# === Collect annotation-image pairs ===
data_pairs = []

print("Searching for matching images and labels...")
# Sorted so the list (and therefore the seeded shuffle) does not depend on the
# arbitrary order returned by os.listdir.
for label_file in sorted(os.listdir(yolo_folder)):
    if label_file.endswith(".txt"):
        label_path = os.path.join(yolo_folder, label_file)

        # Strip only the extension; str.replace would also rewrite a '.txt'
        # that happens to appear in the middle of the file name.
        stem = os.path.splitext(label_file)[0]

        # Match corresponding image (first extension that exists wins)
        image_found = False
        for ext in image_extensions:
            img_name = stem + ext
            src_img = os.path.join(images_folder, img_name)
            if os.path.exists(src_img):
                data_pairs.append((src_img, label_path))
                print(f"Found: {img_name} with {label_file}")
                image_found = True
                break
        if not image_found:
            print(f"No matching image for label: {label_file}")

print(f"Total pairs found: {len(data_pairs)}")

# === Shuffle and split data ===
# A dedicated Random instance keeps the split reproducible without touching
# the global random state used elsewhere.
random.Random(split_seed).shuffle(data_pairs)
num_images = len(data_pairs)
train_split = int(0.7 * num_images)  # end index of the train slice
val_split = int(0.9 * num_images)    # end index of the val slice

train_data = data_pairs[:train_split]
val_data = data_pairs[train_split:val_split]
test_data = data_pairs[val_split:]

# === Save data ===
def save_data(data, subset):
    """Copy image/label pairs into one split folder.

    Args:
        data: iterable of ``(image_path, label_path)`` tuples.
        subset: split name; files are copied to
            ``<output_folder>/<subset>/images`` and
            ``<output_folder>/<subset>/labels``, keeping their base names.

    Prints a start message and a final count of copied pairs.
    """
    print(f"Saving {subset} data...")

    saved_count = 0
    for saved_count, (src_image, src_label) in enumerate(data, start=1):
        # Destination keeps the original file name inside the split folder.
        image_target = os.path.join(
            output_folder, subset, 'images', os.path.basename(src_image)
        )
        label_target = os.path.join(
            output_folder, subset, 'labels', os.path.basename(src_label)
        )

        shutil.copy(src_image, image_target)
        shutil.copy(src_label, label_target)

    print(f"{saved_count} images and labels saved in {subset}.")

# Copy each split to disk in train -> val -> test order.
for subset_name, subset_pairs in (
    ('train', train_data),
    ('val', val_data),
    ('test', test_data),
):
    save_data(subset_pairs, subset_name)

print("Data split and saved successfully.")


In [None]:
# Train model
# Trains from the 'yolo11n.pt' checkpoint on the dataset described by
# data.yaml and writes the run to <train_folder>/folds_retrain.
train_folder = "/home/student/Desktop/current_model_ai/folder_train"
image_dir = '/home/student/Desktop/current_model_ai/val/images'  # Folder with .jpg images
output_fp_dir = 'false_positive_visuals'

# Relative path: the folder is created in the current working directory.
os.makedirs(output_fp_dir, exist_ok=True)

# Discard the results of any earlier training run so this one starts clean.
if os.path.exists(train_folder):
    shutil.rmtree(train_folder)

# Load the model
model = YOLO('yolo11n.pt')

# === Training ===
# NOTE(review): `conf` and `augment` look like validation/prediction options
# rather than training augmentations - confirm against the Ultralytics
# configuration docs that they have the intended effect here.
train_args = dict(
    data='/home/student/Desktop/current_model_ai/data.yaml',
    epochs=30,
    imgsz=640,
    batch=16,
    project=train_folder,
    name='folds_retrain',
    shear=2.5,
    scale=0.1,
    translate=0.0,
    degrees=0.0,
    conf=0.3,
    augment=True,
)
model.train(**train_args)

In [None]:
# Predict on dataset
# Runs the trained model over the test images, then scores it per image:
# an image counts as "V" when it has a non-empty label file and as
# "background" otherwise, for ground truth and predictions alike.
predict_folder = "/home/student/Desktop/current_model_ai/PredictedImages"
test_images = "/home/student/Desktop/current_model_ai/test/images"
gt_labels = "/home/student/Desktop/current_model_ai/test/labels"

# === Clean old predictions ===
if os.path.exists(predict_folder):
    shutil.rmtree(predict_folder)

# === Load trained model ===
model = YOLO('/home/student/Desktop/current_model_ai/folder_train/folds_retrain/weights/best.pt')

# === Run predictions ===
# project + name resolve to the same directory as `predict_folder`.
predict_args = dict(
    source=test_images,
    conf=0.4,
    iou=0.4,
    save=True,
    save_txt=True,
    save_conf=True,
    project='/home/student/Desktop/current_model_ai',
    name='PredictedImages',
)
results = model.predict(**predict_args)

# === Paths after prediction ===
pred_labels = os.path.join(predict_folder, 'labels')


def _has_annotations(label_file):
    """Return True when `label_file` exists and is not empty."""
    return os.path.exists(label_file) and os.path.getsize(label_file) > 0


# === Evaluate confusion matrix ===
y_true = []
y_pred = []
background_images = []

print("\nEvaluating confusion matrix:")

image_suffixes = ('.jpg', '.jpeg', '.png')
for filename in os.listdir(test_images):
    if filename.lower().endswith(image_suffixes):
        name = os.path.splitext(filename)[0]
        gt_label_file = os.path.join(gt_labels, f"{name}.txt")
        pred_label_file = os.path.join(pred_labels, f"{name}.txt")

        gt_has_label = _has_annotations(gt_label_file)
        pred_has_label = _has_annotations(pred_label_file)

        # Remember every image for which the model produced no detection.
        if not pred_has_label:
            background_images.append(filename)

        # Labels: 1 = V (fold), 0 = background
        y_true.append(int(gt_has_label))
        y_pred.append(int(pred_has_label))

# === Generate confusion matrix ===
# labels=[1, 0] puts "V" first and "background" second on both axes.
cm = confusion_matrix(y_true, y_pred, labels=[1, 0])
labels = ['V', 'background']

# === Print background images ===
print("\nImages predicted as background (no detections):")
for bg_img in background_images:
    print(" -", bg_img)

print(f"\nTotal background predictions: {len(background_images)}")

# === Compute percentages ===
# sklearn's confusion_matrix puts TRUE labels on rows and PREDICTED labels on
# columns, so each cell is shown as a percentage of its row (its true class).
row_totals = cm.sum(axis=1, keepdims=True)
# A class with no ground-truth images has a zero row total; leave its
# percentages at 0 instead of dividing by zero and plotting NaN.
cm_percent = np.divide(
    cm.astype('float'),
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals != 0,
) * 100

# Built from Python strings so numpy sizes the string dtype to fit; casting an
# integer array to str fixes the width up front and can truncate the text.
annot = np.array([
    [f"{cm[i, j]}\n({cm_percent[i, j]:.1f}%)" for j in range(cm.shape[1])]
    for i in range(cm.shape[0])
])

# === Plot confusion matrix ===
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(cm, annot=annot, fmt='', cmap='Blues',
            xticklabels=labels, yticklabels=labels, ax=ax)
# Heatmap rows are drawn along the y-axis and columns along the x-axis, so the
# y-axis carries the true labels and the x-axis the predictions.
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix with Percentages")
fig.tight_layout()

# === Save confusion matrix ===
os.makedirs(predict_folder, exist_ok=True)  # make sure the target folder exists
output_path = os.path.join(predict_folder, 'confusion_matrix_percent.png')
fig.savefig(output_path)
plt.show()

print(f"\nConfusion matrix with percentages saved to: {output_path}")

In [None]:
# Model selection
# Trains every YOLO11 size with identical settings, validates each one and
# stores the metrics in a CSV so the sizes can be compared.
data_yaml = '/home/student/Desktop/current_model_ai/data.yaml'
project_root = '/home/student/Desktop/current_model_ai/training_results_v2'
csv_path = '/home/student/Desktop/current_model_ai/training_results_v2/model_selection_metrics.csv'

# Clean project folder
if os.path.exists(project_root):
    shutil.rmtree(project_root)

os.makedirs(project_root, exist_ok=True)

# Phase 1: Model selection
models = ['yolo11n.pt', 'yolo11s.pt', 'yolo11m.pt', 'yolo11l.pt', 'yolo11x.pt']
metric_columns = ['Box(P)', 'Recall', 'mAP50', 'mAP50-95']
model_metrics = {}

for model_name in models:
    name = os.path.splitext(model_name)[0] + '_baseline'
    print(f"Training {model_name}...")

    # Train model
    model = YOLO(model_name)
    model.train(data=data_yaml, epochs=30, imgsz=640, batch=8,
                name=name, project=project_root, patience=10, verbose=False)

    # Evaluate
    results = model.val(data=data_yaml)

    # Extract metrics.
    # `box.p` / `box.r` are per-class arrays, so float() on them only works
    # for a single class; `box.mp` / `box.mr` are the mean precision / recall
    # and stay scalar for any number of classes.
    metrics = {
        'Box(P)': float(results.box.mp),
        'Recall': float(results.box.mr),
        'mAP50': float(results.box.map50),
        'mAP50-95': float(results.box.map)
    }
    model_metrics[model_name] = metrics

# Write metrics to CSV
with open(csv_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    # Header
    writer.writerow(['Model'] + metric_columns)
    # Data rows: values are looked up by column name so they always line up
    # with the header, independent of dict ordering.
    for model_name, metrics in model_metrics.items():
        writer.writerow([model_name] + [metrics[column] for column in metric_columns])

# Identify best model (highest mAP50)
best_model = max(model_metrics, key=lambda m: model_metrics[m]['mAP50'])
print(f"Best model by mAP50: {best_model}")


NameError: name 'os' is not defined

In [None]:
# Epoch + Batch optimization
# Trains `best_model` for every (epochs, batch) combination, validates each
# run and logs the metrics to a CSV, then picks the combination with the
# highest mAP50.
project_root = '/home/student/Desktop/current_model_ai/training_results'
data_yaml = '/home/student/Desktop/current_model_ai/data.yaml'
best_model = 'yolo11n.pt'

# Phase 2: Epoch + Batch optimization
epoch_values = [30, 50, 70, 100, 150]
batch_values = [4, 8, 16, 32]
epoch_batch_scores = []
csv_path = '/home/student/Desktop/current_model_ai/training_results_v2/epoch_batch_selection_metrics.csv'

# The CSV lives in a different folder than the runs; create it first, otherwise
# open() fails with FileNotFoundError when that folder does not exist yet.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)

# Create CSV and write header
with open(csv_path, mode='w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Run Name', 'Epochs', 'Batch Size', 'Box(P)', 'Recall', 'mAP@0.5', 'mAP@0.5:0.95'])
    csvfile.flush()

    for epochs, batch in itertools.product(epoch_values, batch_values):
        run_name = f'{os.path.splitext(best_model)[0]}_e{epochs}_b{batch}'
        # Where the run is written. The folder is NOT created here:
        # NOTE(review): with Ultralytics' default exist_ok=False an existing
        # run folder appears to make the trainer write to an incremented name
        # (e.g. "<run_name>2") instead - confirm against the docs.
        # exist_ok=True below keeps the output in `run_dir`.
        run_dir = os.path.join(project_root, run_name)
        print(f"Training {run_name}...")

        model = YOLO(best_model)
        model.train(
            data=data_yaml,
            epochs=epochs,
            imgsz=640,
            batch=batch,
            name=run_name,
            project=project_root,
            exist_ok=True,
            patience=10,
            verbose=False
        )

        results = model.val(data=data_yaml)

        # `box.mp` / `box.mr` are the mean precision / recall; the per-class
        # arrays `box.p` / `box.r` cannot be passed to float() once there is
        # more than one class.
        metrics = {
            'Box(P)': float(results.box.mp),
            'Recall': float(results.box.mr),
            'mAP50': float(results.box.map50),
            'mAP50-95': float(results.box.map)
        }

        # Save for best config selection
        epoch_batch_scores.append((epochs, batch, metrics['mAP50']))

        # Write to CSV and flush right away so finished runs survive a crash
        # in a later, longer run.
        writer.writerow([
            run_name, epochs, batch,
            metrics['Box(P)'], metrics['Recall'],
            metrics['mAP50'], metrics['mAP50-95']
        ])
        csvfile.flush()


# Pick best configuration (highest mAP50)
best_epochs, best_batch, _ = max(epoch_batch_scores, key=lambda x: x[2])


⚙️ Phase 2: Epoch + Batch optimization



FileNotFoundError: [Errno 2] No such file or directory: '/home/student/Desktop/current_model_ai/training_results_v2/epoch_batch_selection_metrics.csv'

In [None]:
# Grid optimization for augmentations
# Trains `best_model` once per (shear, scale, translate, degrees) combination,
# starting at `start_config`, and appends the validation metrics of every
# finished run to a CSV.
project_root = '/home/student/Desktop/current_model_ai/training_results'
data_yaml = '/home/student/Desktop/current_model_ai/data.yaml'
best_model = 'yolo11n.pt'

# Phase 3: Grid Search on augmentations
shear_vals = [0.0, 2.5, 5.0, 7.5]
scale_vals = [0.0, 0.03, 0.06, 0.1]
translate_vals = [0.0, 0.03, 0.06, 0.1]
degrees_vals = [0.0, 5.0, 10.0, 15.0]
param_grid = list(itertools.product(shear_vals, scale_vals, translate_vals, degrees_vals))
total_runs = len(param_grid)  # size of the full grid, used in progress output

# Find index to start from: INCLUDE this config.
# list.index raises ValueError if start_config is not part of the grid.
start_config = (2.5, 0.06, 0.06, 15.0)
start_index = param_grid.index(start_config)

# Slice param_grid from start_index onward
param_grid = param_grid[start_index:]
grid_results = []

# Results are appended, so earlier (partial) sweeps stay in the file.
csv_path = '/home/student/Desktop/current_model_ai/training_results_v2/augmentation_results.csv'
csv_fields = [
    'Run', 'Shear', 'Scale', 'Translate', 'Degrees',
    'Box(P)', 'Recall', 'mAP50', 'mAP50-95'
]
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
# Write the header for a new file and also for an existing but empty one.
write_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0

with open(csv_path, mode='a', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=csv_fields)
    if write_header:
        writer.writeheader()
        file.flush()

    for i, (shear, scale, translate, degrees) in enumerate(param_grid, start=start_index):
        run_name = f'grid_shear{shear}_scale{scale}_trans{translate}_deg{degrees}'
        print(f"Training {run_name} ({i+1}/{total_runs})...")

        model = YOLO(best_model)
        model.train(
            data=data_yaml,
            epochs=30,
            batch=8,
            imgsz=640,
            name=run_name,
            project=project_root,
            shear=shear,
            scale=scale,
            translate=translate,
            degrees=degrees,
            augment=True,
            patience=10,
            verbose=False
        )

        results = model.val(data=data_yaml)

        # `box.mp` / `box.mr` are the mean precision / recall; the per-class
        # arrays `box.p` / `box.r` cannot be passed to float() once there is
        # more than one class.
        metrics = {
            'Box(P)': float(results.box.mp),
            'Recall': float(results.box.mr),
            'mAP50': float(results.box.map50),
            'mAP50-95': float(results.box.map)
        }

        row = {
            'Run': run_name,
            'Shear': shear,
            'Scale': scale,
            'Translate': translate,
            'Degrees': degrees,
            **metrics
        }
        grid_results.append(row)

        # Persist each run as soon as it finishes: the sweep is long, and
        # writing only at the very end would lose every completed run if a
        # later one crashes.
        writer.writerow(row)
        file.flush()
