# YOLO classification model: test and evaluation notebook

## Test for color detection
In this section, several code snippets help build a comprehensive understanding of where the model is failing.

### Method for displaying the wrongly classified images from a dataset

In [None]:
import cv2
from ultralytics import YOLO
import glob
import matplotlib.pyplot as plt

# Load the trained YOLO classification model.
MODEL_PATH = "runs/classify/cls-yolo11n-first-test-512-100e/weights/best.pt"
model = YOLO(MODEL_PATH)  # Replace with your model path
names = model.names  # Get class names

# Every image matched here sits in the "color" validation folder, so a
# "black" prediction on any of them is a misclassification.
image_paths = glob.glob("dataset_color/val/color/*.jpg")  # Adjust path as needed

# Paths of the images the model classified as "black".
black_images = []

# Run inference. The class check is evaluated per result so it can never read
# an undefined (or stale, from the previous image) class name when the model
# returns no result for a path.
for img_path in image_paths:
    predicted_black = any(
        result.names[result.probs.top1] == "black"
        for result in model(img_path)
    )
    if predicted_black:
        black_images.append(img_path)

# Display the filtered images, one figure per image.
for img_path in black_images:
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable file by returning None instead of
        # raising; skip it rather than crash inside cvtColor.
        print(f"Could not read {img_path}, skipping.")
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR, Matplotlib expects RGB
    plt.figure()
    plt.imshow(img)
    plt.axis("off")
    plt.title(img_path)

plt.show()

### Experimental method that detects color on overlapping tiles

In [None]:
from ultralytics import YOLO
from PIL import Image
import numpy as np
import os

# Load the classification model; classify_image() reads it through the
# module-level name `model`.
model = YOLO("runs/classify/cls-tuned-color1/weights/best.pt")

def slice_image(img, tile_size=528, overlap=0.5):
    """Cut *img* into a regular grid of overlapping tiles.

    Args:
        img: Image-like object exposing ``width``, ``height`` and
            ``crop((left, upper, right, lower))``.
        tile_size: Edge length of the square tiles, in pixels.
        overlap: Fraction of a tile shared with its neighbour. The stride
            between tile origins is ``int(tile_size * (1 - overlap))``.

    Returns:
        List of crops in row-major order (top to bottom, left to right).
        When the image is smaller than ``tile_size`` along an axis, the tile
        is clamped to the image extent on that axis, so at least one tile is
        always produced instead of an empty list. A trailing right/bottom
        strip narrower than one stride is not covered by any tile.

    Raises:
        ValueError: If the resulting stride is smaller than one pixel.
    """
    step = int(tile_size * (1 - overlap))
    if step < 1:
        # A zero stride would make range() fail with an unrelated message and
        # a negative one would silently yield no tiles.
        raise ValueError(
            f"tile_size={tile_size} with overlap={overlap} gives a stride of "
            f"{step}; the stride must be at least 1 pixel"
        )

    # Clamp the tile to the image so small images still yield one tile.
    tile_w = min(tile_size, img.width)
    tile_h = min(tile_size, img.height)

    return [
        img.crop((x, y, x + tile_w, y + tile_h))
        for y in range(0, img.height - tile_h + 1, step)
        for x in range(0, img.width - tile_w + 1, step)
    ]

def classify_image(image_path, tile_size=224, overlap=0.5, prob_threshold=0.6):
    """Classify an image as ``"colored"`` or ``"bw"`` from its tiles.

    The image is cut into overlapping tiles with ``slice_image`` and every
    tile is run through the module-level ``model``. The image counts as
    colored as soon as one tile's probability at class index 1 exceeds
    ``prob_threshold``.

    Args:
        image_path: Path of the image file to classify.
        tile_size: Tile edge in pixels; also passed to the model as ``imgsz``.
        overlap: Overlap fraction forwarded to ``slice_image``.
        prob_threshold: Minimum class-1 probability for a tile to count as
            colored.

    Returns:
        ``"colored"`` if any tile exceeds the threshold, otherwise ``"bw"``.
    """
    # Image.open keeps the file open lazily; convert() loads the pixels into a
    # new image, so the file can be closed as soon as the block exits.
    with Image.open(image_path) as source:
        img = source.convert("RGB")

    # If slicing produces no tile (image smaller than one tile), classify the
    # whole image rather than sending an empty batch to the model.
    tiles = slice_image(img, tile_size, overlap) or [img]
    results = model.predict(tiles, imgsz=tile_size, verbose=False)

    # Index 1 is taken to be the 'colored' class, as in the original code
    # comment -- NOTE(review): confirm against model.names.
    is_colored = any(res.probs.data[1] > prob_threshold for res in results)
    return "colored" if is_colored else "bw"

# Example usage: classify one validation image and print the verdict
image_path = "dataset_color/val/color/319f8eb0-IMG_0411.jpg"
result = classify_image(image_path, prob_threshold=0.6)  # "colored" or "bw"
print(f"The image is classified as: {result}")

### GPU accelerated version of the previous function
This method plots how precision, recall, and F1 score evolve across different decision thresholds.

In [None]:
import os
from pathlib import Path
from PIL import Image
from ultralytics import YOLO
import concurrent.futures
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import numpy as np


# --- CONFIG ---
VAL_FOLDER = "dataset_color/val/"  # Root folder; images are read from its "color" and "black" subfolders
MODEL_PATH = "runs/classify/cls-tuned-color1/weights/best.pt"  # Weights loaded by process_single_image
TILE_SIZE = 512  # Tile edge in pixels; also passed to predict() as imgsz
OVERLAP = 0.5  # Fraction of a tile shared with its neighbour
N_WORKERS = 16  # Size of the worker process pool
THRESHOLDS = np.linspace(0.0, 1.0, 500)  # 500 evenly spaced thresholds from 0 to 1 (inclusive)

def slice_image(img, tile_size=TILE_SIZE, overlap=OVERLAP):
    """Cut *img* into a regular grid of overlapping tiles.

    Args:
        img: Image-like object exposing ``width``, ``height`` and
            ``crop((left, upper, right, lower))``.
        tile_size: Edge length of the square tiles, in pixels.
        overlap: Fraction of a tile shared with its neighbour. The stride
            between tile origins is ``int(tile_size * (1 - overlap))``.

    Returns:
        List of crops in row-major order (top to bottom, left to right).
        When the image is smaller than ``tile_size`` along an axis, the tile
        is clamped to the image extent on that axis, so at least one tile is
        always produced instead of an empty list. A trailing right/bottom
        strip narrower than one stride is not covered by any tile.

    Raises:
        ValueError: If the resulting stride is smaller than one pixel.
    """
    step = int(tile_size * (1 - overlap))
    if step < 1:
        # A zero stride would make range() fail with an unrelated message and
        # a negative one would silently yield no tiles.
        raise ValueError(
            f"tile_size={tile_size} with overlap={overlap} gives a stride of "
            f"{step}; the stride must be at least 1 pixel"
        )

    # Clamp the tile to the image so small images still yield one tile.
    tile_w = min(tile_size, img.width)
    tile_h = min(tile_size, img.height)

    return [
        img.crop((x, y, x + tile_w, y + tile_h))
        for y in range(0, img.height - tile_h + 1, step)
        for x in range(0, img.width - tile_w + 1, step)
    ]

def collect_images_from_folder(val_folder):
    """Gather ``(path, label)`` samples from the labelled subfolders.

    Looks in ``<val_folder>/color`` and then ``<val_folder>/black``; within
    each folder the ``*.jpg`` matches are listed before the ``*.png`` ones.

    Args:
        val_folder: Directory containing the ``color`` and ``black`` folders.

    Returns:
        List of ``(image path as str, label)`` tuples, where the label is the
        name of the subfolder the image was found in.
    """
    root = Path(val_folder)
    return [
        (str(found), label)
        for label in ("color", "black")
        for pattern in ("*.jpg", "*.png")
        for found in (root / label).glob(pattern)
    ]

# Per-process model cache, keyed by weights path: each worker process loads
# the weights on its first image and reuses them for every later one.
_WORKER_MODELS = {}


def _get_worker_model():
    """Return this process's YOLO model, loading it on first use."""
    if MODEL_PATH not in _WORKER_MODELS:
        _WORKER_MODELS[MODEL_PATH] = YOLO(MODEL_PATH)
    return _WORKER_MODELS[MODEL_PATH]


# This function will be run in each worker process
def process_single_image(args):
    """Compute the per-tile class-1 probabilities of one labelled image.

    Args:
        args: ``(image_path, label)`` tuple; packed into one argument so the
            function can be used with ``executor.map``.

    Returns:
        ``(label, probs)`` where ``probs`` holds one float per tile (the
        probability at class index 1), or ``None`` if anything failed; the
        error is printed rather than raised so one bad image does not abort
        the whole run.
    """
    image_path, label = args
    try:
        # Reuse the model cached in this process instead of reloading the
        # weights for every single image.
        model = _get_worker_model()

        # Image.open keeps the file open lazily; convert() loads the pixels
        # into a new image so the file can be closed right away.
        with Image.open(image_path) as source:
            img = source.convert("RGB")

        # If slicing produces no tile (image smaller than one tile), score the
        # whole image rather than sending an empty batch to the model.
        tiles = slice_image(img) or [img]
        results = model.predict(tiles, imgsz=TILE_SIZE, verbose=False)

        # Index 1 is treated as the "color" class, as in the original comment
        # -- NOTE(review): confirm against model.names.
        probs = [res.probs.data[1].item() for res in results]

        return label, probs
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return None

def run_and_cache_probs(samples):
    """Run ``process_single_image`` over all samples in a process pool.

    Args:
        samples: Sequence of ``(image_path, label)`` tuples.

    Returns:
        List of the truthy worker results, in the order of ``samples``;
        ``None`` results (images whose processing failed) are left out.
    """
    # Processes rather than threads, so images are handled in parallel.
    with concurrent.futures.ProcessPoolExecutor(max_workers=N_WORKERS) as pool:
        # executor.map yields the results themselves, in input order; tqdm
        # only wraps the iterator to show progress.
        outcomes = tqdm(
            pool.map(process_single_image, samples),
            total=len(samples),
            desc="Processing Images",
        )
        return [outcome for outcome in outcomes if outcome]

def evaluate_over_thresholds(data, thresholds):
    """Compute precision, recall and F1 of the "color" class per threshold.

    An image is predicted "color" when at least one of its probabilities is
    strictly greater than the threshold, otherwise "black".

    Args:
        data: Iterable of ``(true_label, probs)`` pairs, where ``probs`` is a
            list of floats and ``true_label`` is ``"color"`` for positives
            (any other label counts as negative).
        thresholds: Iterable of threshold values to evaluate.

    Returns:
        Tuple ``(precisions, recalls, f1s)`` of lists, each aligned with
        ``thresholds``.
    """
    # Ground truth does not depend on the threshold: build it once.
    y_true = [1 if true_label == "color" else 0 for true_label, _ in data]

    # "any(p > t)" is equivalent to "max(p) > t", so one pass per image is
    # enough instead of rescanning every tile for every threshold. An image
    # with no probabilities gets -inf and is therefore never predicted
    # "color". Assumes probabilities are never NaN.
    peak_scores = [max(probs, default=float("-inf")) for _, probs in data]

    precisions, recalls, f1s = [], [], []
    for t in thresholds:
        y_pred = [1 if peak > t else 0 for peak in peak_scores]

        # zero_division=0 reports 0 instead of warning when a metric is
        # undefined, e.g. no positive prediction at a high threshold.
        precisions.append(precision_score(y_true, y_pred, zero_division=0))
        recalls.append(recall_score(y_true, y_pred, zero_division=0))
        f1s.append(f1_score(y_true, y_pred, zero_division=0))

    return precisions, recalls, f1s

def plot_metrics(thresholds, precisions, recalls, f1s):
    """Draw precision, recall and F1 against the decision threshold.

    Args:
        thresholds: X values shared by the three curves.
        precisions: Precision per threshold.
        recalls: Recall per threshold.
        f1s: F1 score per threshold.
    """
    curves = (
        (precisions, "Precision"),
        (recalls, "Recall"),
        (f1s, "F1 Score"),
    )

    plt.figure(figsize=(10, 6))
    for values, caption in curves:
        plt.plot(thresholds, values, label=caption)

    # Axis labelling and layout
    plt.xlabel("Threshold")
    plt.ylabel("Score")
    plt.title("Metrics vs Threshold")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# --- MAIN ---
# Pipeline: collect labelled images -> per-tile inference in a process pool
# -> precision/recall/F1 for every threshold -> plot.
if __name__ == "__main__":
    # This guard is essential when using ProcessPoolExecutor: it keeps the
    # pipeline from running again if worker processes import this module.
    samples = collect_images_from_folder(VAL_FOLDER)
    print(f"Found {len(samples)} images.")

    # Inference runs once; the stored probabilities are then reused for
    # every threshold below.
    print("Running inference and storing tile probabilities...")
    data = run_and_cache_probs(samples)

    print("Evaluating across thresholds...")
    precisions, recalls, f1s = evaluate_over_thresholds(data, THRESHOLDS)

    print("Plotting...")
    plot_metrics(THRESHOLDS, precisions, recalls, f1s)

In [None]:
import os
from pathlib import Path
from PIL import Image
from ultralytics import YOLO
import concurrent.futures
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import numpy as np


# --- CONFIG ---
VAL_FOLDER = "dataset_color/val/"  # Root folder; images are read from its "color" and "black" subfolders
MODEL_PATH = "runs/classify/cls-yolo11n-first-test-512-100e/weights/best.pt"  # Weights loaded by process_single_image
# Alternative weights, kept for quick switching:
#MODEL_PATH = "runs/classify/train1655/weights/best.pt"
# MODEL_PATH = "runs/classify/cls-1024px-100epochs/weights/best.pt"
TILE_SIZE = 512  # Default tile edge (pixels) for slice_image
OVERLAP = 0.5  # Default overlap fraction for slice_image
N_WORKERS = 16  # Size of the worker process pool
THRESHOLDS = np.linspace(0.0, 1.0, 500)  # 500 evenly spaced thresholds from 0 to 1 (inclusive)

def slice_image(img, tile_size=TILE_SIZE, overlap=OVERLAP):
    """Cut *img* into a regular grid of overlapping tiles.

    Args:
        img: Image-like object exposing ``width``, ``height`` and
            ``crop((left, upper, right, lower))``.
        tile_size: Edge length of the square tiles, in pixels.
        overlap: Fraction of a tile shared with its neighbour. The stride
            between tile origins is ``int(tile_size * (1 - overlap))``.

    Returns:
        List of crops in row-major order (top to bottom, left to right).
        When the image is smaller than ``tile_size`` along an axis, the tile
        is clamped to the image extent on that axis, so at least one tile is
        always produced instead of an empty list. A trailing right/bottom
        strip narrower than one stride is not covered by any tile.

    Raises:
        ValueError: If the resulting stride is smaller than one pixel.
    """
    step = int(tile_size * (1 - overlap))
    if step < 1:
        # A zero stride would make range() fail with an unrelated message and
        # a negative one would silently yield no tiles.
        raise ValueError(
            f"tile_size={tile_size} with overlap={overlap} gives a stride of "
            f"{step}; the stride must be at least 1 pixel"
        )

    # Clamp the tile to the image so small images still yield one tile.
    tile_w = min(tile_size, img.width)
    tile_h = min(tile_size, img.height)

    tiles = []
    for y in range(0, img.height - tile_h + 1, step):
        for x in range(0, img.width - tile_w + 1, step):
            tiles.append(img.crop((x, y, x + tile_w, y + tile_h)))
    return tiles

def collect_images_from_folder(val_folder):
    """Build the list of labelled validation samples.

    Scans ``<val_folder>/color`` first and ``<val_folder>/black`` second;
    inside each folder ``*.jpg`` files come before ``*.png`` files.

    Args:
        val_folder: Directory containing the ``color`` and ``black`` folders.

    Returns:
        List of ``(image path as str, label)`` tuples, the label being the
        subfolder name.
    """
    samples = []
    for label in ("color", "black"):
        label_dir = Path(val_folder) / label
        for pattern in ("*.jpg", "*.png"):
            samples.extend((str(match), label) for match in label_dir.glob(pattern))
    return samples

# Per-process model cache, keyed by weights path: each worker process loads
# the weights on its first image and reuses them for every later one.
_WORKER_MODELS = {}


def _get_worker_model():
    """Return this process's YOLO model, loading it on first use."""
    if MODEL_PATH not in _WORKER_MODELS:
        _WORKER_MODELS[MODEL_PATH] = YOLO(MODEL_PATH)
    return _WORKER_MODELS[MODEL_PATH]


# This function will be run in each worker process
def process_single_image(args):
    """Compute the class-1 probability of one labelled image, without tiling.

    Args:
        args: ``(image_path, label)`` tuple; packed into one argument so the
            function can be used with ``executor.map``.

    Returns:
        ``(label, probs)`` where ``probs`` holds one float per prediction
        result (the probability at class index 1), or ``None`` if anything
        failed; the error is printed rather than raised so one bad image does
        not abort the whole run.
    """
    image_path, label = args
    try:
        # Reuse the model cached in this process instead of reloading the
        # weights for every single image.
        model = _get_worker_model()

        # Image.open keeps the file open lazily; convert() loads the pixels
        # into a new image so the file can be closed right away.
        with Image.open(image_path) as source:
            img = source.convert("RGB")

        # The whole image is classified in one go (no tiling in this variant).
        results = model.predict(img, verbose=False)

        # Index 1 is treated as the "color" class, as in the original comment
        # -- NOTE(review): confirm against model.names.
        probs = [res.probs.data[1].item() for res in results]

        return label, probs
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return None

def run_and_cache_probs(samples):
    """Fan ``process_single_image`` out over a pool of worker processes.

    Args:
        samples: Sequence of ``(image_path, label)`` tuples.

    Returns:
        List of the truthy worker results, in the order of ``samples``;
        ``None`` results (images whose processing failed) are dropped.
    """
    progress_options = {"total": len(samples), "desc": "Processing Images"}

    # Processes rather than threads, so images are handled in parallel.
    with concurrent.futures.ProcessPoolExecutor(max_workers=N_WORKERS) as pool:
        # executor.map yields results (not futures) in input order; list()
        # drains it while the pool is still alive.
        outcomes = list(
            tqdm(pool.map(process_single_image, samples), **progress_options)
        )

    # filter(None, ...) keeps only truthy entries, i.e. skips failed images.
    return list(filter(None, outcomes))

def evaluate_over_thresholds(data, thresholds):
    """Compute precision, recall and F1 of the "color" class per threshold.

    An image is predicted "color" when at least one of its probabilities is
    strictly greater than the threshold, otherwise "black".

    Args:
        data: Iterable of ``(true_label, probs)`` pairs, where ``probs`` is a
            list of floats and ``true_label`` is ``"color"`` for positives
            (any other label counts as negative).
        thresholds: Iterable of threshold values to evaluate.

    Returns:
        Tuple ``(precisions, recalls, f1s)`` of lists, each aligned with
        ``thresholds``.
    """
    # Ground truth does not depend on the threshold: build it once.
    y_true = [1 if true_label == "color" else 0 for true_label, _ in data]

    # "any(p > t)" is equivalent to "max(p) > t", so the per-image scan is
    # done once instead of once per threshold. An image with no
    # probabilities gets -inf and is therefore never predicted "color".
    # Assumes probabilities are never NaN.
    peak_scores = [max(probs, default=float("-inf")) for _, probs in data]

    precisions, recalls, f1s = [], [], []
    for t in thresholds:
        y_pred = [1 if peak > t else 0 for peak in peak_scores]

        # zero_division=0 reports 0 instead of warning when a metric is
        # undefined, e.g. no positive prediction at a high threshold.
        precisions.append(precision_score(y_true, y_pred, zero_division=0))
        recalls.append(recall_score(y_true, y_pred, zero_division=0))
        f1s.append(f1_score(y_true, y_pred, zero_division=0))

    return precisions, recalls, f1s

def plot_metrics(thresholds, precisions, recalls, f1s):
    """Plot the three metric curves as functions of the threshold.

    Args:
        thresholds: X values shared by the three curves.
        precisions: Precision per threshold.
        recalls: Recall per threshold.
        f1s: F1 score per threshold.
    """
    plt.figure(figsize=(10, 6))

    # One line per metric, drawn in the same order as the legend entries.
    captions = ("Precision", "Recall", "F1 Score")
    for caption, values in zip(captions, (precisions, recalls, f1s)):
        plt.plot(thresholds, values, label=caption)

    plt.xlabel("Threshold")
    plt.ylabel("Score")
    plt.title("Metrics vs Threshold")

    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# --- MAIN ---
# Pipeline: collect labelled images -> whole-image inference in a process
# pool -> precision/recall/F1 for every threshold -> plot.
if __name__ == "__main__":
    # This guard is essential when using ProcessPoolExecutor: it keeps the
    # pipeline from running again if worker processes import this module.
    samples = collect_images_from_folder(VAL_FOLDER)
    print(f"Found {len(samples)} images.")

    # In this cell process_single_image predicts on the whole image, so each
    # sample carries the probabilities of a single prediction call.
    print("Running inference and storing tile probabilities...")
    data = run_and_cache_probs(samples)

    print("Evaluating across thresholds...")
    precisions, recalls, f1s = evaluate_over_thresholds(data, THRESHOLDS)

    print("Plotting...")
    plot_metrics(THRESHOLDS, precisions, recalls, f1s)

## Validation method for YOLO models

In [None]:
from ultralytics import YOLO
# Validation: run the built-in Ultralytics evaluation on a dataset split

# Load the trained model
model = YOLO("runs/classify/cls-yolo11n-tuned-1024px-50e/weights/best.pt")  # Replace with your model's path

# Run evaluation on the "test" split of the dataset rooted at ./dataset_color;
# the returned object is kept in `metrics` for later inspection.
metrics = model.val(data="./dataset_color", split="test")  # Use the test set