# In [None]:
# Install required packages (run once)
# !pip install ultralytics opencv-python pillow

import os
import shutil
from pathlib import Path
from ultralytics import YOLO
from tqdm.auto import tqdm
import pandas as pd

# ========== Configuration ==========
# Path to your YOLO classification model
MODEL_PATH = "data/model/yolo_small_classify_v15.pt"

# Directory containing ROI images
SOURCE_DIR = r"C:\Users\Khaled\Downloads\classified_rois_17_02_2026\data\classified_rois"

# Output directory for organized images
OUTPUT_DIR = r"C:\Users\Khaled\Downloads\classified_rois_17_02_2026\data\organized_rois"

# Minimum confidence to classify (0.0 - 1.0)
CONFIDENCE_THRESHOLD = 0.8

# True moves each file out of SOURCE_DIR; False copies it and leaves the original
MOVE_FILES = True

# Make sure the output root exists before anything is written into it
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

# ========== Load YOLO Model ==========
# Instantiate the classifier from the configured weights file and report
# the class mapping it exposes through `model.names`.
print("Loading YOLO model...")
model = YOLO(MODEL_PATH)
print("✓ Model loaded successfully!")
print(f"Model classes: {model.names}")
print(f"Number of classes: {len(model.names)}")

# ========== Get all JPG images ==========
# Directory entries whose name ends in .jpg/.jpeg (case-insensitive),
# in sorted order so the processing sequence is reproducible.
image_files = sorted(
    entry
    for entry in os.listdir(SOURCE_DIR)
    if entry.lower().endswith(('.jpg', '.jpeg'))
)

print(f"\nFound {len(image_files)} JPG images to process")

# ========== Process Images ==========
# One record per image; these become the rows of the results table
results_data = []
# Number of images filed under each output folder name
stats = {}
# (filename, predicted class, confidence) for predictions under the threshold
low_confidence_images = []

print("\nClassifying images...")

# Choose the transfer routine once instead of branching on every image
transfer_file = shutil.move if MOVE_FILES else shutil.copy2

for filename in tqdm(image_files):
    image_path = os.path.join(SOURCE_DIR, filename)

    # Run inference and read off the top-1 prediction
    probs = model(image_path, verbose=False)[0].probs
    top_class_idx = probs.top1
    confidence = probs.top1conf.item()
    class_name = model.names[top_class_idx]

    # Record the raw prediction (class name without any folder prefix)
    results_data.append(
        dict(
            filename=filename,
            predicted_class=class_name,
            confidence=confidence,
            class_idx=top_class_idx,
        )
    )

    # Predictions under the threshold are tracked separately and filed in a
    # "low_confidence_<class>" folder rather than the plain class folder
    if confidence < CONFIDENCE_THRESHOLD:
        low_confidence_images.append((filename, class_name, confidence))
        class_name = f"low_confidence_{class_name}"

    # Tally by destination folder name
    stats[class_name] = stats.get(class_name, 0) + 1

    # Ensure the destination folder exists, then move or copy the image into it
    class_dir = os.path.join(OUTPUT_DIR, class_name)
    os.makedirs(class_dir, exist_ok=True)
    transfer_file(image_path, os.path.join(class_dir, filename))

# ========== Save Results to CSV ==========
# Name the columns explicitly: a DataFrame built from an empty list has no
# columns at all, so the column lookups below would raise KeyError whenever
# SOURCE_DIR contained no JPG files.
results_df = pd.DataFrame(
    results_data,
    columns=['filename', 'predicted_class', 'confidence', 'class_idx'],
)
results_csv_path = os.path.join(OUTPUT_DIR, 'classification_results.csv')
results_df.to_csv(results_csv_path, index=False)

# ========== Print Statistics ==========
total_images = len(image_files)
print("\n" + "="*60)
print("CLASSIFICATION COMPLETE!")
print("="*60)
print(f"\nTotal images processed: {total_images}")
print("\nClass distribution:")
# `stats` is keyed by output folder name, so low-confidence folders are
# listed as their own entries. The loop body never runs when no images were
# processed, which keeps the division safe.
for class_name, count in sorted(stats.items()):
    percentage = (count / total_images) * 100
    print(f"  {class_name}: {count} images ({percentage:.1f}%)")

# ========== Low Confidence Warnings ==========
if low_confidence_images:
    print(f"\n⚠ Warning: {len(low_confidence_images)} images below confidence threshold ({CONFIDENCE_THRESHOLD}):")
    print("\nTop 10 low confidence images:")
    # Lowest confidence first
    for filename, class_name, conf in sorted(low_confidence_images, key=lambda x: x[2])[:10]:
        print(f"  {filename} -> {class_name} (conf: {conf:.3f})")

# ========== Confidence Statistics ==========
print("\nConfidence statistics:")
if results_df.empty:
    # No rows means there is nothing to aggregate
    print("  No images were classified.")
else:
    confidence_scores = results_df['confidence']
    print(f"  Mean confidence: {confidence_scores.mean():.3f}")
    print(f"  Median confidence: {confidence_scores.median():.3f}")
    print(f"  Min confidence: {confidence_scores.min():.3f}")
    print(f"  Max confidence: {confidence_scores.max():.3f}")

print(f"\n✓ Results saved to: {results_csv_path}")
print(f"✓ Images organized in: {OUTPUT_DIR}")
print("="*60)

# ========== Optional: Visualize Class Distribution ==========
import matplotlib.pyplot as plt

if results_df.empty:
    # With no classified images the column lookups / bar plot below have
    # nothing to work with, so skip plotting instead of failing.
    print("No classification results to plot.")
else:
    # Bar chart of class distribution (raw predicted class, regardless of
    # whether the image was filed as low confidence)
    plt.figure(figsize=(12, 6))
    class_counts = results_df['predicted_class'].value_counts()
    class_counts.plot(kind='bar', color='steelblue', edgecolor='black')
    plt.title('Class Distribution', fontsize=16, fontweight='bold')
    plt.xlabel('Class', fontsize=12)
    plt.ylabel('Number of Images', fontsize=12)
    plt.xticks(rotation=45, ha='right')
    plt.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Confidence distribution histogram, with the threshold marked as a
    # dashed vertical line
    plt.figure(figsize=(12, 6))
    plt.hist(results_df['confidence'], bins=50, color='coral', edgecolor='black', alpha=0.7)
    plt.axvline(CONFIDENCE_THRESHOLD, color='red', linestyle='--', linewidth=2, label=f'Threshold ({CONFIDENCE_THRESHOLD})')
    plt.title('Confidence Score Distribution', fontsize=16, fontweight='bold')
    plt.xlabel('Confidence', fontsize=12)
    plt.ylabel('Frequency', fontsize=12)
    plt.legend()
    plt.grid(alpha=0.3)
    plt.tight_layout()
    plt.show()

# ========== Optional: Preview Sample Images from Each Class ==========
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np

def preview_class_samples(output_dir, samples_per_class=3, figsize=(15, 10)):
    """
    Display sample images from each class directory.

    Every sub-directory of ``output_dir`` becomes one row of a grid; the row
    shows up to ``samples_per_class`` JPG images from that directory.

    Args:
        output_dir: Directory whose sub-directories hold the sorted images.
        samples_per_class: Maximum number of images shown per row.
        figsize: Matplotlib figure size as ``(width, height)``.

    Returns:
        None. The figure is shown with ``plt.show()``; if there are no class
        directories (or ``samples_per_class`` is below 1) a message is
        printed and nothing is drawn.
    """
    class_dirs = sorted(
        d for d in os.listdir(output_dir)
        if os.path.isdir(os.path.join(output_dir, d))
    )

    # A grid with zero rows or zero columns cannot be created
    if not class_dirs or samples_per_class < 1:
        print("Nothing to preview.")
        return

    # squeeze=False keeps `axes` two-dimensional even for a single row or
    # column, so axes[i, j] is valid for every grid shape
    _, axes = plt.subplots(
        len(class_dirs), samples_per_class, figsize=figsize, squeeze=False
    )

    # Hide all frames up front so cells without an image stay blank
    for ax in axes.ravel():
        ax.axis('off')

    for i, class_name in enumerate(class_dirs):
        class_path = os.path.join(output_dir, class_name)
        # Sorted so the same samples are picked on every call
        images = sorted(
            f for f in os.listdir(class_path)
            if f.lower().endswith(('.jpg', '.jpeg'))
        )[:samples_per_class]

        for j, img_name in enumerate(images):
            ax = axes[i, j]
            # Close the file handle as soon as the image has been drawn
            with Image.open(os.path.join(class_path, img_name)) as img:
                ax.imshow(img)
            # Label each row once, on its first image
            if j == 0:
                ax.set_title(f"{class_name}", fontsize=12, fontweight='bold', loc='left')

    plt.tight_layout()
    plt.suptitle('Sample Images from Each Class', fontsize=16, fontweight='bold', y=1.00)
    plt.show()

# Run the preview
# Size the figure from the class folders actually present in OUTPUT_DIR
# (the preview draws one row per folder). `stats` only reflects the current
# run and is empty when no images were processed, which would request a
# zero-height figure.
n_preview_rows = sum(
    1 for entry in os.listdir(OUTPUT_DIR)
    if os.path.isdir(os.path.join(OUTPUT_DIR, entry))
)
preview_class_samples(OUTPUT_DIR, samples_per_class=5, figsize=(18, 4 * max(n_preview_rows, 1)))

# ========== Optional: Review Low Confidence Images Interactively ==========
from IPython.display import display, Image as IPImage, clear_output
import ipywidgets as widgets

def review_low_confidence_images(results_df, output_dir, threshold=0.5):
    """
    Interactive review of low confidence predictions.

    Shows, one at a time, every image whose confidence is below
    ``threshold`` together with "Accept" and "Skip" buttons; either button
    advances to the next image.

    Args:
        results_df: DataFrame with ``filename``, ``predicted_class`` and
            ``confidence`` columns.
        output_dir: Root directory of the organized images. Each image is
            looked up under ``low_confidence_<class>`` first, then under
            ``<class>``.
        threshold: Rows with confidence strictly below this value are
            reviewed.

    Returns:
        None. All output goes to the notebook display.
    """
    # A DataFrame built from no records has no 'confidence' column to filter on
    if results_df.empty:
        print("No low confidence images to review!")
        return

    low_conf_df = results_df[results_df['confidence'] < threshold].copy()

    if low_conf_df.empty:
        print("No low confidence images to review!")
        return

    print(f"Found {len(low_conf_df)} low confidence images to review\n")

    position = 0
    # NOTE(review): nothing writes to this dict, so "Corrections made" always
    # reports 0 -- confirm what Accept/Skip are meant to record.
    corrections = {}

    def advance(_button):
        # Shared click handler: both buttons simply move on to the next image
        nonlocal position
        position += 1
        show_image()

    def show_image():
        if position >= len(low_conf_df):
            clear_output()
            print("✓ Review complete!")
            print(f"Corrections made: {len(corrections)}")
            return

        row = low_conf_df.iloc[position]
        filename = row['filename']
        predicted_class = row['predicted_class']
        confidence = row['confidence']

        clear_output(wait=True)

        print(f"Image {position + 1}/{len(low_conf_df)}")
        print(f"Filename: {filename}")
        print(f"Predicted: {predicted_class} (confidence: {confidence:.3f})")

        # Find the image in the organized directory
        img_path = os.path.join(output_dir, f"low_confidence_{predicted_class}", filename)
        if not os.path.exists(img_path):
            img_path = os.path.join(output_dir, predicted_class, filename)

        # The file may be missing from both folders; keep the review going
        # instead of failing inside the display call
        if os.path.exists(img_path):
            display(IPImage(filename=img_path, width=500))
        else:
            print(f"⚠ Image file not found: {img_path}")

        # Create buttons
        accept_btn = widgets.Button(description="Accept", button_style='success')
        skip_btn = widgets.Button(description="Skip", button_style='info')

        accept_btn.on_click(advance)
        skip_btn.on_click(advance)

        display(widgets.HBox([accept_btn, skip_btn]))

    show_image()

# Uncomment to run interactive review
# review_low_confidence_images(results_df, OUTPUT_DIR, threshold=CONFIDENCE_THRESHOLD)
