# Two-Stage Cat Identification Pipeline 🚀

This notebook implements a two-stage computer vision pipeline:
1.  **Cat Detector:** A YOLO model trained to find any cat in an image.
2.  **Cat Classifier:** A second model trained to identify if a cropped image of a cat is **Marnie** or **Milo**.

## Part 0: Master Configuration ⚙️
**Action:** Set all the main variables in this cell before running the notebook.

In [1]:
import os
import shutil
import random
import zipfile
import yaml
import cv2
from ultralytics import YOLO
from collections import defaultdict

# --- 1. Project & File Configuration ---
# PROJECT_NAME is passed to Ultralytics as the `project` folder; the trained
# weights are later looked up underneath it.
PROJECT_NAME = "Cat_Pipeline_v1"
# NOTE(review): absolute, machine-specific path -- edit before running on another machine.
EXPORTED_ZIP_PATH = 'C:\\Users\\josia\\Downloads\\project-2-at-2025-10-07-23-35-a592af58.zip' # Path to your Label Studio YOLO export

# --- 2. Dataset Path Configuration ---
# These folders will be created and organized by the notebook
BASE_DATA_DIR = 'datasets' # Root folder for all datasets
RAW_DATA_PATH = os.path.join(BASE_DATA_DIR, '0_raw_from_zip') # For the initial unzipped data
DETECTOR_DATA_PATH = os.path.join(BASE_DATA_DIR, '1_detector_dataset') # For the single-class detector
CLASSIFIER_DATA_PATH = os.path.join(BASE_DATA_DIR, '2_classifier_dataset') # For the two-class classifier

# --- 3. Dataset & Training Settings ---
# Fraction of the images that goes into the training split.
TRAIN_RATIO = 0.8
# NOTE(review): VAL_RATIO is not read by any cell visible in this notebook; the
# validation split is simply whatever remains after TRAIN_RATIO.
VAL_RATIO = 0.2
DETECTOR_EPOCHS = 150
DETECTOR_PATIENCE = 50
CLASSIFIER_EPOCHS = 35
CLASSIFIER_PATIENCE = 15
# Image size used for detector training (the classifier cell sets its own size).
IMAGE_SIZE = 640

# This mapping is from your Label Studio export. It's critical for creating the classifier dataset.
# Keys are the class ids found in the YOLO label files; values become the class folder names.
CLASS_MAPPING = {0: 'Marnie', 1: 'Milo'} 

print("‚úÖ Configuration loaded.")

‚úÖ Configuration loaded.


## Part 1: Initial Data Prep (From Label Studio) 📦
--- 
This section takes the raw export from Label Studio and prepares it for our pipeline.

In [4]:
print(f"Unzipping '{EXPORTED_ZIP_PATH}' to '{RAW_DATA_PATH}'...")

# Start from a clean slate: discard whatever an earlier run extracted.
if os.path.exists(RAW_DATA_PATH):
    shutil.rmtree(RAW_DATA_PATH)

# Extract the complete Label Studio archive into the raw-data folder.
with zipfile.ZipFile(EXPORTED_ZIP_PATH, mode='r') as export_archive:
    export_archive.extractall(path=RAW_DATA_PATH)

print("‚úÖ Unzip complete.")

Unzipping 'C:\Users\josia\Downloads\project-2-at-2025-10-07-23-35-a592af58.zip' to 'datasets\0_raw_from_zip'...
‚úÖ Unzip complete.


In [21]:
# NOTE: This useful filename cleaning function is from your original notebook.
from urllib.parse import unquote

def clean_filenames_in_directory(directory_path):
    """Strip Label Studio upload prefixes from the entries of a directory.

    Two prefix shapes are recognised:

    * ``<anything>__<name>``: everything up to and including the first ``__``
      is dropped and the remainder is URL-decoded.
    * ``<8 lowercase hex chars>-<name>``: the 8-character prefix and the dash
      are dropped.

    A rename is skipped (with a warning) when the cleaned name already exists
    in the directory, so an existing file is never overwritten.

    Args:
        directory_path: Folder whose entries are renamed in place.

    Returns:
        Number of entries actually renamed (0 when the folder does not exist).
    """
    if not os.path.isdir(directory_path):
        print(f"  - ‚ö†Ô∏è  Warning: Directory not found, skipping: {directory_path}")
        return 0
    renamed_count = 0
    for filename in os.listdir(directory_path):
        new_filename = None
        if '__' in filename:
            _, main_part = filename.split('__', 1)
            new_filename = unquote(main_part)
        elif '-' in filename:
            parts = filename.split('-', 1)
            if len(parts) == 2 and len(parts[0]) == 8 and all(c in '0123456789abcdef' for c in parts[0]):
                new_filename = parts[1]
        if not new_filename or new_filename == filename:
            continue

        target_path = os.path.join(directory_path, new_filename)
        # Two prefixed files can clean to the same name (or the clean name may
        # already be present). Moving would silently replace that file on POSIX
        # and raise on Windows, so leave both files alone instead.
        if os.path.exists(target_path):
            print(f"  - Warning: '{new_filename}' already exists, leaving '{filename}' unchanged.")
            continue

        shutil.move(os.path.join(directory_path, filename), target_path)
        renamed_count += 1
    return renamed_count

print("--- Starting Filename Cleanup ---")

# Both sub-folders of the raw export get the same treatment: images first,
# then labels, with the rename count reported after each pass.
images_dir_to_clean, labels_dir_to_clean = (
    os.path.join(RAW_DATA_PATH, subfolder) for subfolder in ('images', 'labels')
)

renamed_images = clean_filenames_in_directory(images_dir_to_clean)
print(f"Renamed {renamed_images} image files.")

renamed_labels = clean_filenames_in_directory(labels_dir_to_clean)
print(f"Renamed {renamed_labels} label files.")

print("‚úÖ Filename cleanup complete.")

--- Starting Filename Cleanup ---
Renamed 0 image files.
Renamed 137 label files.
‚úÖ Filename cleanup complete.


In [22]:
import os
import shutil

# ==============================================================================
# --- ⚙️ Configuration ---
# ==============================================================================

# 1. Define the source folder where your combined images are currently located.
#    This path should be updated if you change the output of your previous script.
SOURCE_IMAGES_FOLDER = 'original images\\ALLPhotos_and_frames'

# ==============================================================================

# --- Script Logic ---
# NOTE: the images are COPIED into the raw dataset; the source folder is left
# untouched, so this cell can safely be re-run.

# Create the full path to the destination 'images' subfolder
destination_images_folder = os.path.join(RAW_DATA_PATH, 'images')

print("Preparing to copy files...")
print(f"  - From: {SOURCE_IMAGES_FOLDER}")
print(f"  - To:   {destination_images_folder}")

# Ensure the destination directory and its parent exist
os.makedirs(destination_images_folder, exist_ok=True)

# Check if the source directory exists before proceeding
if not os.path.isdir(SOURCE_IMAGES_FOLDER):
    print(f"\n‚ùå ERROR: Source directory not found at '{SOURCE_IMAGES_FOLDER}'. Please check the path.")
else:
    # Only regular files are copied; sub-folders of the source are ignored.
    files_to_copy = [f for f in os.listdir(SOURCE_IMAGES_FOLDER) if os.path.isfile(os.path.join(SOURCE_IMAGES_FOLDER, f))]

    copied_count = 0
    for filename in files_to_copy:
        source_path = os.path.join(SOURCE_IMAGES_FOLDER, filename)
        destination_path = os.path.join(destination_images_folder, filename)

        # Copy the file (an existing file with the same name is replaced)
        shutil.copy(source_path, destination_path)
        copied_count += 1

    print(f"\n‚úÖ Successfully copied {copied_count} image files.")

Preparing to move files...
  - From: original images\ALLPhotos_and_frames
  - To:   datasets\0_raw_from_zip\images

‚úÖ Successfully moved 571 image files.


In [23]:
import os

# Set the path to your folders after running prediction
# Image and label folders of the raw dataset
images_folder = os.path.join(RAW_DATA_PATH, 'images')
labels_folder = os.path.join(RAW_DATA_PATH, 'labels')

# --- Script to create empty label files ---

# The labels folder may be absent (e.g. an export without annotations);
# create it so the listing below cannot fail.
os.makedirs(labels_folder, exist_ok=True)

# Get the base names (without extension) of all images and labels
image_basenames = {os.path.splitext(f)[0] for f in os.listdir(images_folder)}
label_basenames = {os.path.splitext(f)[0] for f in os.listdir(labels_folder)}

# Find all images that are missing a label file
missing_labels = image_basenames - label_basenames

print(f"Found {len(missing_labels)} images without labels. Creating empty .txt files for them...")

# Create an empty .txt file for each missing label
for basename in missing_labels:
    # Opening in write mode is what creates the file; nothing is written to it.
    with open(os.path.join(labels_folder, f"{basename}.txt"), 'w') as f:
        pass

print("‚úÖ Done.")

Found 50 images without labels. Creating empty .txt files for them...
‚úÖ Done.


## Part 2: Preparing the DETECTOR Dataset 🎯
--- 
Here, we'll convert the multi-class labels (Marnie, Milo) into a single class ('cat') and split the data for training.

In [6]:
print("Modifying labels for single-class 'cat' detection...")

# Clean up previous runs. Without this, train/val folders written by an earlier
# split survive, and a new shuffle can leave the same image in both splits.
if os.path.exists(DETECTOR_DATA_PATH):
    shutil.rmtree(DETECTOR_DATA_PATH)

# Create the new detector dataset structure
detector_images_path = os.path.join(DETECTOR_DATA_PATH, 'images')
detector_labels_path = os.path.join(DETECTOR_DATA_PATH, 'labels')
os.makedirs(detector_images_path, exist_ok=True)
os.makedirs(detector_labels_path, exist_ok=True)

source_labels_path = os.path.join(RAW_DATA_PATH, 'labels')
source_images_path = os.path.join(RAW_DATA_PATH, 'images')

modified_count = 0
# Copy all images to the new location (sub-folders are skipped)
for filename in os.listdir(source_images_path):
    source_image = os.path.join(source_images_path, filename)
    if os.path.isfile(source_image):
        shutil.copy(source_image, os.path.join(detector_images_path, filename))

# Process and copy label files, changing the class ID to 0
for filename in os.listdir(source_labels_path):
    source_label = os.path.join(source_labels_path, filename)
    if not os.path.isfile(source_label):
        continue
    with open(source_label, 'r') as f_in:
        with open(os.path.join(detector_labels_path, filename), 'w') as f_out:
            for line in f_in:
                parts = line.strip().split()
                # Blank lines are dropped; an empty label file stays empty,
                # which marks the image as a background (no-cat) sample.
                if len(parts) > 1:
                    # Replace class ID (parts[0]) with 0
                    new_line = f"0 {' '.join(parts[1:])}\n"
                    f_out.write(new_line)
                    modified_count += 1

print(f"Processed {modified_count} labels, converting all to class '0' for the detector.")
print("‚úÖ Detector dataset created.")

Modifying labels for single-class 'cat' detection...
Processed 532 labels, converting all to class '0' for the detector.
‚úÖ Detector dataset created.


In [16]:
print("Splitting the detector dataset...")

# Fixed seed so that re-running the notebook reproduces the same train/val split.
SPLIT_SEED = 42

# Get all image filenames. os.listdir() returns entries in arbitrary order, so
# sort first; otherwise the seeded shuffle would still vary between machines.
all_files = sorted(f for f in os.listdir(detector_images_path) if f.lower().endswith(('.jpg', '.png', '.jpeg')))
random.Random(SPLIT_SEED).shuffle(all_files)

# Calculate split index: the first TRAIN_RATIO share trains, the rest validates
train_idx = int(len(all_files) * TRAIN_RATIO)
train_files = all_files[:train_idx]
val_files = all_files[train_idx:]

def copy_split_files(file_list, split_name):
    """Copy images and their label files into the ``split_name`` sub-folders.

    Args:
        file_list: Image filenames located in ``detector_images_path``.
        split_name: Name of the split sub-folder, e.g. ``'train'`` or ``'val'``.

    An image without a label file is still copied; it simply gets no label.
    """
    split_images_dir = os.path.join(DETECTOR_DATA_PATH, 'images', split_name)
    split_labels_dir = os.path.join(DETECTOR_DATA_PATH, 'labels', split_name)
    os.makedirs(split_images_dir, exist_ok=True)
    os.makedirs(split_labels_dir, exist_ok=True)

    for filename in file_list:
        base_name = os.path.splitext(filename)[0]
        shutil.copy(os.path.join(detector_images_path, filename), os.path.join(split_images_dir, filename))
        label_source = os.path.join(detector_labels_path, f"{base_name}.txt")
        # Guard against a missing label so one unlabeled image cannot abort the split.
        if os.path.exists(label_source):
            shutil.copy(label_source, os.path.join(split_labels_dir, f"{base_name}.txt"))

# Perform the split and copy files
copy_split_files(train_files, 'train')
copy_split_files(val_files, 'val')

# Clean up the temporary flat directories (only loose files; the split
# sub-folders created above are kept)
for flat_dir in (detector_images_path, detector_labels_path):
    for f in os.listdir(flat_dir):
        if os.path.isfile(os.path.join(flat_dir, f)):
            os.remove(os.path.join(flat_dir, f))

print(f"‚úÖ Dataset split complete. Train: {len(train_files)}, Val: {len(val_files)}")

Splitting the detector dataset...
‚úÖ Dataset split complete. Train: 456, Val: 115


In [17]:
print("Creating detector_data.yaml file...")

detector_yaml_path = os.path.join(DETECTOR_DATA_PATH, 'detector_data.yaml')

# 'path' is the dataset root; 'train' and 'val' are given relative to it.
# Writing the root as an absolute path keeps the dataset resolvable no matter
# which working directory training is started from.
data_for_yaml = {
    'path': os.path.abspath(DETECTOR_DATA_PATH),
    'train': './images/train',
    'val': './images/val',
    'nc': 1,
    'names': ['cat']
}

with open(detector_yaml_path, 'w') as f:
    yaml.dump(data_for_yaml, f, sort_keys=False, indent=4)

print(f"‚úÖ YAML file created at: {detector_yaml_path}")

Creating detector_data.yaml file...
‚úÖ YAML file created at: datasets\1_detector_dataset\detector_data.yaml


## Part 3: Training the Cat DETECTOR (Model 1) 🚀
--- 

In [None]:
# Fine-tune a pre-trained object detection checkpoint on the single-class data.
detector_model = YOLO('yolo11m.pt')

# All training options collected in one place; values come from the master
# configuration cell.
detector_train_args = dict(
    data=os.path.join(DETECTOR_DATA_PATH, 'detector_data.yaml'),
    epochs=DETECTOR_EPOCHS,
    imgsz=IMAGE_SIZE,
    project=PROJECT_NAME,
    name='detector_model',
    exist_ok=True,
    # NOTE(review): a fractional batch presumably asks Ultralytics to size the
    # batch automatically for that share of GPU memory -- confirm in its docs.
    batch=.85,
    patience=DETECTOR_PATIENCE,
)

print("üöÄ Starting Cat Detector training...")
detector_model.train(**detector_train_args)
print("‚úÖ Detector training complete!")

# Where the best checkpoint is expected, given the project/name used above.
DETECTOR_WEIGHTS_PATH = os.path.join(PROJECT_NAME, 'detector_model/weights/best.pt')
print(f"Best detector model saved at: {DETECTOR_WEIGHTS_PATH}")

## Part 4: Preparing the CLASSIFIER Dataset 🏷️
--- 
Now, we'll use the original labels and images to create a new dataset of cropped cat pictures, organized by name for classification training.

In [24]:
print("Creating cropped image dataset for the classifier...")

# Clean up previous runs
if os.path.exists(CLASSIFIER_DATA_PATH):
    shutil.rmtree(CLASSIFIER_DATA_PATH)

def create_cropped_images(split_name, source_files):
    """Crop every labelled box out of the raw images into per-class folders.

    Args:
        split_name: Sub-folder of the classifier dataset, e.g. ``'train'``.
        source_files: Image filenames located in ``RAW_DATA_PATH/images``.

    Returns:
        Number of crops that were successfully written to disk.

    Images that cannot be read, blank or malformed label lines, and boxes that
    collapse to an empty region after clamping to the image are skipped.
    """
    split_dir = os.path.join(CLASSIFIER_DATA_PATH, split_name)

    # Create subdirectories (e.g., train/Marnie, train/Milo)
    for class_name in CLASS_MAPPING.values():
        os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

    crop_count = 0
    for image_filename in source_files:
        base_name = os.path.splitext(image_filename)[0]
        image_path = os.path.join(RAW_DATA_PATH, 'images', image_filename)
        label_path = os.path.join(RAW_DATA_PATH, 'labels', f"{base_name}.txt")

        if not os.path.exists(label_path):
            continue

        image = cv2.imread(image_path)
        # cv2.imread returns None instead of raising for unreadable files.
        if image is None:
            print(f"  - Warning: could not read image, skipping: {image_path}")
            continue
        h, w = image.shape[:2]

        with open(label_path, 'r') as f:
            for i, line in enumerate(f):
                parts = line.strip().split()
                # A YOLO box line needs a class id plus four box values.
                if len(parts) < 5:
                    continue
                class_id = int(parts[0])
                x_center, y_center, width, height = map(float, parts[1:5])

                # Convert YOLO format to pixel coordinates, clamped to the image.
                # A negative start index would otherwise wrap around in the slice.
                x1 = max(0, int((x_center - width / 2) * w))
                y1 = max(0, int((y_center - height / 2) * h))
                x2 = min(w, int((x_center + width / 2) * w))
                y2 = min(h, int((y_center + height / 2) * h))
                if x2 <= x1 or y2 <= y1:
                    continue

                # Crop the image
                cropped_image = image[y1:y2, x1:x2]

                # Save to the correct class folder. Ids missing from
                # CLASS_MAPPING fall back to 'Unknown', whose folder is not
                # pre-created above, so make sure it exists.
                class_name = CLASS_MAPPING.get(class_id, 'Unknown')
                class_dir = os.path.join(split_dir, class_name)
                os.makedirs(class_dir, exist_ok=True)
                save_path = os.path.join(class_dir, f"{base_name}_{i}.jpg")
                # Only count crops that were actually written.
                if cv2.imwrite(save_path, cropped_image):
                    crop_count += 1
    return crop_count

# Create crops for both train and validation sets to maintain data integrity
train_crops = create_cropped_images('train', train_files)
val_crops = create_cropped_images('val', val_files)

print(f"‚úÖ Classifier dataset created. Saved {train_crops} training crops and {val_crops} validation crops.")

Creating cropped image dataset for the classifier...
‚úÖ Classifier dataset created. Saved 425 training crops and 107 validation crops.


In [32]:
#
# Part 4.5: generate 'Unknown' class crops for the classifier (v2 - adds an overlap check against the labelled cats).
#
import numpy as np

print("--- Starting 'Unknown' Data Generation (v2 - with Overlap Check) ---")

# --- Configuration ---
# General-purpose pre-trained detector used to find non-cat objects in the images.
generic_detector = YOLO('yolo11x.pt')
# Detections whose class name is in this set are never saved as 'Unknown'.
EXCLUDED_CLASSES = {'cat', 'dog'}
# Folder (and therefore class) name under which the generated crops are stored.
UNKNOWN_CLASS_NAME = 'Unknown'
IOU_THRESHOLD = 0.01 # If a generic box overlaps with a known cat by more than 1%, ignore it.
# ------------------------
# ------------------------

def calculate_iou(boxA, boxB):
    """Calculates Intersection over Union (IoU) between two bounding boxes.

    Args:
        boxA: Box as ``(x1, y1, x2, y2)`` with ``x1 <= x2`` and ``y1 <= y2``.
        boxB: Second box in the same format.

    Returns:
        IoU as a float in ``[0, 1]``. Returns ``0.0`` when the union area is
        zero (both boxes are degenerate) instead of dividing by zero.
    """
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    # max(0, ...) turns a negative extent (no overlap) into zero area.
    interArea = max(0, xB - xA) * max(0, yB - yA)
    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    unionArea = float(boxAArea + boxBArea - interArea)
    if unionArea <= 0:
        return 0.0
    return interArea / unionArea

def get_ground_truth_boxes(label_path, img_width, img_height):
    """Reads a YOLO label file and converts boxes to pixel coordinates (x1, y1, x2, y2).

    Args:
        label_path: Path of the YOLO ``.txt`` label file.
        img_width: Width of the matching image in pixels.
        img_height: Height of the matching image in pixels.

    Returns:
        List of ``[x1, y1, x2, y2]`` integer boxes; empty when the file is
        missing or holds no usable line.

    Blank lines and lines with fewer than five fields are skipped; only the
    first four values after the class id are read as the box.
    """
    if not os.path.exists(label_path):
        return []

    gt_boxes = []
    with open(label_path, 'r') as f:
        for line in f:
            parts = line.strip().split()
            # Need a class id plus x_center, y_center, width, height.
            if len(parts) < 5:
                continue
            x_center, y_center, width, height = map(float, parts[1:5])
            x1 = int((x_center - width / 2) * img_width)
            y1 = int((y_center - height / 2) * img_height)
            x2 = int((x_center + width / 2) * img_width)
            y2 = int((y_center + height / 2) * img_height)
            gt_boxes.append([x1, y1, x2, y2])
    return gt_boxes

def generate_unknown_crops(split_name, source_files):
    """Save crops of non-cat objects as extra 'Unknown' classifier samples.

    The generic detector is run on every image of the split. A detection is
    kept only if its class is not in EXCLUDED_CLASSES and its IoU with every
    labelled cat box stays at or below IOU_THRESHOLD.

    Args:
        split_name: Classifier dataset split to write into, e.g. ``'train'``.
        source_files: Image filenames located in ``RAW_DATA_PATH/images``.

    Returns:
        Number of crops written for this split.
    """
    print(f"Processing '{split_name}' set for unknown objects...")
    unknown_dir = os.path.join(CLASSIFIER_DATA_PATH, split_name, UNKNOWN_CLASS_NAME)
    os.makedirs(unknown_dir, exist_ok=True)

    saved_class_names = set()
    unknown_crop_count = 0

    for image_filename in source_files:
        stem = os.path.splitext(image_filename)[0]
        image_path = os.path.join(RAW_DATA_PATH, 'images', image_filename)
        label_path = os.path.join(RAW_DATA_PATH, 'labels', f"{stem}.txt")

        image = cv2.imread(image_path)
        if image is None:
            continue
        img_h, img_w, _ = image.shape

        # Pixel boxes of the labelled cats in this image.
        known_cat_boxes = get_ground_truth_boxes(label_path, img_w, img_h)

        for res in generic_detector(image_path, verbose=False):
            if res.boxes is None:
                continue

            for box in res.boxes:
                detected_name = generic_detector.names[int(box.cls[0])]

                # Filter 1: classes that must never become 'Unknown'.
                if detected_name in EXCLUDED_CLASSES:
                    continue

                generic_box = [int(coord) for coord in box.xyxy[0]]

                # Filter 2: anything overlapping a labelled cat is probably
                # that cat, so it must not be stored as 'Unknown'.
                touches_known_cat = any(
                    calculate_iou(generic_box, cat_box) > IOU_THRESHOLD
                    for cat_box in known_cat_boxes
                )
                if touches_known_cat:
                    continue

                left, top, right, bottom = generic_box
                crop = image[top:bottom, left:right]
                if crop.size == 0:
                    continue

                # The running counter keeps file names unique within the split.
                save_path = os.path.join(unknown_dir, f"unknown_{stem}_{unknown_crop_count}.jpg")
                cv2.imwrite(save_path, crop)
                unknown_crop_count += 1
                saved_class_names.add(detected_name)

    print(f"-> Found and saved {unknown_crop_count} 'Unknown' crops for the '{split_name}' set.")
    print(f"-> Classes saved as 'Unknown': {sorted(list(saved_class_names))}")
    return unknown_crop_count

# Generate 'Unknown' samples for the training split first, then validation.
for split_label, split_files in (('train', train_files), ('val', val_files)):
    generate_unknown_crops(split_label, split_files)

print("\n‚úÖ 'Unknown' class data generation complete.")

--- Starting 'Unknown' Data Generation (v2 - with Overlap Check) ---
Processing 'train' set for unknown objects...
-> Found and saved 622 'Unknown' crops for the 'train' set.
-> Classes saved as 'Unknown': ['bed', 'bottle', 'bowl', 'chair', 'couch', 'laptop', 'person', 'refrigerator', 'remote', 'suitcase', 'tie', 'tv', 'vase']
Processing 'val' set for unknown objects...
-> Found and saved 159 'Unknown' crops for the 'val' set.
-> Classes saved as 'Unknown': ['bed', 'bottle', 'car', 'chair', 'couch', 'potted plant', 'suitcase', 'vase']

‚úÖ 'Unknown' class data generation complete.


## Part 5: Training the Cat CLASSIFIER (Model 2) 🚀
--- 

In [6]:
import torch
import gc

# Free memory left over from earlier cells before starting a new training run.
torch.cuda.empty_cache()
gc.collect()

# Load a pre-trained classification model
classifier_model = YOLO('yolo11n-cls.pt')

print("üöÄ Starting Cat Classifier training...")
classifier_model.train(
    data=CLASSIFIER_DATA_PATH,
    epochs=CLASSIFIER_EPOCHS,
    # Early stopping, mirroring the detector run. CLASSIFIER_PATIENCE is set in
    # the master configuration but was previously never passed to train().
    patience=CLASSIFIER_PATIENCE,
    imgsz=224, # Classifiers typically use smaller image sizes
    project=PROJECT_NAME,
    name='classifier_model',
    exist_ok=True,
    batch=16,
    # Lower dropout to a more standard range to allow the model to learn effectively.
    dropout=0.3,

    # Add weight decay to prevent overfitting.
    weight_decay=0.0005,

    # Your augmentations are good for this problem.
    fliplr=0.5,    # Horizontal flips
    flipud=0.1,    # Occasional vertical flips
    erasing=0.4,   # Helps model learn from partial images
)

print("‚úÖ Classifier training complete!")
# Where the best checkpoint is expected, given the project/name used above.
CLASSIFIER_WEIGHTS_PATH = os.path.join(PROJECT_NAME, 'classifier_model/weights/best.pt')
print(f"Best classifier model saved at: {CLASSIFIER_WEIGHTS_PATH}")

üöÄ Starting Cat Classifier training...
New https://pypi.org/project/ultralytics/8.3.214 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.205  Python-3.12.10 torch-2.8.0+cu129 CUDA:0 (NVIDIA GeForce RTX 5070 Ti, 16303MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=datasets\2_classifier_dataset, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.3, dynamic=False, embed=None, epochs=35, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.1, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=224, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n-cls.pt, momentum=0.937

KeyboardInterrupt: 

In [3]:
#
# Temperature scaling calibration of the classifier (v2 - collects outputs batch by batch).
#
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision import transforms
from scipy.optimize import minimize

print("--- Starting Temperature Scaling Calibration (v2) ---")

# --- Configuration ---
CLASSIFIER_WEIGHTS_PATH = os.path.join(PROJECT_NAME, 'classifier_model/weights/best.pt')
VALIDATION_DIR = os.path.join(CLASSIFIER_DATA_PATH, 'val')
# ------------------------

# 1. Load your trained classifier and validation data
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
calib_model = YOLO(CLASSIFIER_WEIGHTS_PATH)

# NOTE(review): a plain resize + ToTensor may differ from the preprocessing the
# classifier saw during training/validation -- confirm the two match.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
# NOTE(review): assumes ImageFolder assigns class indices in the same order as
# the classifier's own class list -- TODO confirm.
val_dataset = ImageFolder(VALIDATION_DIR, transform=transform)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# 2. Get the raw model outputs (logits) for the validation set
logits_list = []
labels_list = []
print("Getting model outputs from validation set...")
with torch.no_grad():
    for images, labels in val_loader:
        # Run prediction on the entire batch of images
        results = calib_model.predict(images.to(device), verbose=False)

        # Stack the probabilities from each result in the batch into a single tensor
        # This creates a tensor of shape [batch_size, num_classes]
        batch_probs = torch.stack([r.probs.data for r in results])

        # Convert the batch's probabilities to logits.
        # The log-probabilities stand in for the model's logits here.
        batch_logits = torch.log(batch_probs + 1e-9) # Add epsilon to avoid log(0)

        # Append the entire batch's logits and labels to our lists
        logits_list.append(batch_logits)
        labels_list.append(labels)

# Concatenate all the batches into single tensors
# (this rebinds `labels`, which was the loop variable above, to the full label tensor)
logits = torch.cat(logits_list).to(device)
labels = torch.cat(labels_list).to(device)

print(f"Logits shape: {logits.shape}, Labels shape: {labels.shape}") # Should be [N, 3] and [N]

# 3. Find the optimal temperature 'T'
def objective(T):
    """Cross-entropy of the temperature-scaled logits against the labels.

    Args:
        T: One-element sequence holding the candidate temperature (the form in
            which the optimizer passes its parameter vector).

    Returns:
        The loss as a Python float, or ``inf`` when the temperature is 0.

    Reads the module-level ``logits``, ``labels`` and ``device``.
    """
    T = T[0]
    # Ensure T is not zero to avoid division errors
    if T == 0: return float('inf')
    scaled_logits = logits / T
    loss = F.cross_entropy(scaled_logits, labels.to(device))
    return loss.item()

print("Finding optimal temperature...")
# Search starts at T = 1.0 (no scaling).
# NOTE(review): `bounds` with Nelder-Mead presumably needs a recent SciPy -- confirm the installed version honours it.
result = minimize(objective, [1.0], method='nelder-mead', bounds=[(0.1, 10.0)])
optimal_temperature = result.x[0]

print(f"\n‚úÖ Calibration complete!")
print(f"Optimal Temperature (T) = {optimal_temperature:.4f}")

--- Starting Temperature Scaling Calibration (v2) ---
Getting model outputs from validation set...
Logits shape: torch.Size([266, 3]), Labels shape: torch.Size([266])
Finding optimal temperature...

‚úÖ Calibration complete!
Optimal Temperature (T) = 1.1410


## Part 6: Pipeline Demonstration & Improvement Loop 🏁
--- 
This section shows how to use both models together and explains the next steps for improving your models.

In [9]:
import cv2
import os
from ultralytics import YOLO
from collections import deque
import numpy as np

print("--- Running Full Pipeline with Simplified Stability Tracking ---")

# --- Configuration ---
# Number of most recent per-frame class predictions remembered for each track.
HISTORY_SIZE = 10
# Minimum IoU between a track's last box and a new detection to count as the same object.
IOU_MATCH_THRESHOLD = 0.4
# A track is dropped once it has gone this many frames without a matching detection.
INACTIVE_FRAMES_LIMIT = 5
# Boxes are only drawn when (classifier confidence x label stability) reaches this value.
MIN_CONFIDENCE = 0.50
# ----------------------------------------------------

# --- Original Configuration ---
DETECTOR_WEIGHTS_PATH = os.path.join(PROJECT_NAME, 'detector_model/weights/best.pt')
CLASSIFIER_WEIGHTS_PATH = os.path.join(PROJECT_NAME, 'classifier_model/weights/best.pt')
# NOTE(review): absolute, machine-specific path.
INPUT_VIDEO_PATH = 'C:\\Users\\josia\\Videos\\Marnie4.mp4' # <--- CHANGE THIS
output_dir = 'pipeline_results'
os.makedirs(output_dir, exist_ok=True)
OUTPUT_VIDEO_PATH = os.path.join(output_dir, f"simple_stable_result_{os.path.basename(INPUT_VIDEO_PATH)}")
# Box/label colours per predicted name, passed to the OpenCV drawing calls
# (OpenCV uses BGR order, so Marnie is red and Milo is blue).
CAT_COLORS = {"Marnie": (0, 0, 255), "Milo": (255, 0, 0), "Unknown": (0, 255, 0)}
DEFAULT_COLOR = (0, 255, 0) # Green fallback

# --- Tracker State Variables ---
# Maps track id -> dict with 'box', 'history', 'inactive_frames', 'updated_this_frame'.
tracked_objects = {}
# Id that will be given to the next newly created track.
next_object_id = 0

def calculate_iou(boxA, boxB):
    """Calculates Intersection over Union (IoU) between two bounding boxes.

    Args:
        boxA: Box as ``(x1, y1, x2, y2)`` with ``x1 <= x2`` and ``y1 <= y2``.
        boxB: Second box in the same format.

    Returns:
        IoU as a float in ``[0, 1]``. Returns ``0.0`` when the union area is
        zero (both boxes are degenerate) instead of dividing by zero.
    """
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    # max(0, ...) turns a negative extent (no overlap) into zero area.
    interArea = max(0, xB - xA) * max(0, yB - yA)
    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    unionArea = float(boxAArea + boxBArea - interArea)
    if unionArea <= 0:
        return 0.0
    return interArea / unionArea

# 1. Load models
print("Loading models...")
final_detector = YOLO(DETECTOR_WEIGHTS_PATH)
final_classifier = YOLO(CLASSIFIER_WEIGHTS_PATH)
print("‚úÖ Models loaded.")

# 2. Setup video reader and writer
cap = cv2.VideoCapture(INPUT_VIDEO_PATH)
if not cap.isOpened():
    print(f"‚ùå Error: Could not open video file at {INPUT_VIDEO_PATH}")
else:
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the
    # output play at the wrong speed. Fall back to 30 fps when the container
    # reports no usable rate (0 or NaN).
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = 30.0
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(OUTPUT_VIDEO_PATH, fourcc, fps, (frame_width, frame_height))

    print(f"Processing video: {INPUT_VIDEO_PATH}")

    # try/finally guarantees the reader and writer are released (and the output
    # file finalised) even if a frame fails part-way through.
    try:
        # 3. Loop through each frame
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret: break

            # Stage 1: detect every cat in the frame.
            cat_detections = final_detector(frame, verbose=False)
            current_detections = []
            for detection in cat_detections:
                for box_data in detection.boxes:
                    box = box_data.xyxy[0].int().tolist()
                    current_detections.append({'box': box, 'matched': False})

            # --- Original (Greedy) Tracker Logic ---
            # Each existing track claims the still-unmatched detection with the
            # highest IoU against its last known box.
            for obj_id, obj_data in tracked_objects.items():
                best_match_iou = 0
                best_match_idx = -1
                for i, det in enumerate(current_detections):
                    if not det['matched']:
                        iou = calculate_iou(obj_data['box'], det['box'])
                        if iou > best_match_iou:
                            best_match_iou = iou
                            best_match_idx = i

                if best_match_iou > IOU_MATCH_THRESHOLD:
                    obj_data['box'] = current_detections[best_match_idx]['box']
                    obj_data['inactive_frames'] = 0
                    current_detections[best_match_idx]['matched'] = True
                    obj_data['updated_this_frame'] = True
                else:
                    obj_data['inactive_frames'] += 1
                    obj_data['updated_this_frame'] = False

            # Detections nobody claimed start new tracks.
            for det in current_detections:
                if not det['matched']:
                    tracked_objects[next_object_id] = {
                        'box': det['box'],
                        'history': deque(maxlen=HISTORY_SIZE),
                        'inactive_frames': 0,
                        'updated_this_frame': True
                    }
                    next_object_id += 1

            # --- Classification and Simplified Drawing ---
            for obj_id, obj_data in tracked_objects.items():
                if obj_data.get('updated_this_frame', False):
                    x1, y1, x2, y2 = obj_data['box']
                    cat_crop = frame[y1:y2, x1:x2]
                    if cat_crop.size == 0: continue

                    # Stage 2: classify the cropped cat.
                    classification_results = final_classifier(cat_crop, verbose=False)
                    top_pred_index = classification_results[0].probs.top1
                    cat_name = final_classifier.names[top_pred_index]
                    confidence = classification_results[0].probs.top1conf.item()

                    obj_data['history'].append(cat_name)

                    # Majority vote over the recent predictions. Iterating the
                    # history in order (rather than a set) makes ties resolve
                    # deterministically to the name that appears first.
                    recent_names = list(obj_data['history'])
                    most_common_name = max(recent_names, key=recent_names.count)
                    stability = recent_names.count(most_common_name) / len(recent_names)

                    # Down-weight the confidence while the label is still flickering.
                    adjusted_confidence = confidence * stability

                    if adjusted_confidence >= MIN_CONFIDENCE:
                        label = f"{most_common_name}: {adjusted_confidence:.2f}"
                        box_color = CAT_COLORS.get(most_common_name, DEFAULT_COLOR)
                        cv2.rectangle(frame, (x1, y1), (x2, y2), box_color, 2)
                        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, box_color, 2)

            # --- Cleanup ---
            # Drop tracks that have gone unmatched for too many frames.
            inactive_ids = [obj_id for obj_id, data in tracked_objects.items() if data['inactive_frames'] >= INACTIVE_FRAMES_LIMIT]
            for obj_id in inactive_ids:
                del tracked_objects[obj_id]

            video_writer.write(frame)
    finally:
        cap.release()
        video_writer.release()

    print(f"\n‚úÖ Video processing complete! Output saved to: {OUTPUT_VIDEO_PATH}")

--- Running Full Pipeline with Simplified Stability Tracking ---
Loading models...
‚úÖ Models loaded.
Processing video: C:\Users\josia\Videos\Marnie4.mp4

‚úÖ Video processing complete! Output saved to: pipeline_results\simple_stable_result_Marnie4.mp4


### Next Steps & Improvement Loop

1.  **Evaluate:** Use the demonstration cell above to visually inspect how well your pipeline works on different videos.
2.  **Gather More Data:** Use a script (like the one in your original notebook) to extract frames from new videos.
3.  **Pre-label with the Detector:** Use your newly trained **detector** model (`detector_model/weights/best.pt`) to predict on these new frames. This will generate bounding boxes for any cats it finds.
4.  **Import to Label Studio:** Import the new frames and their predicted labels into a new Label Studio project.
5.  **Correct & Label:** In Label Studio, correct any bad bounding boxes and, most importantly, assign the correct class (`Marnie` or `Milo`) to each box.
6.  **Re-run This Notebook:** Export your newly labeled data and run this entire notebook again. Your models will get smarter with each cycle!

In [None]:
import cv2
import os
import csv
import uuid
from ultralytics import YOLO

print("--- Starting Event Generation ---")

# --- Configuration ---
DETECTOR_WEIGHTS_PATH = 'Cat_Pipeline_v1/detector_model/weights/best.pt'
CLASSIFIER_WEIGHTS_PATH = 'Cat_Pipeline_v1/classifier_model/weights/best.pt'
INPUT_VIDEO_PATH = 'C:\\Users\\josia\\Videos\\Milo4.mp4'  # <--- CHANGE THIS
OUTPUT_DIR = 'seed_data'
OUTPUT_CROPPED_IMAGE_DIR = os.path.join(OUTPUT_DIR, 'images', 'pending_review')
OUTPUT_FULL_FRAME_IMAGE_DIR = os.path.join(OUTPUT_DIR, 'images', 'full_frames')
OUTPUT_CSV_PATH = os.path.join(OUTPUT_DIR, 'events.csv')
FRAME_STRIDE = 15  # Only every 15th frame is analysed
# -------------------------

os.makedirs(OUTPUT_CROPPED_IMAGE_DIR, exist_ok=True)
os.makedirs(OUTPUT_FULL_FRAME_IMAGE_DIR, exist_ok=True)

print("Loading models...")
final_detector = YOLO(DETECTOR_WEIGHTS_PATH)
final_classifier = YOLO(CLASSIFIER_WEIGHTS_PATH)
print("‚úÖ Models loaded.")

cap = cv2.VideoCapture(INPUT_VIDEO_PATH)
event_data = []
frame_number = 0

if not cap.isOpened():
    # Stop here so an unreadable video does not overwrite an existing
    # events.csv with an empty one and then report success.
    print(f"ERROR: Could not open video file at {INPUT_VIDEO_PATH}")
else:
    # try/finally releases the capture even if a frame fails to process.
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_number += 1
            if frame_number % FRAME_STRIDE != 0:
                continue

            original_height, original_width, _ = frame.shape
            cat_detections = final_detector(frame, verbose=False)

            # There can be multiple detections in a single result object
            all_boxes = cat_detections[0].boxes
            if all_boxes is None or len(all_boxes) == 0:
                continue

            # Save the full frame only ONCE if there are any cats; every event
            # generated from this frame references the same file.
            full_frame_id = str(uuid.uuid4())
            full_frame_filename = f"{full_frame_id}_full.jpg"
            full_frame_save_path = os.path.join(OUTPUT_FULL_FRAME_IMAGE_DIR, full_frame_filename)
            cv2.imwrite(full_frame_save_path, frame)
            full_frame_image_path_for_csv = f"images/full_frames/{full_frame_filename}"

            # Loop through each detected box in the frame
            for box in all_boxes.xyxy:
                x1, y1, x2, y2 = [int(coord) for coord in box]

                cat_crop = frame[y1:y2, x1:x2]
                if cat_crop.size == 0: continue

                classification_results = final_classifier(cat_crop, verbose=False)
                top_pred_index = classification_results[0].probs.top1
                cat_name = final_classifier.names[top_pred_index]
                confidence = classification_results[0].probs.top1conf.item()

                event_id = str(uuid.uuid4())

                cropped_filename = f"{event_id}_crop.jpg"
                cropped_save_path = os.path.join(OUTPUT_CROPPED_IMAGE_DIR, cropped_filename)
                cv2.imwrite(cropped_save_path, cat_crop)

                # Paths stored in the CSV are relative to OUTPUT_DIR and always
                # use forward slashes.
                event_data.append([
                    event_id,
                    f"images/pending_review/{cropped_filename}",
                    full_frame_image_path_for_csv,  # Reference the single saved full frame
                    cat_name,
                    confidence,
                    x1, y1, x2, y2,
                    original_width,
                    original_height
                ])
    finally:
        cap.release()

    print(f"Writing {len(event_data)} events to {OUTPUT_CSV_PATH}...")
    with open(OUTPUT_CSV_PATH, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([
            'Id', 'CroppedImagePath', 'FullFrameImagePath', 'PredictedClassName', 'PredictedConfidence',
            'OriginalBboxX1', 'OriginalBboxY1', 'OriginalBboxX2', 'OriginalBboxY2',
            'OriginalImageWidth', 'OriginalImageHeight'
        ])
        writer.writerows(event_data)

    print("\n‚úÖ Event generation complete!")