# Thermal Counting Training

Rough training pipeline for gathering and labeling grouped bounding boxes.

> Note: If there is a tkinter error, run ```sudo apt install python3-tk -y```

In [2]:
import importlib  # Refreshing imports

# Core libraries
import cv2
import supervision as sv
import numpy as np
from ultralytics import YOLO
from utils.thermal_frame_to_temp import result_to_temp_frame
import utils.group_bounding_boxes as gbb
import validate_bounding_box as vbb
import tkinter as tk
from tkinter.filedialog import askopenfilename, askdirectory
import joblib
from collections import Counter

# SVM and model training
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

# Random Forest
from sklearn.ensemble import RandomForestClassifier

# Synthesizing Data
from imblearn.over_sampling import SMOTE

# ANN
from tensorflow import keras
from tensorflow.keras import layers

# Better exception handling and helpers
import traceback
import pprint
import datetime
import os
import time

# Refresh imports to see live changes vs cached ones
importlib.reload(gbb)
importlib.reload(vbb)



<module 'validate_bounding_box' from '/mnt/c/Users/JJ/Desktop/Repos/Chick-Counting/thermal/validate_bounding_box.py'>

In [34]:
''' Declaraing the save directory for grouped bounding box crops '''
SAVE_DIR = "grouped_bounding_box_crops"  # Directory to save the crops

## Helpers for saving the results (cropped bounding box results)

In [35]:
def save_crop(frame: np.ndarray, box: tuple[int,int,int,int], frame_count: int, group_id: int) -> None:
    """Safely crop and save each combined bounding box to SAVE_DIR with a unique name."""
    
    os.makedirs(SAVE_DIR, exist_ok=True)
    h, w = frame.shape[:2]
    x1, y1, x2, y2 = box
    
    # Clip to frame
    x1 = max(0, min(x1, w-1)); x2 = max(0, min(x2, w-1))
    y1 = max(0, min(y1, h-1)); y2 = max(0, min(y2, h-1))
    
    # Validate the coordinates
    if x2 <= x1 or y2 <= y1:
        return
    
    # Perform the crop and save
    crop = frame[y1:y2, x1:x2].copy()
    ts = int(time.time() * 1000)
    out_path = os.path.join(SAVE_DIR, f"f{frame_count}_g{group_id}_{ts}.jpg")
    cv2.imwrite(out_path, crop)

## Run the YOLO model and gather the results (currently unoptimized, POC)

In [36]:
FRAME_COUNT_EARLY_STOP = 5000  # For testing, limit to first N frames
FRAME_COUNT_LATE_START = 1000  # Skip first N frames

def get_line_from_video_frame(frame):
    frame_height, frame_width = frame.shape[:2]

    # Draw a horizontal line across the middle of the frame
    line_start = (frame_width, frame_height // 2)
    line_end = (0, frame_height // 2)
    return [line_start, line_end]

def chick_counting(video_path, line_points):

    # Grab a sample frame so we know video size
    generator = sv.get_video_frames_generator(video_path)
    frame = next(generator)

    # Set up video writer with same FPS/size as input
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    cap.release()

    # Init tracker and helpers
    byte_tracker = sv.ByteTrack()
    trace_annotator = sv.TraceAnnotator(thickness=4, trace_length=50)

    # Create the counting line
    line_zone = sv.LineZone(start=sv.Point(*line_points[0]), end=sv.Point(*line_points[1]))

    # Load custom YOLO model (trained on chicks only)
    model = YOLO("models/new_iron.pt")

    frame_count = 0
    total_count = 0
    all_counted_ids = set()  # keep track of already-counted trackers

    try:
        generator = sv.get_video_frames_generator(video_path)

        for frame in generator:
            frame_count += 1
            # Skip frames for testing
            if frame_count < FRAME_COUNT_LATE_START:
                continue
            # Early stop for testing
            if frame_count > FRAME_COUNT_EARLY_STOP + FRAME_COUNT_LATE_START:
                break
            
            print(f"Processing frame {frame_count}")

            # Run YOLO on frame
            results = model(frame)[0]

            # Convert results to supervision Detections
            detections = sv.Detections.from_ultralytics(results)

            # Update tracker with detections
            detections = byte_tracker.update_with_detections(detections)
            print("Tracker IDs this frame:", detections.tracker_id)

            # See if any trackers crossed the line
            crossed_in_flags, crossed_out_flags = line_zone.trigger(detections)

            ''' Additional training logic for grouping bounding boxes and saving crops '''
            # Find groups that contain any box overlapping the detection that crossed "in"
            xyxy_np = detections.xyxy.astype(float)
            groups = gbb.group_bounding_boxes(xyxy_np)  # Default, low threshold for now
            
            # Finding which groups correspond to crossed "in" boxes
            crossed_i = {i for i, crossed in enumerate(crossed_in_flags) if crossed}
            groups_to_save = []
            for gid, g in enumerate(groups):
                if any(idx in crossed_i for idx in g):
                    groups_to_save.append((gid, g))
                    
            # Merge and save the grouped boxes for each group
            for gid, g in groups_to_save:
                # Skip empty groups
                if not g:
                    continue
                # Get merged box and save crop
                merged_box = gbb.merge_group_bounding_box(xyxy_np, g)
                save_crop(frame, merged_box, frame_count, gid)
            
            # Only count new IDs that cross "in"
            for i, crossed in enumerate(crossed_in_flags):
                if crossed:
                    tracker_id = detections.tracker_id[i]
                    if tracker_id is not None and tracker_id not in all_counted_ids:
                        total_count += 1
                        all_counted_ids.add(tracker_id)
                        print(f"New Chick crossed the line! ID {tracker_id}, Total count: {total_count}")

            # Sensitivity for declaring a box as "nested"
            # e.g. 0.9 means inner must have at least 90% of its area inside outer
            NESTED_THRESHOLD = 0.9  

            contained_indices = set()
            boxes = detections.xyxy

            for i, outer in enumerate(boxes):
                x1o, y1o, x2o, y2o = outer
                outer_area = max(0, (x2o - x1o)) * max(0, (y2o - y1o))

                for j, inner in enumerate(boxes):
                    if i == j:
                        continue
                    x1i, y1i, x2i, y2i = inner
                    inner_area = max(0, (x2i - x1i)) * max(0, (y2i - y1i))

                    # Intersection box
                    inter_x1 = max(x1o, x1i)
                    inter_y1 = max(y1o, y1i)
                    inter_x2 = min(x2o, x2i)
                    inter_y2 = min(y2o, y2i)

                    inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)

                    # Ratio of inner covered by outer
                    if inner_area > 0 and (inter_area / inner_area) >= NESTED_THRESHOLD:
                        contained_indices.add(j)


            # Assign labels + colors depending on nesting
            labels = []
            colors = []
            for i, tracker_id in enumerate(detections.tracker_id):
                if i in contained_indices:
                    labels.append(f"#{tracker_id} nested")
                    colors.append(sv.Color.RED)
                else:
                    labels.append(f"#{tracker_id} chick")
                    colors.append(sv.Color.GREEN)

            

    except Exception as e:
        # Detailed exception logging
        print("=== Exception while processing video frames ===")
        print("Time:", datetime.datetime.now().isoformat())
        print("Exception type:", type(e).__name__)
        print("Exception message:", str(e))
        print("Full traceback:")
        print(traceback.format_exc())

    finally:
        print(f"Processing complete. Processed {frame_count} frames.")
        print(f"Final total count: {total_count}")

if __name__ == "__main__":
    tk.Tk().withdraw()

    # Pick input video + output folder with file dialogs
    SOURCE_VIDEO_PATH = askopenfilename()
    print("User chose:", SOURCE_VIDEO_PATH)
    
    # Grab a frame to define the line
    cap = cv2.VideoCapture(SOURCE_VIDEO_PATH)
    ret, frame = cap.read()
    if not ret:
        print("Failed to read the video")
        exit()
    cap.release()
    
    line_points = get_line_from_video_frame(frame)
    
    chick_counting(SOURCE_VIDEO_PATH, line_points)
    
    print(f"Completed attempted processing of {FRAME_COUNT_EARLY_STOP} frames.")

User chose: ()


error: OpenCV(4.12.0) :-1: error: (-5:Bad argument) in function 'VideoCapture'
> Overload resolution failed:
>  - Expected 'filename' to be a str or path-like object
>  - VideoCapture() missing required argument 'apiPreference' (pos 2)
>  - Argument 'index' is required to be an integer
>  - VideoCapture() missing required argument 'apiPreference' (pos 2)
>  - VideoCapture() missing required argument 'apiPreference' (pos 2)


## Extracting features from collected (and hand-labeled) data

In [None]:
from typing import List

categories: List[int] = [0, 1, 2, 3, 4]  # Categories for different chick counts (ignoring 5 for now)
data: List[np.array] = []  # Will hold the extracted feature data
raw_data: List[tuple[int, np.array]] = []  # (category, raw image data)

# Loop through each category directory and extract features
for category in categories:
    path = os.path.join(SAVE_DIR, f"{str(category)}")
    # Each image in the category directory
    for img in os.listdir(path):
        # Load the image into CV2
        img_path = os.path.join(path, img)
        image = cv2.imread(img_path)
        # Skip if image failed to load
        if image is None:
            continue
        
        # Save the raw data for potential CNN use
        raw_data.append((category, image.copy()))
        
        # NOTE Blocker - How can we get the temperature data at this point?
        
        # Temporary Solution - Plain Normalization
        image_temp = (image - np.min(image)) / (np.max(image) - np.min(image))
        image_temp = image_temp.astype(np.float32)
        
        # Extract features using validate_bounding_box module
        image_features = vbb.get_box_features(image_temp)
        
        data.append((category, image_features))
        
# Save the extracted data for later model training
joblib.dump(data, "thermal_chick_counting_features.pkl")  # Features
joblib.dump(raw_data, "thermal_chick_counting_raw_data.pkl")  # Raw images (CNN)
print("Saved extracted features for chick counting model training.")

Saved extracted features for chick counting model training.


## Splitting into test/train

Also synthethically generating more data of minority samples to prevent the model from overpredicting counts of *1* (large skewedness)

In [4]:
# Loading in the features (if not already loaded)
features = joblib.load("thermal_chick_counting_features.pkl")

print("Loaded features for model training:", len(features), "samples.")

# Unpack into targets and features
y = np.array([label for label, _ in features], dtype=int)
X = np.array([feat for _, feat in features], dtype=np.float32)

# Splitting into training and test sets (20% test, stratified)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Synthesizing more data for minority classes using SMOTE
smote = SMOTE(sampling_strategy='auto' ,random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Showing class distributions
counts = Counter(y)
counts_resampled = Counter(y_train_resampled)
n_total = len(y)
n_total_resampled = len(y_train_resampled)
print("\nUnsynthesized Class counts:", counts, "\nSynthesized Class counts:", (counts_resampled + Counter(y_test)), "\n")
print("Unsynthesized Class ratios:", {c: f"{counts[c]/n_total:.3f}" for c in counts})
print("Synthesized Class ratios:", {c: f"{counts_resampled[c]/n_total_resampled:.3f}" for c in counts_resampled})

Loaded features for model training: 438 samples.

Unsynthesized Class counts: Counter({np.int64(1): 319, np.int64(2): 84, np.int64(3): 27, np.int64(4): 8}) 
Synthesized Class counts: Counter({np.int64(1): 319, np.int64(2): 272, np.int64(3): 260, np.int64(4): 257}) 

Unsynthesized Class ratios: {np.int64(1): '0.728', np.int64(2): '0.192', np.int64(3): '0.062', np.int64(4): '0.018'}
Synthesized Class ratios: {np.int64(1): '0.250', np.int64(2): '0.250', np.int64(3): '0.250', np.int64(4): '0.250'}


## Training/Testing a basic SVM Model

Running a GridSearchCV to find the best cross-validated result.

In [None]:
# Pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC(kernel='rbf', gamma='scale', probability=True, class_weight='balanced'))
])

# Stratified K-Fold for cross-validation
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Parameter grid to test different C values
param_grid = {
    'svm__C': [0.01, 0.1, 1.0, 10.0, 100.0]  # Tweaked for optimal range (NOTE: c=100.0 could indicate overfitting)
}

# Grid search using the same StratifiedKFold
grid = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=skf,
    scoring='accuracy',
    n_jobs=-1,
    verbose=2,
    return_train_score=False
)

grid.fit(X_resampled, y_resampled)

# Print the best parameter set
print("Best params:", grid.best_params_)
print("Best cross-validation accuracy:", grid.best_score_)

# Show mean score per tested C
for mean, params in zip(grid.cv_results_['mean_test_score'], grid.cv_results_['params']):
    print(f"{params['svm__C']}: mean CV accuracy = {mean:.4f}")

# Evaluate best estimator with cross_val_score to confirm
best_est = grid.best_estimator_
best_scores = cross_val_score(best_est, X_resampled, y_resampled, cv=skf, scoring='accuracy', n_jobs=-1)
print("Re-evaluated CV scores for best estimator:", best_scores)
print("Re-evaluated mean accuracy:", np.mean(best_scores))

Fitting 3 folds for each of 5 candidates, totalling 15 fits
[CV] END ........................................svm__C=10.0; total time=   0.2s
[CV] END ........................................svm__C=10.0; total time=   0.2s
[CV] END .........................................svm__C=1.0; total time=   0.2s
[CV] END .........................................svm__C=1.0; total time=   0.2s
[CV] END ........................................svm__C=10.0; total time=   0.2s
[CV] END .........................................svm__C=1.0; total time=   0.2s
[CV] END .........................................svm__C=0.1; total time=   0.2s
[CV] END .........................................svm__C=0.1; total time=   0.3s
[CV] END .........................................svm__C=0.1; total time=   0.3s
[CV] END ........................................svm__C=0.01; total time=   0.3s
[CV] END ........................................svm__C=0.01; total time=   0.3s
[CV] END ........................................

In [None]:
''' Saving the trained model '''
joblib.dump(grid.best_estimator_, "thermal_chick_counting_svm_model.pkl")

['thermal_chick_counting_svm_model.pkl']

## Attempting a Random Forest Classifier

Testing a more complex model with more data. Running a GridSearchCV to optimize parameters for the best cross-validated result.

In [None]:
# Pipeline
rf_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('randomforest', RandomForestClassifier(n_jobs=-1, random_state=42))  # Will fill in all parameters with gridsearch
])

# Stratified K-Fold for cross-validation
skf = StratifiedKFold(n_splits=4, shuffle=True, random_state=42)

# Parameter grid for Random Forest
rf_param_grid = {
    'randomforest__n_estimators': [50, 100, 200, 300, 400],
    'randomforest__max_depth': [4, 6, 8, 10, None],
    'randomforest__min_samples_split': [2, 4, 6, 8],
    'randomforest__min_samples_leaf': [1, 2, 4, 6],
    'randomforest__max_features': ['sqrt', 'log2', 0.5, 0.8],
}

# Grid search using the same StratifiedKFold
rf_grid = GridSearchCV(
    estimator=rf_pipeline,
    param_grid=rf_param_grid,
    cv=skf,
    scoring='accuracy',
    verbose=2,
    return_train_score=False
)
rf_grid.fit(X_resampled, y_resampled)

# Print the best parameter set
print("Best params:", rf_grid.best_params_)
print("Best cross-validation accuracy:", rf_grid.best_score_)

# Evaluate best estimator with cross_val_score to confirm
best_est = grid.best_estimator_
best_scores = cross_val_score(best_est, X_resampled, y_resampled, cv=skf, scoring='accuracy', n_jobs=-1)
print("Re-evaluated CV scores for best estimator:", best_scores)
print("Re-evaluated mean accuracy:", np.mean(best_scores))

Fitting 4 folds for each of 1600 candidates, totalling 6400 fits
[CV] END randomforest__max_depth=4, randomforest__max_features=sqrt, randomforest__min_samples_leaf=1, randomforest__min_samples_split=2, randomforest__n_estimators=50; total time=   0.2s
[CV] END randomforest__max_depth=4, randomforest__max_features=sqrt, randomforest__min_samples_leaf=1, randomforest__min_samples_split=2, randomforest__n_estimators=50; total time=   0.5s
[CV] END randomforest__max_depth=4, randomforest__max_features=sqrt, randomforest__min_samples_leaf=1, randomforest__min_samples_split=2, randomforest__n_estimators=50; total time=   0.2s
[CV] END randomforest__max_depth=4, randomforest__max_features=sqrt, randomforest__min_samples_leaf=1, randomforest__min_samples_split=2, randomforest__n_estimators=50; total time=   0.2s
[CV] END randomforest__max_depth=4, randomforest__max_features=sqrt, randomforest__min_samples_leaf=1, randomforest__min_samples_split=2, randomforest__n_estimators=100; total time=  

In [None]:
''' Saving the trained model '''
joblib.dump(rf_grid.best_estimator_, "thermal_chick_counting_rf_model.pkl")

print("Random Forest Best Params: ", rf_grid.best_params_)
print("Random Forest Best Score: ", rf_grid.best_score_)

In [7]:
# Fitting a model based on best params found earlier
OPTIMAL_RF_PARAMS = {
    'randomforest__max_depth': 10, 
    'randomforest__max_features': 0.8, 
    'randomforest__min_samples_leaf': 1, 
    'randomforest__min_samples_split': 4, 
    'randomforest__n_estimators': 100
}
rf_model_optimal = RandomForestClassifier(**{k.replace('randomforest__', ''): v for k, v in OPTIMAL_RF_PARAMS.items()})
rf_model_optimal.fit(X_resampled, y_resampled)
joblib.dump(rf_model_optimal, "thermal_chick_counting_rf_model_fit.pkl")

# Fitting the standard scaler for the pipeline
scaler = StandardScaler()
scaler.fit(X_resampled)
joblib.dump(scaler, "thermal_chick_counting_rf_scaler.pkl")

['thermal_chick_counting_rf_scaler.pkl']

## TODO: Attempting a basic CNN Model

Passing in all pixels of the bounding box image, just to test if there is potential with a CNN and the bounding box pixels as features. This can be done next week.