In [10]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import cv2
import os
from collections import defaultdict
import time

from skimage.feature import hog
from skimage import exposure

from sklearn.svm import SVC, LinearSVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import (
    classification_report, 
    confusion_matrix,
    precision_recall_fscore_support,
    accuracy_score,
    roc_auc_score
)

from tqdm.notebook import tqdm

#### Data path configuration

In [11]:

# Dataset root plus one sub-directory per split.
base_dir = Path('../data')
train_dir, test_dir, validation_dir = (
    base_dir / split_name for split_name in ('train', 'test', 'valid')
)

# Class names; a label's integer class id is used as an index into this list.
classes = [
    "Ants", "Bees", "Beetles", "Caterpillars",
    "Earthworms", "Earwigs", "Grasshoppers", "Moths",
    "Slugs", "Snails", "Wasps", "Weevils",
]

num_classes = len(classes)

#### Data Preprocessing

In [12]:
def parse_yolo_label(label_path):
    """Read a YOLO-style annotation file into a list of boxes.

    Every non-blank line is split on whitespace. The first token is parsed as
    an integer class id and the following four tokens as floats
    (x_center, y_center, width, height); any further tokens are ignored.

    Args:
        label_path: Path of the label text file.

    Returns:
        A list of ``[class_id, x_center, y_center, width, height]`` lists, or
        an empty list when ``label_path`` does not exist.
    """
    if not os.path.exists(label_path):
        return []

    parsed = []
    with open(label_path, 'r') as handle:
        for raw_line in handle:
            tokens = raw_line.strip().split()
            if not tokens:
                # Blank (or whitespace-only) line.
                continue

            class_id = int(tokens[0])
            geometry = [float(tokens[idx]) for idx in range(1, 5)]
            parsed.append([class_id] + geometry)

    return parsed

def yolo_to_bbox(x_center, y_center, width, height, img_width, img_height):
    """Convert a normalised centre/size box into integer pixel corners.

    The centre and size are scaled by the image dimensions, then the corners
    are computed as centre -/+ half the size and truncated with ``int()``.

    Returns:
        ``(x_min, y_min, x_max, y_max)`` as a tuple of ints. Values are not
        clamped to the image here.
    """
    cx = x_center * img_width
    cy = y_center * img_height
    half_w = width * img_width / 2
    half_h = height * img_height / 2

    return (
        int(cx - half_w),
        int(cy - half_h),
        int(cx + half_w),
        int(cy + half_h),
    )


def crop_object_from_image(image, bbox):
    """Cut the region described by ``bbox`` out of ``image``.

    The box is clamped to the image bounds before slicing.

    Args:
        image: Array whose first two dimensions are (height, width).
        bbox: ``(x_min, y_min, x_max, y_max)`` in pixels.

    Returns:
        The sliced region, or ``None`` when the clamped region is smaller
        than 10 pixels in either dimension.
    """
    left, top, right, bottom = bbox
    img_h, img_w = image.shape[:2]

    # Clamp the box so the slice never reaches outside the image.
    left, top = max(0, left), max(0, top)
    right, bottom = min(img_w, right), min(img_h, bottom)

    patch = image[top:bottom, left:right]

    too_small = patch.shape[0] < 10 or patch.shape[1] < 10
    return None if too_small else patch

def load_yolo_dataset(data_dir, max_samples=None, use_full_image=False):
    """Load labelled object samples from a YOLO-style dataset directory.

    Images are read from ``data_dir / 'images'`` (``*.jpg`` only, in sorted
    order) and their annotations from the same-named ``.txt`` file in
    ``data_dir / 'labels'``. One sample is produced per annotated box.

    Args:
        data_dir: ``pathlib.Path`` of the split directory.
        max_samples: If truthy, only the first ``max_samples`` image files
            are considered.
        use_full_image: If True, every box contributes the whole (RGB) image
            instead of a crop of the box region.

    Returns:
        ``(images, labels, bboxes)``: three parallel lists holding the RGB
        image (or crop), the class name, and the pixel box
        ``(x_min, y_min, x_max, y_max)`` for each sample.
    """
    images_dir = data_dir / 'images'
    labels_dir = data_dir / 'labels'

    print(f"Loading data from {data_dir}...")

    images = []
    labels = []
    bboxes = []

    image_files = sorted(images_dir.glob('*.jpg'))

    if max_samples:
        image_files = image_files[:max_samples]

    for img_path in image_files:
        # Look at the annotations first: parse_yolo_label returns [] for a
        # missing or empty file, so unlabelled images are skipped without
        # paying for an image decode.
        boxes = parse_yolo_label(labels_dir / (img_path.stem + '.txt'))
        if not boxes:
            continue

        img = cv2.imread(str(img_path))
        if img is None:
            # Unreadable / corrupt image file.
            continue

        # OpenCV decodes to BGR; the rest of the notebook works with RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_height, img_width = img.shape[:2]

        for class_id, x_c, y_c, w, h in boxes:
            # Reject ids outside the known classes. Negative ids must be
            # rejected too, otherwise they would index `classes` from the end.
            if not 0 <= class_id < num_classes:
                continue

            bbox = yolo_to_bbox(x_c, y_c, w, h, img_width, img_height)

            if use_full_image:
                sample = img
            else:
                sample = crop_object_from_image(img, bbox)
                if sample is None:
                    # Crop was too small to be useful.
                    continue

            images.append(sample)
            labels.append(classes[class_id])
            bboxes.append(bbox)

    print(f"Loaded {len(images)} objects from {len(image_files)} images")

    return images, labels, bboxes


#### HOG Feature Extraction

In [14]:


class HOGFeatureExtractor:
    """Compute HOG descriptors: grayscale -> resize -> ``skimage.feature.hog``.

    Args:
        image_size: Target size handed to ``cv2.resize`` as its ``dsize``
            argument, i.e. ``(width, height)``.
        orientations: Number of HOG orientation bins.
        pixels_per_cell: Cell size in pixels.
        cells_per_block: Number of cells per normalisation block.
    """

    def __init__(self, image_size=(128, 128), orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2)):
        self.image_size = image_size
        # Keyword arguments forwarded unchanged to skimage's hog().
        self.hog_params = {
            'orientations': orientations,
            'pixels_per_cell': pixels_per_cell,
            'cells_per_block': cells_per_block,
            'block_norm': 'L2-Hys',
            'channel_axis': None
        }

    def preprocess_image(self, image):
        """Return ``image`` as a grayscale array resized to ``self.image_size``.

        Three-dimensional inputs are treated as RGB, which is the channel
        order load_yolo_dataset produces; two-dimensional inputs are assumed
        to be grayscale already and are only copied.
        """
        if len(image.shape) == 3:
            # RGB2GRAY, not BGR2GRAY: with RGB input the BGR code would apply
            # the red and blue luminance weights to the wrong channels.
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        else:
            gray = image.copy()

        return cv2.resize(gray, self.image_size)

    def extract_features(self, images):
        """Compute HOG features for one image or a list/tuple of images.

        Args:
            images: A single image array, or a list/tuple of image arrays.

        Returns:
            A 1-D feature vector for a single image; otherwise an array with
            one row of features per input image.
        """
        single_image = not isinstance(images, (list, tuple))
        if single_image:
            images = [images]

        # A progress bar is only useful for batches.
        iterator = images if single_image else tqdm(images, desc="Extracting HOG")

        features = np.array([
            hog(self.preprocess_image(img), visualize=False, **self.hog_params)
            for img in iterator
        ])

        return features[0] if single_image else features

    def visualize(self, image):
        """Compute HOG features together with a displayable HOG image.

        Returns:
            ``(features, hog_image, preprocessed)`` where ``hog_image`` has
            been intensity-rescaled for display and ``preprocessed`` is the
            grayscale, resized input that was fed to ``hog``.
        """
        preprocessed = self.preprocess_image(image)

        features, hog_image = hog(
            preprocessed,
            visualize=True,
            **self.hog_params
        )

        # Stretch the (usually dim) HOG rendering so it is visible when plotted.
        hog_image = exposure.rescale_intensity(hog_image, in_range=(0, 10))

        return features, hog_image, preprocessed

#### SVM Classifier


In [15]:

class SVMClassifier:
    """SVC wrapper that bundles feature scaling and label encoding.

    Args:
        kernel: SVC kernel name.
        C: SVC regularisation parameter.
        gamma: SVC kernel coefficient.
    """

    def __init__(self, kernel='rbf', C=1.0, gamma='scale'):
        self.kernel = kernel
        self.C = C
        self.gamma = gamma

        self.model = None
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()

        # Wall-clock durations in seconds, filled in by train() / evaluate().
        self.training_time = None
        self.testing_time = None
        # Defined up front so the flag can be read before train() has run.
        self.is_trained = False

    def train(self, X_train, y_train):
        """Fit the scaler, the label encoder and the SVC on the training data.

        Args:
            X_train: Feature matrix, one row per sample.
            y_train: Class labels, one per sample.
        """
        print(f"Training SVM (kernel={self.kernel}, C={self.C})...")
        print(f"Training samples: {len(X_train)}")

        start_time = time.time()

        # Feature scaling (fit on training data)
        X_train_scaled = self.scaler.fit_transform(X_train)

        # Label encoding
        y_train_encoded = self.label_encoder.fit_transform(y_train)

        # Check class distribution
        unique, counts = np.unique(y_train_encoded, return_counts=True)
        print(f"   Classes: {len(unique)}, Distribution: {dict(zip(unique, counts))}")

        # SVM training; probability=True is required for predict_proba().
        self.model = SVC(
            kernel=self.kernel,
            C=self.C,
            gamma=self.gamma,
            probability=True,
            random_state=42,
            verbose=True
        )

        self.model.fit(X_train_scaled, y_train_encoded)

        self.training_time = time.time() - start_time
        self.is_trained = True

        print(f"Training completed in {self.training_time:.2f} seconds")
        print(f"Support vectors: {self.model.n_support_}")

    def predict(self, X_test):
        """Return the predicted class labels (decoded) for ``X_test``."""
        X_test_scaled = self.scaler.transform(X_test)
        y_pred_encoded = self.model.predict(X_test_scaled)
        return self.label_encoder.inverse_transform(y_pred_encoded)

    def predict_proba(self, X_test):
        """Return the per-class probabilities from the underlying SVC."""
        X_test_scaled = self.scaler.transform(X_test)
        return self.model.predict_proba(X_test_scaled)

    def evaluate(self, X_test, y_test, dataset_name='Test'):
        """Predict on ``X_test`` and score the predictions against ``y_test``.

        Records the prediction time in ``self.testing_time`` and prints a
        short summary plus the per-class classification report.

        Args:
            X_test: Feature matrix, one row per sample.
            y_test: True class labels; each must be a label seen in training,
                because they are passed through the fitted label encoder.
            dataset_name: Name used in the printed messages.

        Returns:
            dict with keys ``accuracy``, ``precision``, ``recall``, ``f1``
            (the last three weighted by class support), ``confusion_matrix``,
            ``report`` (text), ``y_pred`` (decoded predictions) and
            ``testing_time`` (seconds).
        """
        print(f"Evaluating on {dataset_name} set...")

        start_time = time.time()
        y_pred = self.predict(X_test)
        self.testing_time = time.time() - start_time

        y_test_encoded = self.label_encoder.transform(y_test)
        y_pred_encoded = self.label_encoder.transform(y_pred)

        # Report over every trained class, even those absent from this split,
        # so the confusion matrix always has the same shape.
        class_names = [str(name) for name in self.label_encoder.classes_]
        label_ids = np.arange(len(class_names))

        accuracy = accuracy_score(y_test_encoded, y_pred_encoded)
        precision, recall, f1, _ = precision_recall_fscore_support(
            y_test_encoded,
            y_pred_encoded,
            labels=label_ids,
            average='weighted',
            zero_division=0
        )
        cm = confusion_matrix(y_test_encoded, y_pred_encoded, labels=label_ids)
        report = classification_report(
            y_test_encoded,
            y_pred_encoded,
            labels=label_ids,
            target_names=class_names,
            zero_division=0
        )

        print(f"{dataset_name} accuracy: {accuracy:.4f}")
        print(f"{dataset_name} precision: {precision:.4f}, recall: {recall:.4f}, F1: {f1:.4f} (weighted)")
        print(f"Prediction time: {self.testing_time:.2f} seconds")
        print(report)

        return {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'confusion_matrix': cm,
            'report': report,
            'y_pred': y_pred,
            'testing_time': self.testing_time,
        }




#### Sliding Window Detector

In [None]:
class SlidingWindowDetector:
    """Multi-scale sliding-window detector built on a window classifier.

    Args:
        classifier: Object exposing ``predict_proba(features)`` and a fitted
            ``label_encoder`` (with ``classes_``), e.g. ``SVMClassifier``.
        feature_extractor: Object exposing ``image_size`` as (width, height)
            and ``extract_features(list_of_images)``.
        scales: Image scale factors to scan.
        step_size: Window stride in pixels (at the scaled resolution).
        confidence_threshold: Minimum class probability for a detection.
        nms_threshold: IoU at or above which a lower-scored box is suppressed.
    """

    def __init__(self, classifier, feature_extractor, scales=(0.5, 0.75, 1.0, 1.25, 1.5), step_size=32, confidence_threshold=0.5, nms_threshold=0.3):
        self.classifier = classifier
        self.feature_extractor = feature_extractor
        # Copy into a fresh list so instances never share (or mutate) the
        # default or a list owned by the caller.
        self.scales = list(scales)
        self.step_size = step_size
        self.confidence_threshold = confidence_threshold
        self.nms_threshold = nms_threshold

        # The extractor stores (width, height); windows are sliced as (height, width).
        (W, H) = self.feature_extractor.image_size
        self.window_size = (H, W)

    def sliding_window(self, image, window_size, step_size):
        """Yield ``(x, y, window)`` for every full window inside ``image``.

        ``window_size`` is ``(height, width)``; ``(x, y)`` is the window's
        top-left corner. Windows that would extend past the image are skipped.
        """
        h, w = image.shape[:2]
        win_h, win_w = window_size

        for y in range(0, h - win_h + 1, step_size):
            for x in range(0, w - win_w + 1, step_size):
                yield (x, y, image[y:y + win_h, x:x + win_w])

    def compute_iou(self, box1, box2):
        """Return intersection-over-union of two ``(x1, y1, x2, y2)`` boxes."""
        x1_1, y1_1, x2_1, y2_1 = box1
        x1_2, y1_2, x2_2, y2_2 = box2

        # Overlap rectangle; width/height clamp to 0 when the boxes are disjoint.
        iw = max(0, min(x2_1, x2_2) - max(x1_1, x1_2))
        ih = max(0, min(y2_1, y2_2) - max(y1_1, y1_2))
        intersection = iw * ih

        area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
        area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
        union = area1 + area2 - intersection

        return intersection / union if union > 0 else 0.0

    def non_max_suppression(self, detections):
        """Greedy NMS over ``[x1, y1, x2, y2, class_name, confidence]`` rows.

        Boxes are visited in descending confidence order; a box is dropped
        when its IoU with an already kept box reaches ``self.nms_threshold``.
        Suppression ignores the class name, so overlapping boxes of different
        classes also suppress each other.
        """
        if len(detections) == 0:
            return []

        remaining = sorted(detections, key=lambda det: det[5], reverse=True)

        keep = []
        while remaining:
            current = remaining[0]
            keep.append(current)
            remaining = [
                det for det in remaining[1:]
                if self.compute_iou(current[0:4], det[0:4]) < self.nms_threshold
            ]

        return keep

    def detect(self, image):
        """Scan ``image`` at every configured scale and return detections.

        Every window whose probability for some class reaches
        ``self.confidence_threshold`` yields one detection for that class,
        mapped back to original-image coordinates. The combined list is then
        filtered with ``non_max_suppression``.

        Returns:
            List of ``[x1, y1, x2, y2, class_name, confidence]``.
        """
        detections = []

        (H_win, W_win) = self.window_size
        img_h, img_w = image.shape[0:2]
        # Column i of predict_proba is decoded through classes_[i], which is
        # what inverse_transform([i])[0] returns.
        class_names = self.classifier.label_encoder.classes_

        for scale in tqdm(self.scales, desc="Detecting"):
            new_h, new_w = int(img_h * scale), int(img_w * scale)

            # Check before resizing: a scaled side of 0 would make cv2.resize
            # raise, and too-small images cannot hold a window anyway.
            if new_h < H_win or new_w < W_win:
                continue

            resized = cv2.resize(image, (new_w, new_h))

            windows_list = list(self.sliding_window(resized, self.window_size, self.step_size))
            if len(windows_list) == 0:
                continue

            # Extract HOG features (batch)
            features = self.feature_extractor.extract_features([win for (_, _, win) in windows_list])

            # Class probabilities (batch); hard predictions are not needed.
            confidences = np.asarray(self.classifier.predict_proba(features))

            scale_factor = 1.0 / scale
            # Row-major order: windows in scan order, classes in column order.
            win_rows, class_cols = np.nonzero(confidences >= self.confidence_threshold)
            for win_idx, class_idx in zip(win_rows, class_cols):
                x, y, _ = windows_list[win_idx]
                detections.append([
                    int(x * scale_factor),
                    int(y * scale_factor),
                    int((x + W_win) * scale_factor),
                    int((y + H_win) * scale_factor),
                    class_names[class_idx],
                    confidences[win_idx, class_idx],
                ])

        if len(detections) > 0:
            detections = self.non_max_suppression(detections)

        return detections

    def visualize_detections(self, image, detections, save_path=None):
        """Draw the detections on ``image``; optionally save the figure.

        Args:
            image: Image to show with ``imshow``.
            detections: List of ``[x1, y1, x2, y2, class_name, confidence]``.
            save_path: If given, the figure is also written to this path.
        """
        import matplotlib.patches as patches

        fig, ax = plt.subplots(1, figsize=(12, 8))
        ax.imshow(image)

        # tab20 indexed by integer gives a distinct colour to each of up to 20
        # classes; sampling the 10-colour tab10 repeats colours for 12 classes.
        palette = plt.cm.tab20
        class_to_color = {cls: palette(i % palette.N) for i, cls in enumerate(classes)}

        for (x1, y1, x2, y2, class_name, confidence) in detections:
            color = class_to_color.get(class_name, 'red')
            rect = patches.Rectangle(
                (x1, y1), x2 - x1, y2 - y1,
                linewidth=2,
                edgecolor=color,
                facecolor='none'
            )
            ax.add_patch(rect)

            label = f"{class_name}: {confidence:.2f}"
            ax.text(
                x1, y1 - 5,
                label,
                bbox=dict(facecolor=color, alpha=0.7),
                fontsize=10,
                color='white',
                weight='bold'
            )

        ax.axis('off')
        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=150, bbox_inches='tight')

        plt.show()

        print(f"Found {len(detections)} objects")

        