In [7]:
import os
import numpy as np
from PIL import Image
import shap

from sklearn.svm import OneClassSVM
from sklearn.model_selection import train_test_split

In [8]:
def load_images_recursive(root_dir, label, image_size=(128, 128)):
    """
    Recursively loads every readable image below a directory as a flattened
    grayscale vector.

    Each file yielded by ``os.walk`` is opened with Pillow, converted to
    grayscale (mode "L"), resized to ``image_size`` and flattened. A file that
    cannot be processed is reported on stdout and skipped, so one bad file
    does not abort the whole load.

    Directories and files are visited in sorted order so that the row order of
    the result, and any seeded split performed on it, does not depend on
    the order in which the filesystem happens to list entries.

    Parameters:
        root_dir (str): The root directory containing images and/or
            subdirectories with images.
        label (int): The label to assign to each image (e.g., 1 for animal).
        image_size (tuple): Desired image size as (width, height).

    Returns:
        tuple: (X, y) where X is a NumPy array of shape (N, width*height)
               containing the flattened images and y is an array of N labels.
               When nothing could be loaded, X has shape (0, width*height)
               and y has shape (0,).
    """
    n_features = image_size[0] * image_size[1]
    data = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Sorting in place steers os.walk's top-down traversal deterministically.
        dirnames.sort()
        for fname in sorted(filenames):
            fpath = os.path.join(dirpath, fname)
            try:
                # The context manager closes the underlying file handle even
                # when decoding fails part-way through.
                with Image.open(fpath) as img:
                    pixels = np.array(img.convert("L").resize(image_size))
            except Exception as e:
                # Deliberately broad: any non-image or corrupt file is skipped.
                print(f"Skipping file {fpath} due to error: {e}")
                continue
            data.append(pixels.flatten())

    labels = np.full(len(data), label)
    if not data:
        # Keep the documented 2-D shape so callers can rely on X.shape[1].
        return np.empty((0, n_features), dtype=np.uint8), labels
    return np.array(data), labels

# One-Class SVM for Animal Detection (When Only Animal Images are Available)

This notebook demonstrates how to detect if an image contains an animal, **without** having any non-animal training images. We use a **One-Class SVM**, which learns what “normal” (in this case, *animal*) looks like, and flags anything else as an outlier.

## Key Points
1. **No non-animal data**: We cannot do a standard supervised (binary) classification. We only have *animal* images.
2. **One-Class SVM**: This approach allows us to treat animal images as our "normal" class. At prediction time, if a new image deviates significantly from what the model learned, it's flagged as "Non-Animal."
3. **Hyperparameter Tuning**: We do a simple grid search over parameters like `nu` and `gamma` to find a good fit for our animal data.
4. **Function Names**: The functions defined in this notebook are `load_images_recursive`, `train_animal_classifier`, `predict_image`, and `explain_image_prediction`.


## Setup & Imports
We will use:
- `os` to walk the image directory tree.
- `PIL` (via `Pillow`) to handle image loading and resizing.
- `numpy` for numerical arrays.
- `sklearn.svm.OneClassSVM` for our one-class model.
- `train_test_split` to create a validation set from our animal images.
- `shap` to explain individual predictions.


In [9]:
def train_animal_classifier(animal_path, image_size=(128, 128)):
    """
    Trains a One-Class SVM using only animal images. This function expects that
    animal_path is a root folder that contains images, possibly in nested
    subfolders. No non-animal data is used.

    The process includes:
      1. Recursively loading animal images.
      2. Splitting the data into training (80%) and validation (20%) sets
         with a fixed random_state.
      3. A manual grid search over a small set of hyperparameters for OneClassSVM.
      4. Returning the best model (based on minimal outlier rate on the validation set)
         along with the training data for later use as background data in explainability.

    Because the validation set contains only animal images, the selection
    criterion rewards the model that rejects the fewest of them; it does not
    measure how well non-animal images are rejected. Ties keep the first
    combination encountered in grid order.

    Parameters:
        animal_path (str): Path to the root folder of animal images.
        image_size (tuple): Desired image size as (width, height). The same
            size must be used when calling predict_image on the returned model.

    Returns:
        tuple: (best_model, X_train)

    Raises:
        ValueError: If no image could be loaded from animal_path.
    """
    # Load images recursively from the animal training folder.
    X, _ = load_images_recursive(animal_path, label=1, image_size=image_size)
    if len(X) == 0:
        raise ValueError(f"No images found in '{animal_path}'. Cannot train the model.")
    print(f"Loaded {len(X)} animal images.")

    # Split the data into training and validation sets.
    X_train, X_val = train_test_split(X, test_size=0.2, random_state=42)
    print(f"Training set size: {len(X_train)} images")

    # Define a grid of hyperparameters for OneClassSVM.
    param_grid = {
        "nu":    [0.001, 0.01, 0.1],   # Controls fraction of outliers.
        "gamma": ["scale", 1e-3, 1e-4]   # Kernel coefficient for RBF.
    }

    best_model = None
    best_outlier_rate = float("inf")
    best_params = None

    # Perform a manual grid search over hyperparameters.
    for nu_val in param_grid["nu"]:
        for gamma_val in param_grid["gamma"]:
            # "\r" moves the cursor back to the start of the line so each
            # status message overwrites the previous one instead of piling up.
            print(
                f"Training One-Class SVM with nu={nu_val}, gamma={gamma_val}...",
                end="\r",
                flush=True,
            )
            model = OneClassSVM(kernel="rbf", nu=nu_val, gamma=gamma_val)
            model.fit(X_train)

            # Predict on the validation set: +1 for inliers, -1 for outliers.
            val_preds = model.predict(X_val)
            outlier_count = np.sum(val_preds == -1)
            outlier_rate = outlier_count / len(X_val)

            # Strict comparison: on a tie the earlier combination is kept.
            if outlier_rate < best_outlier_rate:
                best_outlier_rate = outlier_rate
                best_model = model
                best_params = (nu_val, gamma_val)

    # Terminate the in-place progress line before printing the summary.
    print()
    print(f"Best One-Class SVM params: nu={best_params[0]}, gamma={best_params[1]}")
    print(f"Validation outlier rate on animal data: {best_outlier_rate:.2%}")

    # Return the best model and training data for later explainability.
    return best_model, X_train

def predict_image(file_path, model, image_size=(64, 64)):
    """
    Predicts whether an image is 'Animal' or 'Non-Animal' based on the trained OneClassSVM.

    The image is converted to grayscale, resized to ``image_size`` and
    flattened into a single row of width*height features before being passed
    to ``model.predict``.

    Parameters:
        file_path (str): Path to the image file.
        model (OneClassSVM): Trained OneClassSVM model.
        image_size (tuple): Desired image size as (width, height). This must
            be the size the model was trained with. Note that the default here
            (64, 64) differs from train_animal_classifier's default
            (128, 128), so pass the size explicitly when in doubt.

    Returns:
        str: "Animal" if the model predicts +1, otherwise "Non-Animal".
    """
    # The context manager releases the file handle as soon as the pixels
    # have been copied into a NumPy array.
    with Image.open(file_path) as img:
        pixels = np.array(img.convert("L").resize(image_size))

    # One sample, width*height features.
    arr = pixels.reshape(1, -1)
    prediction = model.predict(arr)[0]
    return "Animal" if prediction == 1 else "Non-Animal"


## Example Usage
Assume you have a folder structure like this (images may also sit in nested subfolders, since the loader walks the directory tree recursively):
```
data/
  animals/
    animal1.jpg
    animal2.jpg
    ...
```

## How It Works
1. **Load & Preprocess**: Converts images to grayscale, resizes them to a fixed `image_size` (64×64 in this notebook's run; the loader's default is 128×128), and flattens them into 1D arrays.
2. **Train/Validation Split**: Splits the available *animal* images into train (80%) and validation (20%).
3. **One-Class SVM**: The model learns a decision boundary around your *animal* data. Anything that deviates significantly is labeled as "-1" (outlier).
4. **Hyperparameter Tuning**: We do a simple loop over a small grid of `(nu, gamma)` values:
   - `nu` is an upper bound on the fraction of training points treated as outliers (and a lower bound on the fraction of support vectors).
   - `gamma` is the kernel coefficient for the RBF kernel.
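   - We pick the combination that yields the **fewest** outliers on the validation set. Because the validation set contains only animal images, this criterion favours the most permissive boundary and says nothing about how well non-animals are rejected.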
5. **Prediction**: `predict_image` loads a new image and runs the trained One-Class SVM. The result is either +1 ("Animal") or -1 ("Non-Animal").

> **Important**: Because we have no real non-animal data for training or testing, we cannot measure the false positive rate (i.e., how often it labels a non-animal as "Animal").

In [10]:
def explain_image_prediction(file_path, model, background_data, image_size=(128, 128), ns=50):
    """
    Generates a SHAP explanation for a single image prediction made by the OneClassSVM.
    It explains the decision function (a score indicating inlier likelihood) for the given image.

    The image is preprocessed the same way as for prediction: grayscale,
    resized to ``image_size``, flattened into one row of width*height features.

    Parameters:
        file_path (str): Path to the image to be explained.
        model (OneClassSVM): Trained OneClassSVM model.
        background_data (np.array): A 2D NumPy array of background images (flattened)
                                    to be used as a reference for the SHAP explainer.
                                    NOTE(review): KernelExplainer is presumably slow with
                                    a large background set; consider passing a subsample.
        image_size (tuple): Desired image size as (width, height). Must match
                            the size used for ``background_data`` and for training.
        ns (int): Forwarded to ``KernelExplainer.shap_values`` as ``nsamples``.

    Returns:
        tuple: (shap_values, expected_value) for the image.
    """
    # Close the file handle as soon as the pixel data has been copied out.
    with Image.open(file_path) as img:
        pixels = np.array(img.convert("L").resize(image_size))
    instance = pixels.reshape(1, -1)

    def model_predict(X):
        # Explain the continuous decision score rather than the +1/-1 label.
        return model.decision_function(X)

    # Create a KernelExplainer using the provided background data.
    explainer = shap.KernelExplainer(model_predict, background_data)
    shap_values = explainer.shap_values(instance, nsamples=ns)
    expected_value = explainer.expected_value

    return shap_values, expected_value


In [11]:
# Root folder of the animal training images, passed to train_animal_classifier.
# NOTE: machine-specific absolute path; change it to match your local data location.
animal_images_dir = r"C:\Users\rf4thyrvm\Documents\CritterSnap\data\example\as_conservationistFrankfurt\IE_Forest_County_Wicklow_21_loc_01-20241031T145429Z-001" 

In [12]:
# Train on 64x64 inputs (overriding the function's 128x128 default). Returns the
# selected model plus the training split, which explain_image_prediction can
# take as SHAP background data.
model, X_train = train_animal_classifier(animal_images_dir, image_size=(64, 64))

KeyboardInterrupt: 

In [None]:
# Classify a single image with the trained model. Requires `model` from the
# training cell, so that cell must have run to completion first.
# image_size must equal the size used for training (64x64) so the flattened
# vector has the number of features the model was fitted on.
# Alternative test image:
# test_image_path = r"C:\Users\rf4thyrvm\Documents\CritterSnap\data\example\ds_researchATU\IMG_0197.JPG"
test_image_path = r"C:\Users\rf4thyrvm\Documents\CritterSnap\data\example\ds_researchATU\IMG_0001.JPG"
prediction = predict_image(test_image_path, model, image_size=(64, 64))
print(f"Prediction for {test_image_path}: {prediction}")