# Aerial Image Classification Demo

This notebook demonstrates how to load and use the pre-trained models for aerial image classification.
We provide two types of models:
1. **Classic Machine Learning**: Bag of Features (BoF) with different algorithms (Softmax, SVM, Decision Tree, Random Forest, NaÃ¯ve Bayes).
2. **Deep Learning**: A custom ResNet-based Convolutional Neural Network (CNN).

## Prerequisites
Ensure you have the necessary dependencies installed:
```bash
pip install torch torchvision scikit-learn opencv-python joblib huggingface_hub matplotlib
```

In [22]:
import os
import time
import joblib
import torch
import numpy as np
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from huggingface_hub import hf_hub_download
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Local imports
from classicML.visual_pipeline import *
from NeuralNets.model import PiattiCNN

## 1. Classic Machine Learning Model (BoVW)

In [23]:
# Download and load the Classic ML model from Hugging Face
classic_model_path = hf_hub_download(
    repo_id="JavideuS/aid-image-classification",
    filename="classicML/models/bovw_softmax.pkl"
)

bundle = joblib.load(classic_model_path)
pipeline = bundle['pipeline']
le = bundle['label_encoder']

print("Classic ML Model loaded successfully.")

Classic ML Model loaded successfully.


In [None]:
def predict_classic(image_path):
    """
    Predicts the class of an image using the Classic ML pipeline.
    """
    if not os.path.exists(image_path):
        print(f"Error: Image not found at {image_path}")
        return

    # The pipeline expects a list of paths
    pred_int = pipeline.predict([image_path])
    pred_label = le.inverse_transform(pred_int)

    # print(f"[Classic ML] Prediction for {os.path.basename(image_path)}: {pred_label[0]}")
    return pred_label[0]

# Example Usage
# predict_classic("path/to/your/image.jpg")

## 2. Deep Learning Model (CNN)

In [25]:
# Download and load the CNN model from Hugging Face
cnn_model_path = hf_hub_download(
    repo_id="JavideuS/aid-image-classification",
    filename="neuralNet/models/PiattiVL_v0.69.pth"
)

checkpoints = torch.load(cnn_model_path, map_location='cpu')
cnn_model = PiattiCNN(num_classes=checkpoints['num_classes'])
cnn_model.load_state_dict(checkpoints['model_state_dict'])
cnn_model.eval()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cnn_model.to(device)

print(f"CNN Model loaded successfully on {device}.")

CNN Model loaded successfully on cuda.


In [None]:
# Define the preprocessing transform (must match training)
# Funny enough this transformation performs better than tes
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

def predict_cnn(image_path):
    """
    Predicts the class of an image using the CNN model.
    """
    if not os.path.exists(image_path):
        print(f"Error: Image not found at {image_path}")
        return

    # Load and preprocess image
    image = Image.open(image_path).convert('RGB')
    image_tensor = eval_transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = cnn_model(image_tensor)
        _, predicted = torch.max(outputs, 1)

    # Assuming we have the class names from the dataset or label encoder
    # For now, we reuse the label encoder from the classic model if available,
    # or just print the index if not.
    try:
        class_name = le.inverse_transform([predicted.item()])[0]
    except NameError:
        class_name = str(predicted.item())

    # print(f"[CNN] Prediction for {os.path.basename(image_path)}: {class_name}")
    return class_name

# Example Usage
# predict_cnn("path/to/your/image.jpg")

## 3. Comparative Evaluation

In this section, we load the raw dataset, split it into training and testing sets, and evaluate both models on the test set to compare their **Accuracy** and **Inference Time**.

In [None]:
# 1. Load Dataset Paths
image_paths =  []
labels = []
# If you don't have the dataset, please run python download_dataset.py
source_dir = 'data/raw'

if os.path.exists(source_dir):
    categories = [d for d in os.listdir(source_dir)
                  if os.path.isdir(os.path.join(source_dir, d))]
    for category in categories:
        category_path = os.path.join(source_dir, category)
        files = [os.path.join(category_path, f) for f in os.listdir(category_path)
                 if os.path.isfile(os.path.join(category_path, f))]
        image_paths.extend(files)
        labels.extend([category] * len(files))

    print(f"Found {len(image_paths)} images in {len(categories)} categories")

    # 2. Encode Labels
    labels_encoded = le.transform(labels)

    # 3. Split Data
    X_train, X_test, y_train, y_test = train_test_split(image_paths, labels_encoded, test_size=0.2, random_state=0, stratify=labels)
    print(f"Test Set Size: {len(X_test)} images")
else:
    print(f"Warning: '{source_dir}' directory not found. Please ensure the dataset is downloaded and extracted there to run the comparison.")
    X_test = []

Found 10000 images in 30 categories
Test Set Size: 2000 images


In [28]:
if len(X_test) > 0:
    # --- Classic ML Evaluation ---
    print(f"Evaluating Classic ML on {len(X_test)} images...")
    t0 = time.time()
    y_pred_classic = pipeline.predict(X_test)
    t_classic = time.time() - t0
    acc_classic = accuracy_score(y_test, y_pred_classic)

    # --- CNN Evaluation ---
    class SimpleDataset(Dataset):
        def __init__(self, paths, labels, transform):
            self.paths = paths
            self.labels = labels
            self.transform = transform
        def __len__(self): return len(self.paths)
        def __getitem__(self, idx):
            img = Image.open(self.paths[idx]).convert('RGB')
            return self.transform(img), self.labels[idx]

    test_dataset = SimpleDataset(X_test, y_test, eval_transform)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)

    print(f"Evaluating CNN on {len(X_test)} images...")
    cnn_preds = []
    cnn_targets = []
    t0 = time.time()
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            outputs = cnn_model(images)
            _, predicted = torch.max(outputs, 1)
            cnn_preds.extend(predicted.cpu().numpy())
            cnn_targets.extend(labels.numpy())
    t_cnn = time.time() - t0
    acc_cnn = accuracy_score(cnn_targets, cnn_preds)

    # --- Results ---
    print("\n" + "="*45)
    print(f"{'Model':<15} | {'Accuracy':<10} | {'Time (s)':<10}")
    print("-" * 45)
    print(f"{'Classic ML':<15} | {acc_classic:.4f}     | {t_classic:.2f}")
    print(f"{'CNN':<15} | {acc_cnn:.4f}     | {t_cnn:.2f}")
    print("="*45)
else:
    print("Skipping evaluation (no data found).")

Evaluating Classic ML on 2000 images...
Evaluating CNN on 2000 images...

Model           | Accuracy   | Time (s)  
---------------------------------------------
Classic ML      | 0.6330     | 97.00
CNN             | 0.9580     | 17.36
