# Hyperspectral Face Recognition Model

This notebook implements a CNN-based hyperspectral face recognition model that extracts facial embeddings for authentication purposes.

## Dataset Structure
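This implementation expects the UWA Hyperspectral Face Database (UWA HSFD) to be organized as one directory per subject, each containing that subject's hyperspectral image files: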
```
dataset/
├── subject_001/
│   ├── image_001.npy (or .mat)
│   └── image_002.npy
└── subject_002/
    └── ...
```

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from sklearn.model_selection import train_test_split
import os
import pickle
import matplotlib.pyplot as plt

print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")

## 1. Data Loading and Preprocessing

In [None]:
def load_hyperspectral_data(data_path, img_size=(128, 128), num_channels=33):
    """
    Load hyperspectral face data from the dataset directory.

    If ``data_path`` does not exist, a small synthetic dataset (10 subjects
    with 5 random images each) is generated instead so the remaining cells
    can still be executed.

    Args:
        data_path: Path to the dataset directory (one sub-directory per
            subject, containing ``.npy`` or ``.mat`` files)
        img_size: Target image size (height, width)
        num_channels: Number of spectral channels (default 33 for UWA HSFD)

    Returns:
        X: Array of float32 hyperspectral images with shape
            (num_images, height, width, num_channels)
        y: Array of integer labels; each label is the index of the subject
            in ``subject_names``
        subject_names: List of subject identifiers

    Raises:
        ValueError: If a loaded image is not a 3-D cube with exactly
            ``num_channels`` spectral bands.
    """
    # Accept lists as well as tuples; the shape comparison below needs a tuple.
    img_size = tuple(img_size)

    X = []
    y = []
    subject_names = []

    # For demonstration, create synthetic data if path doesn't exist
    if not os.path.exists(data_path):
        print(f"Warning: Path {data_path} not found. Creating synthetic data for demonstration.")
        # Create synthetic data: 10 subjects with 5 images each
        num_subjects = 10
        images_per_subject = 5

        for subject_id in range(num_subjects):
            subject_names.append(f"subject_{subject_id:03d}")

            for _ in range(images_per_subject):
                # Gaussian noise cube plus a random per-channel offset
                img = np.random.randn(*img_size, num_channels).astype(np.float32)
                img += np.random.randn(num_channels) * 0.5
                X.append(img)
                y.append(subject_id)

        return np.array(X), np.array(y), subject_names

    # Load real data. Directory listings are sorted so that label ids and
    # sample order are reproducible across runs and file systems.
    subject_dirs = sorted(d for d in os.listdir(data_path)
                          if os.path.isdir(os.path.join(data_path, d)))

    for subject_id, subject_dir in enumerate(subject_dirs):
        subject_names.append(subject_dir)
        subject_path = os.path.join(data_path, subject_dir)

        image_files = sorted(f for f in os.listdir(subject_path)
                             if f.endswith(('.npy', '.mat')))

        for img_file in image_files:
            img_path = os.path.join(subject_path, img_file)

            if img_file.endswith('.npy'):
                img = np.load(img_path)
            else:
                from scipy.io import loadmat
                # Adjust key based on your .mat file structure
                img = loadmat(img_path)['hyperspectral_image']

            # Work in float32 throughout: avoids integer rounding during
            # interpolation and matches the dtype of the synthetic branch.
            img = np.asarray(img, dtype=np.float32)

            # Fail early with a clear message instead of an obscure error
            # during resizing, stacking, or model fitting.
            if img.ndim != 3 or img.shape[2] != num_channels:
                raise ValueError(
                    f"{img_path}: expected a (height, width, {num_channels}) "
                    f"cube, got shape {img.shape}"
                )

            # Resize spatial dimensions if needed (bilinear; bands untouched)
            if img.shape[:2] != img_size:
                from scipy.ndimage import zoom
                zoom_factors = (img_size[0] / img.shape[0],
                                img_size[1] / img.shape[1], 1)
                img = zoom(img, zoom_factors, order=1)

            # Per-image standardization; epsilon guards against constant images
            img = (img - img.mean()) / (img.std() + 1e-7)

            X.append(img)
            y.append(subject_id)

    return np.array(X), np.array(y), subject_names

# Location of the dataset on disk; when it is missing the loader falls back
# to synthetic demonstration data.
DATA_PATH = "./UWA_HSFD_dataset"  # Update this path for real data
X, y, subject_names = load_hyperspectral_data(DATA_PATH)

# Short summary of what was loaded
image_count = len(X)
subject_count = len(subject_names)
class_count = len(np.unique(y))

print(f"Loaded {image_count} images from {subject_count} subjects")
print(f"Image shape: {X[0].shape}")
print(f"Number of classes: {class_count}")

## 2. Build the Hyperspectral Face Recognition Model

We'll create a CNN-based model with an embedding layer for feature extraction.

In [None]:
def build_hyperspectral_model(input_shape=(128, 128, 33), embedding_dim=128, num_classes=10):
    """
    Build a CNN model for hyperspectral face recognition.

    The network is four Conv-BatchNorm-MaxPool-Dropout blocks (32, 64, 128
    and 256 filters), a 512-unit dense layer, a linear embedding layer whose
    output is L2-normalized, and a softmax classification head that is only
    used for training.

    Args:
        input_shape: Shape of input hyperspectral images
        embedding_dim: Dimension of the embedding vector
        num_classes: Number of subjects/classes for training

    Returns:
        model: Complete model for training
        embedding_model: Model that outputs embeddings only (shares all
            layers and weights with ``model`` up to the normalized embedding)
    """
    inputs = layers.Input(shape=input_shape)

    # Convolutional blocks for feature extraction; each block halves the
    # spatial resolution via 2x2 max pooling.
    x = inputs
    for filters in (32, 64, 128, 256):
        x = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling2D((2, 2))(x)
        x = layers.Dropout(0.25)(x)

    # Flatten and create embedding
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)

    # Embedding layer (L2 normalized for better similarity computation).
    # A built-in layer is used instead of Lambda(lambda ...): Lambda layers
    # are stored as Python bytecode, which makes the saved model fragile
    # across environments and is rejected by Keras safe-mode loading.
    embeddings = layers.Dense(embedding_dim, activation=None, name='embeddings')(x)
    embeddings_norm = layers.UnitNormalization(axis=-1, name='embeddings_norm')(embeddings)

    # Classification head for training
    outputs = layers.Dense(num_classes, activation='softmax', name='classification')(embeddings_norm)

    # Full model for training
    model = models.Model(inputs=inputs, outputs=outputs, name='hyperspectral_face_model')

    # Embedding model for inference
    embedding_model = models.Model(inputs=inputs, outputs=embeddings_norm,
                                   name='embedding_model')

    return model, embedding_model

# Build model
IMG_SIZE = (128, 128)
NUM_CHANNELS = 33
EMBEDDING_DIM = 128
# Labels are indices into subject_names, so the softmax needs one output per
# listed subject. Counting unique labels would under-size the head whenever a
# subject directory contributed no images, leaving larger label ids out of range.
NUM_CLASSES = len(subject_names)

model, embedding_model = build_hyperspectral_model(
    input_shape=(*IMG_SIZE, NUM_CHANNELS),
    embedding_dim=EMBEDDING_DIM,
    num_classes=NUM_CLASSES
)

model.summary()

## 3. Train the Model

In [None]:
# Split data: hold out 20% of the images, stratified by subject, as a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                      stratify=y, random_state=42)

print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")

# Compile model (integer labels -> sparse categorical cross-entropy)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Callbacks (both monitor val_loss by default)
callbacks = [
    keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5, min_lr=1e-7)
]

# Train model.
# Validation data is carved out of the training split rather than reusing the
# test set: early stopping, best-weight restoration and LR scheduling all
# select on the validation loss, so validating on the test set would make the
# reported test accuracy optimistically biased. validation_split takes the
# last 20% of the arrays, which train_test_split has already shuffled.
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=16,
    callbacks=callbacks,
    verbose=1
)

## 4. Evaluate the Model

In [None]:
# Draw the loss and accuracy curves side by side
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# (train key, val key, train label, val label, y-axis label, title) per panel
panels = (
    ('loss', 'val_loss', 'Train Loss', 'Val Loss',
     'Loss', 'Training and Validation Loss'),
    ('accuracy', 'val_accuracy', 'Train Acc', 'Val Acc',
     'Accuracy', 'Training and Validation Accuracy'),
)

for ax, (train_key, val_key, train_label, val_label, y_label, title) in zip(axes, panels):
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.savefig('training_history.png', dpi=150, bbox_inches='tight')
plt.show()

# Final metrics on the held-out test split
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"\nTest Accuracy: {test_acc:.4f}")
print(f"Test Loss: {test_loss:.4f}")

## 5. Save the Model

We save both the full model and the embedding model for use in the authentication system.

In [None]:
# Write both Keras models to disk, remembering each file name for the report
saved_files = []
for keras_model, filename in (
    (model, 'hyperspectral_face_model.h5'),
    (embedding_model, 'hyperspectral_embedding_model.h5'),
):
    keras_model.save(filename)
    saved_files.append(filename)

# Store the settings needed to use the models alongside them
metadata = dict(
    img_size=IMG_SIZE,
    num_channels=NUM_CHANNELS,
    embedding_dim=EMBEDDING_DIM,
    num_classes=NUM_CLASSES,
    subject_names=subject_names,
    test_accuracy=test_acc,
)

with open('model_metadata.pkl', 'wb') as f:
    pickle.dump(metadata, f)
saved_files.append('model_metadata.pkl')

print("Models saved successfully!")
for filename in saved_files:
    print(f"  - {filename}")

## 6. Test Embedding Extraction

Verify that the embedding model works correctly.

In [None]:
# Run the embedding model on the first five test images
probe_batch = X_test[:5]
sample_embeddings = embedding_model.predict(probe_batch)

first_embedding = sample_embeddings[0]
print(f"Embedding shape: {sample_embeddings.shape}")
print(f"Sample embedding (first 10 values): {first_embedding[:10]}")

# Each row should have unit Euclidean length if L2 normalization is applied
norms = np.linalg.norm(sample_embeddings, axis=1)
print(f"\nEmbedding norms (should be ~1.0): {norms}")

# Similarity matrix between every pair of the sampled embeddings
from sklearn.metrics.pairwise import cosine_similarity

similarities = cosine_similarity(sample_embeddings)
print("\nPairwise cosine similarities:")
print(similarities)

print("\n✓ Embedding model is ready for authentication system!")