# 04. Convolutional Autoencoder (Unsupervised)

## Introduction
This notebook implements a Convolutional Autoencoder (CAE) for unsupervised anomaly detection.
The model is trained ONLY on normal images to learn to reconstruct them.
Anomalies are detected by high reconstruction error.

## Setup

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models

tf.random.set_seed(42)
np.random.seed(42)

## 1. Data Loading
For the Autoencoder, we train ONLY on normal samples: the rows of the training split whose label ends in `_good`.

In [None]:
import sys
sys.path.append('..')
from src.preprocessing.dataset import load_and_split_data

# --- Configuration ---------------------------------------------------------
IMG_SIZE = (256, 256)        # size handed to tf.image.resize for every image
BATCH_SIZE = 16
DATA_DIR = "../data/raw"
TARGET_CATEGORY = 'bottle'   # the single category this notebook works on

# --- Load the train / validation / test splits -------------------------------
print(f"Loading and splitting data for {TARGET_CATEGORY}...")
train_df, val_df, test_df, class_names = load_and_split_data(
    DATA_DIR,
    target_category=TARGET_CATEGORY,
    augment=True,
)

# --- Keep only the normal rows for autoencoder training ----------------------
# A row counts as normal when its string label ends with '_good'.
print("Filtering for normal samples...")
train_is_normal = train_df['label_str'].str.endswith('_good')
normal_train_df = train_df[train_is_normal]
print(f"Normal training samples: {len(normal_train_df)}")

# Dataset creation helper
def process_path(filepath):
    """Read one image file and return it as a float32 tensor.

    The file is decoded with three channels (animations are not expanded),
    resized to ``IMG_SIZE`` and divided by 255 to rescale it to [0, 1].
    """
    raw_bytes = tf.io.read_file(filepath)
    pixels = tf.io.decode_image(raw_bytes, channels=3, expand_animations=False)
    resized = tf.image.resize(pixels, IMG_SIZE)
    return tf.cast(resized, tf.float32) / 255.0

def create_dataset(dataframe, batch_size=16, shuffle=False):
    """Build a batched ``tf.data`` pipeline of ``(image, image)`` pairs.

    Args:
        dataframe: table with a ``'filepath'`` column listing the image files.
        batch_size: number of images per batch.
        shuffle: when true, the file order is reshuffled before decoding.

    Returns:
        A ``tf.data.Dataset`` yielding ``(x, x)`` batches: the autoencoder is
        trained to reproduce its own input, so input and target are the same
        tensor. Labels are not part of this dataset.
    """
    filepaths = dataframe['filepath'].values
    ds = tf.data.Dataset.from_tensor_slices(filepaths)

    if shuffle:
        # Shuffle the path strings *before* decoding: the buffer then holds
        # short strings instead of decoded float32 images, so it can cover the
        # whole dataset (a full shuffle) without a large memory footprint.
        ds = ds.shuffle(buffer_size=max(len(filepaths), 1))

    ds = ds.map(process_path, num_parallel_calls=tf.data.AUTOTUNE)

    # Autoencoder expects (input, target) with target == input.
    ds = ds.map(lambda image: (image, image))

    ds = ds.batch(batch_size)
    return ds.prefetch(buffer_size=tf.data.AUTOTUNE)

AUTOTUNE = tf.data.AUTOTUNE

# Training pipeline: normal images only, shuffled.
train_ds = create_dataset(normal_train_df, batch_size=BATCH_SIZE, shuffle=True)

# Validation pipeline: the normal rows of the validation split, unshuffled.
val_is_normal = val_df['label_str'].str.endswith('_good')
normal_val_df = val_df[val_is_normal]
val_ds = create_dataset(normal_val_df, batch_size=BATCH_SIZE)

print("Datasets created.")

## 2. Model Architecture
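Encoder-decoder architecture: three stride-2 convolutions compress the image into a 128-dimensional latent vector, and three stride-2 transposed convolutions expand it back to the input resolution.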

In [None]:
def create_autoencoder(input_shape, latent_dim=128):
    """Build the convolutional autoencoder (uncompiled).

    Args:
        input_shape: ``(height, width, channels)`` of one input image. Height
            and width should be multiples of 8: the encoder halves them three
            times and the decoder doubles them three times, so other sizes do
            not come back to the input resolution.
        latent_dim: width of the dense bottleneck layer. Defaults to 128.

    Returns:
        A Keras ``Model`` named ``'autoencoder'`` that maps an image to its
        reconstruction. The output has the same number of channels as the
        input and uses a sigmoid activation.
    """
    # Encoder: three stride-2 convolutions, each halving the spatial size.
    inputs = layers.Input(shape=input_shape)
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same', strides=2)(inputs)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same', strides=2)(x)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same', strides=2)(x)

    # Latent space. The feature-map shape (without the batch axis) is read
    # straight from the symbolic tensor, so the legacy backend.int_shape
    # helper is not needed.
    shape_before_flattening = tuple(x.shape[1:])
    x = layers.Flatten()(x)
    latent = layers.Dense(latent_dim, name='latent_vector')(x)

    # Decoder: project back to the pre-flatten volume, then upsample 3 times.
    # int(...) hands Dense a plain Python int rather than a NumPy scalar.
    flat_units = int(np.prod(shape_before_flattening))
    x = layers.Dense(flat_units)(latent)
    x = layers.Reshape(shape_before_flattening)(x)

    x = layers.Conv2DTranspose(128, (3, 3), activation='relu', padding='same', strides=2)(x)
    x = layers.Conv2DTranspose(64, (3, 3), activation='relu', padding='same', strides=2)(x)
    x = layers.Conv2DTranspose(32, (3, 3), activation='relu', padding='same', strides=2)(x)

    # One output channel per input channel (3 for the RGB images used here).
    out_channels = input_shape[-1]
    outputs = layers.Conv2DTranspose(out_channels, (3, 3), activation='sigmoid', padding='same')(x)

    return models.Model(inputs, outputs, name='autoencoder')

# Instantiate the model for 3-channel inputs of IMG_SIZE and print its layers.
autoencoder = create_autoencoder((*IMG_SIZE, 3))
autoencoder.summary()

## 3. Training
Loss function is Mean Squared Error (MSE) between input and output.

In [None]:
autoencoder.compile(optimizer='adam', loss='mse')

# Track reconstruction loss on the held-out normal validation images (val_ds,
# built from normal_val_df above) in addition to the training loss. Validation
# is skipped when the validation split has no normal rows, so fit() is never
# handed an empty dataset.
history = autoencoder.fit(
    train_ds,
    epochs=20,
    validation_data=val_ds if len(normal_val_df) > 0 else None,
)

# Create Test Dataset (with labels for evaluation)
def process_path_label(filepath, label):
    """Load one image and pass its label through unchanged.

    Args:
        filepath: path of the image file to read.
        label: the label associated with that file; returned as-is.

    Returns:
        ``(image, label)`` where ``image`` is produced by ``process_path``.
    """
    # Delegate to process_path so evaluation images go through exactly the
    # same decode / resize / rescale steps as the training images.
    return process_path(filepath), label

def create_test_dataset(dataframe, batch_size=1):
    """Build a labelled, unshuffled ``tf.data`` pipeline for evaluation.

    Args:
        dataframe: table with ``'filepath'`` and ``'label'`` columns.
        batch_size: images per yielded element. The default of 1 yields one
            image (and one label) per element, i.e. per-image scoring.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches in the
        row order of ``dataframe``.
    """
    filepaths = dataframe['filepath'].values
    labels = dataframe['label'].values
    ds = tf.data.Dataset.from_tensor_slices((filepaths, labels))
    ds = ds.map(process_path_label, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size)
    # Overlap image loading with model execution, as create_dataset does.
    return ds.prefetch(buffer_size=tf.data.AUTOTUNE)

test_ds = create_test_dataset(test_df)

def predict_anomaly(model, dataset, threshold=None):
    """Score every image by its mean absolute reconstruction error.

    Args:
        model: object with a ``predict(batch, verbose=0)`` method returning the
            reconstruction of ``batch`` with the same shape.
        dataset: iterable of ``(images, labels)`` batches. Any batch size is
            accepted; the batch axis is axis 0.
        threshold: currently unused; kept so existing calls stay valid.

    Returns:
        ``(errors, labels)``: two 1-D NumPy arrays with one entry per image,
        in the order the dataset yields them.
    """
    reconstruction_errors = []
    labels = []

    for images, batch_labels in dataset:
        originals = np.asarray(images)
        reconstructed = np.asarray(model.predict(originals, verbose=0))
        # Average over every non-batch axis so each image gets its own score,
        # instead of one score for the whole batch.
        non_batch_axes = tuple(range(1, originals.ndim))
        per_image_error = np.mean(np.abs(originals - reconstructed), axis=non_batch_axes)
        reconstruction_errors.extend(per_image_error)
        # Keep every label of the batch, not only the first one.
        labels.extend(np.asarray(batch_labels).reshape(-1))

    return np.array(reconstruction_errors), np.array(labels)

print("Predicting anomalies on test set...")
errors, labels = predict_anomaly(autoencoder, test_ds)

# Cut-off: the 90th percentile of the reconstruction errors just computed.
# NOTE: these are the test-set errors themselves, anomalies included.
threshold = np.percentile(errors, 90)
print(f"Threshold: {threshold}")

# Find which integer labels stand for normal images: the positions in
# class_names whose name ends with '_good'.
normal_indices = [
    class_idx
    for class_idx, class_name in enumerate(class_names)
    if class_name.endswith('_good')
]

# Boolean mask over the test images: True = normal, False = anomaly.
is_normal = np.isin(labels, normal_indices)

# Overlay the error histograms of both groups and mark the threshold.
plt.figure(figsize=(10, 5))
for legend_name, group_mask in (('Normal', is_normal), ('Anomaly', ~is_normal)):
    plt.hist(errors[group_mask], bins=20, alpha=0.5, label=legend_name)
plt.axvline(threshold, color='r', linestyle='--', label='Threshold')
plt.legend()
plt.title("Reconstruction Error Distribution")
plt.show()

In [None]:
# Load Test Data
# Builds a labelled, unshuffled dataset of single-image batches from the
# directory TEST_DIR, with images resized to IMG_SIZE and integer labels.
# NOTE(review): TEST_DIR is not defined anywhere in the visible notebook;
# confirm where it is supposed to come from before running this cell.
# NOTE(review): this rebinds `test_ds`, replacing the DataFrame-based dataset
# created earlier. No division by 255 is applied here, so images are
# presumably left in the loader's native value range; verify before feeding
# them to the model.
test_ds = tf.keras.utils.image_dataset_from_directory(
    TEST_DIR,
    label_mode='int',
    image_size=IMG_SIZE,
    batch_size=1,
    shuffle=False
)

def predict_anomaly(model, dataset, threshold=None):
    """Score every image by its mean absolute reconstruction error.

    Each batch is passed through ``preprocess`` before it is reconstructed.

    Args:
        model: object with a ``predict(batch, verbose=0)`` method returning the
            reconstruction of ``batch`` with the same shape.
        dataset: iterable of ``(images, labels)`` batches. Any batch size is
            accepted; the batch axis is axis 0.
        threshold: currently unused; kept so existing calls stay valid.

    Returns:
        ``(errors, labels)``: two 1-D NumPy arrays with one entry per image,
        in the order the dataset yields them.
    """
    reconstruction_errors = []
    labels = []

    for images, batch_labels in dataset:
        # NOTE(review): `preprocess` is not defined in the visible notebook;
        # presumably it rescales raw pixel values for the model. Confirm.
        originals = np.asarray(preprocess(images))
        reconstructed = np.asarray(model.predict(originals, verbose=0))
        # Average over every non-batch axis so each image gets its own score,
        # instead of one score for the whole batch.
        non_batch_axes = tuple(range(1, originals.ndim))
        per_image_error = np.mean(np.abs(originals - reconstructed), axis=non_batch_axes)
        reconstruction_errors.extend(per_image_error)
        # Keep every label of the batch, not only the first one.
        labels.extend(np.asarray(batch_labels).reshape(-1))

    return np.array(reconstruction_errors), np.array(labels)

errors, labels = predict_anomaly(autoencoder, test_ds)

# Cut-off: the 90th percentile of the errors just computed.
# Ideally this would come from a validation set of normal images instead.
threshold = np.percentile(errors, 90)
print(f"Threshold: {threshold}")

# Overlay the error histograms of label 0 (plotted as 'Normal') and of every
# other label (plotted as 'Anomaly'), then mark the threshold.
plt.figure(figsize=(10, 5))
for legend_name, group_mask in (('Normal', labels == 0), ('Anomaly', labels != 0)):
    plt.hist(errors[group_mask], bins=20, alpha=0.5, label=legend_name)
plt.axvline(threshold, color='r', linestyle='--', label='Threshold')
plt.legend()
plt.title("Reconstruction Error Distribution")
plt.show()

In [None]:
# Advanced Evaluation for Autoencoder
from src.evaluation.metrics import calculate_auc, calculate_f1
import numpy as np

print("Evaluating Autoencoder...")

# 1. Calculate Reconstruction Error (MSE) for all test images
reconstructions = autoencoder.predict(test_ds)
mse_scores = []
raw_labels = []

# Second pass over test_ds to pair each reconstruction with its original image
# and label. This relies on test_ds yielding the same order on both passes.
# The loop variables are deliberately not called `labels`, so the notebook-level
# `labels` array from the earlier cells is not overwritten.
idx = 0
for batch_images, batch_labels in test_ds:
    batch_recon = reconstructions[idx : idx + len(batch_images)]
    batch_mse = np.mean(np.square(np.asarray(batch_images) - batch_recon), axis=(1, 2, 3))
    mse_scores.extend(batch_mse)
    raw_labels.extend(batch_labels.numpy())
    idx += len(batch_images)

mse_scores = np.array(mse_scores)

# Collapse the integer class ids to the binary convention the metrics need:
# 0 = normal (class name ends with '_good'), 1 = anomaly (every other class).
normal_class_ids = [i for i, name in enumerate(class_names) if name.endswith('_good')]
y_true_labels = (~np.isin(np.array(raw_labels), normal_class_ids)).astype(int)

# 2. Calculate AUC-ROC using MSE as anomaly score
# Higher MSE = Anomaly (1), Lower MSE = Normal (0)
auc = calculate_auc(y_true_labels, mse_scores)
print(f"AUC-ROC: {auc:.4f}")

# 3. Calculate F1-Score (requires thresholding)
# For demonstration the threshold is picked to maximise F1 on the test set
# itself, so the reported value is optimistic. A threshold derived from normal
# validation images (e.g. mean + 2*std of their errors) would be preferable.
best_f1 = 0
best_thresh = 0
thresholds = np.linspace(mse_scores.min(), mse_scores.max(), 100)

for thresh in thresholds:
    y_pred_bin = (mse_scores > thresh).astype(int)
    f1 = calculate_f1(y_true_labels, y_pred_bin)
    if f1 > best_f1:
        best_f1 = f1
        best_thresh = thresh

print(f"Best F1-Score: {best_f1:.4f} (at threshold {best_thresh:.4f})")
