# Raman Spectroscopy Fake Alcohol Detection - Demo

This notebook demonstrates the classification pipeline using pre-processed data.

**For the full data generation pipeline, see `main_pipeline.ipynb`.**

## Pipeline:
1. Load pre-processed data (synthetic_1d.npy, spectral_maps_gadf.npy)
2. Train DenseNet/ResNet models
3. Evaluate with confusion matrices

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras import layers, models, regularizers
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import seaborn as sns
import json
import random
from datetime import datetime

# Seed every random number generator this notebook relies on
def set_seed(seed=42):
    """Seed the Python, NumPy and TensorFlow RNGs and export PYTHONHASHSEED.

    Args:
        seed: Integer seed handed to each generator (default 42).
    """
    # NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so this
    # assignment presumably only affects child processes -- confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_fn in (np.random.seed, tf.random.set_seed, random.seed):
        seed_fn(seed)

SEED = 42
set_seed(seed=SEED)
print("Setup complete!")

## 1. Load Pre-processed Data

In [None]:
# Data root and its sub-folders (kept as plain strings for os.path use)
data_dir = '../data'
synthetic_dir, maps_dir, labels_dir, visualizations_dir = (
    os.path.join(data_dir, subdir)
    for subdir in ('synthetic', 'maps', 'labels', 'visualizations')
)

# Resolve the three input files up front, then read them in a fixed order
spectra_path = os.path.join(synthetic_dir, 'synthetic_1d.npy')
gadf_path = os.path.join(maps_dir, 'spectral_maps_gadf.npy')
labels_path = os.path.join(labels_dir, 'labels.csv')

X_1d = np.load(spectra_path)
X_2d = np.load(gadf_path)
labels_df = pd.read_csv(labels_path)
# Class labels come from the 'label' column of labels.csv
y = labels_df['label'].values

# Report array shapes and the per-class sample counts
print(f"X_1d shape: {X_1d.shape}")
print(f"X_2d shape: {X_2d.shape}")
print(f"Labels: {len(y)} samples, {len(np.unique(y))} classes")
print(f"\nLabel distribution:\n{labels_df['label'].value_counts().sort_index()}")

In [None]:
# Visualize a handful of randomly chosen spectra
# The axis length follows the data (second dimension of X_1d) instead of a
# hard-coded 880, so the plot keeps working if the spectra are resampled.
# NOTE(review): the 500-3500 cm^-1 range is assumed to match the
# pre-processing step -- confirm against main_pipeline.ipynb.
wavenumbers = np.linspace(500, 3500, X_1d.shape[1])

plt.figure(figsize=(12, 5))
for _ in range(5):
    idx = np.random.randint(0, X_1d.shape[0])
    plt.plot(wavenumbers, X_1d[idx, :, 0], alpha=0.7, label=f'Label {y[idx]} ({y[idx]*10}% Ethanol)')

# Shade the marker bands BEFORE building the legend; artists added after
# plt.legend() are not picked up, so their labels would silently be missing.
plt.axvspan(870, 890, color='green', alpha=0.2, label='Ethanol peak')
plt.axvspan(1000, 1020, color='red', alpha=0.2, label='Methanol peak')

plt.xlabel('Wavenumber (cm$^{-1}$)')
plt.ylabel('Intensity (normalized)')
plt.title('Sample Raman Spectra')
plt.grid(True, alpha=0.3)
plt.legend()
plt.show()

## 2. Model Architectures

Define DenseNet models for both 1D (spectra) and 2D (GADF) inputs, plus a ResNet model for 1D spectra.

In [None]:
def build_1d_densenet(input_shape=(880, 1), num_classes=11, growth_rate=12,
                      num_blocks=3, layers_per_block=4, l2_weight=0.0005,
                      dropout_rate=0.4):
    """Build a DenseNet-style classifier for 1D spectra.

    Architecture: a stem (Conv1D(48, 7) + ReLU -> BatchNorm -> MaxPool(2)),
    then ``num_blocks`` pairs of dense block + transition layer, then a head
    (BatchNorm -> ReLU -> global average pooling -> Dense(128) -> Dropout ->
    softmax). With the default arguments the network is identical to the
    previously hard-coded 3-block / 4-layer configuration.

    Args:
        input_shape: Shape of a single sample, passed to ``layers.Input``.
        num_classes: Number of units in the softmax output layer.
        growth_rate: Channels added by every convolution in a dense block.
        num_blocks: Number of dense block / transition pairs. Every
            transition (and the stem) applies a size-2 max pooling, so the
            input must be long enough to survive ``num_blocks + 1`` poolings.
        layers_per_block: Number of BN-ReLU-Conv units per dense block.
        l2_weight: L2 penalty applied to every convolution kernel.
        dropout_rate: Dropout rate applied before the output layer.

    Returns:
        An uncompiled Keras ``Model`` mapping inputs to class probabilities.
    """
    def conv(filters, kernel_size, **kwargs):
        # A fresh regularizer object per layer, as in the original code.
        return layers.Conv1D(filters, kernel_size, padding='same',
                             kernel_regularizer=regularizers.l2(l2_weight),
                             **kwargs)

    def dense_block(x, num_layers, filters):
        # Each unit sees the concatenation of all previous feature maps.
        for _ in range(num_layers):
            y = layers.BatchNormalization()(x)
            y = layers.Activation('relu')(y)
            y = conv(filters, 3)(y)
            x = layers.Concatenate()([x, y])
        return x

    def transition_layer(x):
        # 1x1 convolution halves the channel count, pooling halves the length.
        filters = x.shape[-1]
        x = layers.BatchNormalization()(x)
        x = layers.Activation('relu')(x)
        x = conv(filters // 2, 1)(x)
        x = layers.MaxPooling1D(pool_size=2)(x)
        return x

    inputs = layers.Input(shape=input_shape)
    x = conv(48, 7, activation='relu')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling1D(pool_size=2)(x)

    for _ in range(num_blocks):
        x = dense_block(x, num_layers=layers_per_block, filters=growth_rate)
        x = transition_layer(x)

    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    return models.Model(inputs, outputs)


def build_2d_densenet(input_shape=(64, 64, 1), num_classes=11, growth_rate=12,
                      num_blocks=3, layers_per_block=4, l2_weight=0.0005,
                      dropout_rate=0.4):
    """Build a DenseNet-style classifier for 2D GADF images.

    Architecture: a stem (Conv2D(48, 3) + ReLU -> BatchNorm -> MaxPool(2x2)),
    then ``num_blocks`` pairs of dense block + transition layer, then a head
    (BatchNorm -> ReLU -> global average pooling -> Dense(128) -> Dropout ->
    softmax). With the default arguments the network is identical to the
    previously hard-coded 3-block / 4-layer configuration.

    Args:
        input_shape: Shape of a single image, passed to ``layers.Input``.
        num_classes: Number of units in the softmax output layer.
        growth_rate: Channels added by every convolution in a dense block.
        num_blocks: Number of dense block / transition pairs. Every
            transition (and the stem) applies a 2x2 max pooling, so the image
            must be large enough to survive ``num_blocks + 1`` poolings.
        layers_per_block: Number of BN-ReLU-Conv units per dense block.
        l2_weight: L2 penalty applied to every convolution kernel.
        dropout_rate: Dropout rate applied before the output layer.

    Returns:
        An uncompiled Keras ``Model`` mapping inputs to class probabilities.
    """
    def conv(filters, kernel_size, **kwargs):
        # A fresh regularizer object per layer, as in the original code.
        return layers.Conv2D(filters, kernel_size, padding='same',
                             kernel_regularizer=regularizers.l2(l2_weight),
                             **kwargs)

    def dense_block(x, num_layers, filters):
        # Each unit sees the concatenation of all previous feature maps.
        for _ in range(num_layers):
            y = layers.BatchNormalization()(x)
            y = layers.Activation('relu')(y)
            y = conv(filters, 3)(y)
            x = layers.Concatenate()([x, y])
        return x

    def transition_layer(x):
        # 1x1 convolution halves the channel count, pooling halves each side.
        filters = x.shape[-1]
        x = layers.BatchNormalization()(x)
        x = layers.Activation('relu')(x)
        x = conv(filters // 2, 1)(x)
        x = layers.MaxPooling2D(pool_size=(2, 2))(x)
        return x

    inputs = layers.Input(shape=input_shape)
    x = conv(48, 3, activation='relu')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)

    for _ in range(num_blocks):
        x = dense_block(x, num_layers=layers_per_block, filters=growth_rate)
        x = transition_layer(x)

    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    return models.Model(inputs, outputs)


def build_1d_resnet(input_shape=(880, 1), num_classes=11):
    """Build a residual network for 1D spectral classification.

    Layout: Conv1D(64, 5) + ReLU -> BatchNorm -> MaxPool(2), two 64-filter
    residual blocks, MaxPool(2), three 128-filter residual blocks, global
    average pooling, Dense(128) + ReLU, Dropout(0.3) and a softmax output.

    Args:
        input_shape: Shape of a single sample, passed to ``layers.Input``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras ``Model``.
    """
    def conv_relu(filters, kernel_size):
        # Every regularized convolution in this model uses the same recipe.
        return layers.Conv1D(filters, kernel_size, padding='same',
                             activation='relu',
                             kernel_regularizer=regularizers.l2(0.0001))

    def residual_block(tensor, filters, kernel_size=3):
        skip = tensor
        # Two Conv+ReLU -> BatchNorm stages on the main path.
        for _ in range(2):
            tensor = conv_relu(filters, kernel_size)(tensor)
            tensor = layers.BatchNormalization()(tensor)
        # Project the skip path with a 1x1 conv when channel counts differ.
        if skip.shape[-1] != filters:
            skip = layers.Conv1D(filters, 1, padding='same')(skip)
        merged = layers.Add()([skip, tensor])
        return layers.Activation('relu')(merged)

    inputs = layers.Input(shape=input_shape)
    net = conv_relu(64, 5)(inputs)
    net = layers.BatchNormalization()(net)
    net = layers.MaxPooling1D(pool_size=2)(net)

    for width in (64, 64):
        net = residual_block(net, width)
    net = layers.MaxPooling1D(pool_size=2)(net)
    for width in (128, 128, 128):
        net = residual_block(net, width)

    net = layers.GlobalAveragePooling1D()(net)
    net = layers.Dense(128, activation='relu')(net)
    net = layers.Dropout(0.3)(net)
    outputs = layers.Dense(num_classes, activation='softmax')(net)

    return models.Model(inputs, outputs)


print("Model architectures defined!")

## 3. Data Augmentation & Training Setup

In [None]:
# Data augmentation for 1D spectra (active during training only)
@keras.utils.register_keras_serializable(package='raman_demo')
class SpectrumAugmentation(layers.Layer):
    """Randomly perturb a batch of spectra while the model is training.

    Three perturbations are applied in order: additive Gaussian noise, one
    random multiplicative gain for the whole batch, and one random circular
    shift along axis 1 for the whole batch.

    This replaces a stack of ``Lambda`` layers. ``Lambda`` ignores the
    ``training`` flag, so the random perturbations were also applied during
    validation, ``predict()`` and in the saved model, which made evaluation
    results noisy and non-reproducible.

    Args:
        noise_stddev: Standard deviation of the additive Gaussian noise.
        scale_range: ``(low, high)`` bounds of the uniform random gain.
        max_shift: Largest shift in samples; the shift is drawn uniformly
            from ``[-max_shift, max_shift]`` (both ends inclusive).
    """

    def __init__(self, noise_stddev=0.05, scale_range=(0.8, 1.2), max_shift=5,
                 **kwargs):
        super().__init__(**kwargs)
        self.noise_stddev = noise_stddev
        self.scale_range = tuple(scale_range)
        self.max_shift = max_shift

    def call(self, inputs, training=None):
        # Inference / validation: pass the data through untouched.
        if not training:
            return inputs
        noise = tf.random.normal(tf.shape(inputs), mean=0.0,
                                 stddev=self.noise_stddev, dtype=inputs.dtype)
        gain = tf.random.uniform((), self.scale_range[0], self.scale_range[1],
                                 dtype=inputs.dtype)
        # maxval is exclusive for integer dtypes, hence the +1 so that the
        # range is symmetric (the previous code could never shift by +5).
        shift = tf.random.uniform((), -self.max_shift, self.max_shift + 1,
                                  dtype=tf.int32)
        return tf.roll((inputs + noise) * gain, shift=shift, axis=1)

    def get_config(self):
        # Lets the layer be saved/reloaded without pickled Python lambdas.
        config = super().get_config()
        config.update({
            'noise_stddev': self.noise_stddev,
            'scale_range': list(self.scale_range),
            'max_shift': self.max_shift,
        })
        return config


data_augmentation_1d = models.Sequential([SpectrumAugmentation()])

# Split data
# One call splits the 1D spectra, 2D maps and labels together, which
# guarantees the three stay aligned sample-for-sample. Stratifying on the
# labels keeps every class represented in both partitions, which matters for
# imbalanced data.
(X_1d_train, X_1d_test,
 X_2d_train, X_2d_test,
 y_train, y_test) = train_test_split(
    X_1d, X_2d, y, test_size=0.2, random_state=SEED, stratify=y
)
# Both modalities share the same labels; keep the 2D-specific names available.
y_train_2d, y_test_2d = y_train, y_test

# Compute class weights for imbalanced data
# Only the training labels are used so that test-set statistics do not leak
# into training, and the classes are taken from the data instead of assuming
# that all of 0..10 are present.
train_classes = np.unique(y_train)
class_weights = compute_class_weight('balanced', classes=train_classes, y=y_train)
class_weight = {int(cls): float(w) for cls, w in zip(train_classes, class_weights)}

print(f"Train: {len(y_train)} samples")
print(f"Test: {len(y_test)} samples")

## 4. Train Model (DenseNet 1D Example)

In [None]:
# Create experiment directory
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
experiment_dir = os.path.join('../experiments', f'experiment_{timestamp}')
model_dir = os.path.join(experiment_dir, 'models')
os.makedirs(model_dir, exist_ok=True)

# Values that determine how many optimizer steps training will take. They
# mirror the arguments of the fit() call below; change both places together.
EPOCHS = 10
BATCH_SIZE = 64
VALIDATION_SPLIT = 0.1

# fit() holds out VALIDATION_SPLIT of the training data for validation, so
# only the remainder produces optimizer steps; a partial final batch still
# counts as a step (ceil). Sizing the schedule from the full training set
# with floor division made the cosine decay longer than the actual run.
n_fit_samples = int(len(X_1d_train) * (1.0 - VALIDATION_SPLIT))
steps_per_epoch = int(np.ceil(n_fit_samples / BATCH_SIZE))

# Build model with augmentation
keras.backend.clear_session()
lr_schedule = keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=0.001,
    decay_steps=max(1, EPOCHS * steps_per_epoch)
)
optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)

# Augmentation runs inside the model, in front of the classifier.
densenet_1d = models.Sequential([
    data_augmentation_1d,
    build_1d_densenet()
])
densenet_1d.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Callbacks
# Both callbacks use their default monitored quantity.
early_stopping = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
checkpoint = keras.callbacks.ModelCheckpoint(
    os.path.join(model_dir, 'best_densenet_1d.keras'),
    save_best_only=True
)

print("Model ready for training!")
print(f"Experiment dir: {experiment_dir}")

In [None]:
# Train (reduce epochs for demo - use 50-100 for production)
# NOTE: the learning-rate schedule created together with the optimizer is
# sized for 10 epochs at batch size 64; adjust it whenever these values change.
fit_options = dict(
    batch_size=64,
    epochs=10,  # Increase for better results
    validation_split=0.1,
    class_weight=class_weight,
    callbacks=[early_stopping, checkpoint],
)
history = densenet_1d.fit(X_1d_train, y_train, **fit_options)

## 5. Evaluation

In [None]:
# Class probabilities for the held-out spectra, reduced to hard labels
y_pred = densenet_1d.predict(X_1d_test)
y_pred_labels = y_pred.argmax(axis=1)

# Accuracy is the fraction of exact matches; the other three scores are
# macro-averaged, i.e. every class contributes equally.
accuracy = (y_pred_labels == y_test).mean()
precision, recall, f1 = (
    scorer(y_test, y_pred_labels, average='macro')
    for scorer in (precision_score, recall_score, f1_score)
)

print(f"\nDenseNet 1D Results:")
print(f"  Accuracy:  {accuracy:.4f}")
print(f"  Precision: {precision:.4f}")
print(f"  Recall:    {recall:.4f}")
print(f"  F1 Score:  {f1:.4f}")

In [None]:
# Confusion Matrix
# Pin the label set so the matrix is always 11x11 and lines up with the tick
# labels below, even when a class is absent from both the test labels and
# the predictions.
class_ids = np.arange(11)
cm = confusion_matrix(y_test, y_pred_labels, labels=class_ids)

# Row-normalize to percentages. Rows with no true samples stay at 0 instead
# of turning into NaN through a division by zero.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(
    cm.astype('float'), row_totals,
    out=np.zeros(cm.shape, dtype=float), where=row_totals > 0
) * 100

tick_labels = [f"{i*10}%" for i in class_ids]

plt.figure(figsize=(12, 10))
sns.heatmap(
    cm_normalized, annot=True, fmt='.1f', cmap='Blues',
    xticklabels=tick_labels,
    yticklabels=tick_labels,
    cbar_kws={'label': '%'}
)
plt.xlabel('Predicted Ethanol %', fontsize=12)
plt.ylabel('True Ethanol %', fontsize=12)
plt.title('Normalized Confusion Matrix - DenseNet 1D', fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
# Write the final model into the experiment's model directory
full_model_path = os.path.join(model_dir, 'densenet_1d_full.keras')
densenet_1d.save(full_model_path)
print(f"Model saved to {model_dir}")

## Summary

This demo showed:
1. Loading pre-processed data (1D spectra + 2D GADF maps)
2. Building DenseNet/ResNet architectures
3. Training with data augmentation and class weighting
4. Evaluation with confusion matrices

**For the full pipeline, including:**
- Data loading from Excel
- Baseline correction (airPLS)
- Synthetic data generation
- GADF transformation
- Training all 4 models
- Occlusion analysis

**See `main_pipeline.ipynb`**