# 1D CNN Classification of Raman Spectra (ramanbiolib)

This notebook:
1. Loads and prepares the preprocessed Raman spectra dataset from `ramanbiolib`
2. Trains a **1D CNN** for multi-class molecular classification
3. Evaluates performance (accuracy, confusion matrix, per-class F1)
4. Applies **Integrated Gradients** to identify which wavenumber regions drive predictions
5. Plots saliency maps overlaid on the mean spectrum of each class
6. Saves model weights and training logs to `outputs/`

In [None]:
# ── Cell 1: Imports ─────────────────────────────────────────────────────────
import os, json, warnings
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')   # headless backend so plots save without a display
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sns
from scipy.signal import find_peaks

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix, ConfusionMatrixDisplay)

# Silence library warnings, then seed both RNGs used in this notebook.
warnings.filterwarnings('ignore')
for _seed_fn in (torch.manual_seed, np.random.seed):
    _seed_fn(42)

# Prefer the GPU when one is visible to PyTorch.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
DEVICE = torch.device(_device_name)
print(f'Using device: {DEVICE}')

# Output directories (figures, logs, model weights)
for _out_dir in ('outputs/figures', 'outputs/logs', 'outputs/model'):
    os.makedirs(_out_dir, exist_ok=True)
print('Output directories ready.')

## 1. Load and Prepare Data

In [None]:
# ── Cell 2: Load spectra + metadata ─────────────────────────────────────────
def parse_list(s):
    """Convert a '[a, b, c]' string from the CSV into a Python list of floats.

    Tolerates surrounding whitespace, separators with or without a space after
    the comma, and an empty list ('[]' -> []).

    Raises
    ------
    ValueError
        If a non-empty element cannot be parsed as a float.
    """
    inner = s.strip().strip('[]')
    # Split on the bare comma (float() ignores surrounding whitespace) and skip
    # empty tokens so that '[]' yields [] instead of failing on float('').
    return [float(tok) for tok in inner.split(',') if tok.strip()]

# Both list-valued columns are stored as '[a, b, c]' strings and share a parser.
_list_columns = ('wavenumbers', 'intensity')
spectra_df = pd.read_csv(
    'ramanbiolib/db/raman_spectra_db.csv',
    converters={_col: parse_list for _col in _list_columns},
)
meta_df = pd.read_csv('ramanbiolib/db/metadata_db.csv')

# Some ids occur several times in the metadata (different laser wavelengths);
# keep the first row per id so the merge does not duplicate spectra.
meta_unique = meta_df.loc[:, ['id', 'type']].drop_duplicates(subset='id')
df = pd.merge(spectra_df, meta_unique, on='id')

# Top-level class is the part of `type` before the first '/'
# (e.g. 'Lipids/FattyAcids' -> 'Lipids').
df['class'] = df['type'].str.split('/').str.get(0)

print('Full dataset shape:', df.shape)
print('\nClass distribution:')
_class_table = df['class'].value_counts().to_string()
print(_class_table)

In [None]:
# ── Cell 3: Visualise class distribution ────────────────────────────────────
# Bar chart of spectra per top-level class, one tab10 colour per bar.
counts = df['class'].value_counts()
_bar_colors = plt.cm.tab10(np.linspace(0, 1, len(counts)))

fig, ax = plt.subplots(figsize=(9, 4))
ax.bar(counts.index, counts.values, color=_bar_colors)
ax.set_title('Class Distribution in ramanbiolib Spectra DB', fontsize=14)
ax.set_xlabel('Molecular Class', fontsize=12)
ax.set_ylabel('Number of Spectra', fontsize=12)
plt.xticks(rotation=30, ha='right')
plt.tight_layout()

_fig_path = 'outputs/figures/class_distribution.png'
plt.savefig(_fig_path, dpi=150)
plt.show()
print('Saved: outputs/figures/class_distribution.png')

In [None]:
# ── Cell 4: Filter to top-6 classes (enough samples for meaningful training) ─
KEEP_CLASSES = [
    'Proteins', 'Lipids', 'Saccharides',
    'AminoAcids', 'PrimaryMetabolites', 'NucleicAcids',
]

# Keep only rows whose top-level class is in KEEP_CLASSES.
_keep_mask = df['class'].isin(KEEP_CLASSES)
df_filt = df.loc[_keep_mask].reset_index(drop=True)
print('Filtered dataset shape:', df_filt.shape)
print(df_filt['class'].value_counts().to_string())

# Intensities become one float32 matrix (one row per spectrum); the wavenumber
# axis is taken from the first row only.
X_raw = np.array(df_filt['intensity'].tolist(), dtype=np.float32)
wavenumbers = np.array(df_filt['wavenumbers'].iat[0])
_n_points = X_raw.shape[1]
print(f'\nSpectrum length: {_n_points} wavenumber points')
print(f'Wavenumber range: {wavenumbers[0]:.0f} – {wavenumbers[-1]:.0f} cm⁻¹')

# Integer-encode the class names; CLASS_NAMES[i] is the name for label i.
le = LabelEncoder()
y_raw = le.fit_transform(df_filt['class'])
CLASS_NAMES = list(le.classes_)
N_CLASSES   = len(CLASS_NAMES)
print(f'\nClasses ({N_CLASSES}): {CLASS_NAMES}')

## 2. Data Augmentation

In [None]:
# ── Cell 5: Augmentation ─────────────────────────────────────────────────────
# With only ~194 spectra we augment by adding Gaussian noise and random scaling.
# Each original spectrum produces AUG_FACTOR augmented copies.

AUG_FACTOR = 15   # multiplier per spectrum

def augment_spectra(X, y, factor=AUG_FACTOR, noise_std=0.015,
                    scale_range=(0.85, 1.15), seed=42):
    """Add Gaussian noise + random amplitude scaling to each spectrum.

    Parameters
    ----------
    X           : np.ndarray, shape (N, L) — one spectrum per row
    y           : np.ndarray, shape (N,)   — integer labels
    factor      : int, number of augmented copies generated per spectrum
    noise_std   : float, standard deviation of the additive Gaussian noise
    scale_range : (low, high) bounds of the per-spectrum amplitude factor
    seed        : seed for the local random generator; the default (42)
                  reproduces the previously hard-coded behaviour

    Returns
    -------
    (X_out, y_out) : the original rows followed by `factor` augmented copies,
                     i.e. N * (factor + 1) rows; augmented values are clipped
                     to be non-negative.
    """
    # A local generator keeps augmentation independent of the global NumPy RNG.
    rng = np.random.default_rng(seed)
    X_parts, y_parts = [X.copy()], [y.copy()]
    for _ in range(factor):
        # Draw order (noise, then scales) is kept so default outputs are unchanged.
        noise  = rng.normal(0, noise_std, X.shape).astype(np.float32)
        scales = rng.uniform(*scale_range, (X.shape[0], 1)).astype(np.float32)
        # One scale per spectrum (broadcast along the wavenumber axis).
        X_parts.append(np.clip(X * scales + noise, 0, None))
        y_parts.append(y.copy())
    return np.vstack(X_parts), np.concatenate(y_parts)

# Build the augmented dataset from every filtered spectrum and its label.
X_aug, y_aug = augment_spectra(X=X_raw, y=y_raw)
_n_augmented = X_aug.shape[0]
print(f'After augmentation: {_n_augmented} spectra ({N_CLASSES} classes)')

## 3. Dataset, DataLoader, and Train/Test Split

In [None]:
# ── Cell 6: Train / test split and PyTorch Dataset ───────────────────────────
# Split the ORIGINAL spectra first, then augment only the training portion.
# Splitting after augmentation would place noisy/rescaled copies of the same
# spectrum in both train and test, leaking test information into training and
# inflating every reported metric. The test set therefore contains only real,
# un-augmented spectra.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X_raw, y_raw, test_size=0.20, stratify=y_raw, random_state=42
)
X_train, y_train = augment_spectra(X_train_raw, y_train_raw)
print(f'Train: {len(X_train)}  Test: {len(X_test)}')

class RamanDataset(Dataset):
    """In-memory dataset pairing each spectrum with its integer class label.

    Spectra are stored as float32 with a singleton channel axis inserted at
    dim 1 (Conv1d layout: batch, channels, length); labels are stored as int64.
    """

    def __init__(self, X, y):
        spectra = torch.tensor(X, dtype=torch.float32)
        labels = torch.tensor(y, dtype=torch.long)
        self.X = spectra.unsqueeze(dim=1)   # add the channel dimension
        self.y = labels

    def __len__(self):
        """Number of samples (one per label)."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the (spectrum, label) pair at position `idx`."""
        spectrum, label = self.X[idx], self.y[idx]
        return spectrum, label

BATCH = 32

train_ds, test_ds = RamanDataset(X_train, y_train), RamanDataset(X_test, y_test)

# Class-balanced sampling: each training sample is weighted by the inverse of
# its class frequency and drawn with replacement, one epoch = len(train) draws.
class_counts = np.bincount(y_train)
weights = 1.0 / class_counts[y_train]
sampler = WeightedRandomSampler(
    weights,
    num_samples=len(weights),
    replacement=True,
)

# The sampler dictates training order; the test loader keeps dataset order.
train_loader = DataLoader(train_ds, batch_size=BATCH, sampler=sampler)
test_loader  = DataLoader(test_ds,  batch_size=BATCH, shuffle=False)
print(f'Batch size: {BATCH}')

## 4. 1D CNN Architecture

In [None]:
# ── Cell 7: Model definition ─────────────────────────────────────────────────
class RamanCNN1D(nn.Module):
    """
    Three-block 1D CNN for Raman spectrum classification.

    Input shape : (batch, 1, input_len)
    Output shape: (batch, n_classes) — raw logits (no softmax applied)

    Each block is Conv1d -> BatchNorm1d -> ReLU (twice in blocks 1 and 2),
    followed by MaxPool1d(4) and Dropout(0.25). Every block divides the
    sequence length by 4 (floor), so `input_len` must be at least 64 for the
    final feature map to be non-empty.
    """
    def __init__(self, input_len=1351, n_classes=6):
        super().__init__()
        self.block1 = nn.Sequential(
            nn.Conv1d(1,  32, kernel_size=15, padding=7),
            nn.BatchNorm1d(32), nn.ReLU(),
            nn.Conv1d(32, 32, kernel_size=15, padding=7),
            nn.BatchNorm1d(32), nn.ReLU(),
            nn.MaxPool1d(4), nn.Dropout(0.25)
        )
        self.block2 = nn.Sequential(
            nn.Conv1d(32, 64, kernel_size=11, padding=5),
            nn.BatchNorm1d(64), nn.ReLU(),
            nn.Conv1d(64, 64, kernel_size=11, padding=5),
            nn.BatchNorm1d(64), nn.ReLU(),
            nn.MaxPool1d(4), nn.Dropout(0.25)
        )
        self.block3 = nn.Sequential(
            nn.Conv1d(64, 128, kernel_size=7, padding=3),
            nn.BatchNorm1d(128), nn.ReLU(),
            nn.MaxPool1d(4), nn.Dropout(0.25)
        )

        # Probe the flattened feature size with a dummy input. Run the probe in
        # eval mode and without autograd: a train-mode pass would fold the
        # all-zero dummy batch into the BatchNorm running statistics and draw
        # dropout masks from the global RNG.
        was_training = self.training
        self.eval()
        with torch.no_grad():
            flat = self._forward_features(torch.zeros(1, 1, input_len)).shape[1]
        self.train(was_training)

        self.classifier = nn.Sequential(
            nn.Linear(flat, 256), nn.ReLU(), nn.Dropout(0.4),
            nn.Linear(256, n_classes)
        )

    def _forward_features(self, x):
        """Run the three conv blocks and flatten to (batch, features)."""
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        # flatten() also handles non-contiguous inputs, unlike view().
        return torch.flatten(x, start_dim=1)

    def forward(self, x):
        """Return class logits for a batch of spectra shaped (batch, 1, L)."""
        x = self._forward_features(x)
        return self.classifier(x)

# Spectrum length is taken from the training matrix (second axis).
SEQ_LEN = X_train.shape[1]
model = RamanCNN1D(input_len=SEQ_LEN, n_classes=N_CLASSES)
model = model.to(DEVICE)

# Count trainable parameters only.
n_params = 0
for _param in model.parameters():
    if _param.requires_grad:
        n_params += _param.numel()
print(f'Model parameters: {n_params:,}')
print(model)

## 5. Training

In [None]:
# ── Cell 8: Training loop ────────────────────────────────────────────────────
# Hyperparameters: number of epochs, Adam learning rate, Adam weight decay.
EPOCHS   = 80
LR       = 1e-3
WD       = 1e-4

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WD)
# Cosine learning-rate schedule; stepped once per epoch, T_max spans the run.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Per-epoch metrics, later written to outputs/logs/training_log.csv.
history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

best_val_acc = 0.0

# NOTE(review): the "validation" pass below iterates `test_loader`, so the best
# checkpoint is selected on the test set and the test accuracy reported later
# is optimistic. A separate validation split would be needed for an unbiased
# estimate.
for epoch in range(1, EPOCHS + 1):
    # ── Train
    model.train()
    # Running sums: sample-weighted loss, correct predictions, samples seen.
    t_loss, t_correct, t_total = 0, 0, 0
    for xb, yb in train_loader:
        xb, yb = xb.to(DEVICE), yb.to(DEVICE)
        optimizer.zero_grad()
        logits = model(xb)
        loss   = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        # criterion returns the batch mean; multiply by batch size so the
        # epoch average is correct even when the last batch is smaller.
        t_loss    += loss.item() * len(yb)
        t_correct += (logits.argmax(1) == yb).sum().item()
        t_total   += len(yb)
    scheduler.step()

    # ── Validate
    model.eval()
    v_loss, v_correct, v_total = 0, 0, 0
    with torch.no_grad():
        for xb, yb in test_loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            logits = model(xb)
            loss   = criterion(logits, yb)
            v_loss    += loss.item() * len(yb)
            v_correct += (logits.argmax(1) == yb).sum().item()
            v_total   += len(yb)

    train_acc = t_correct / t_total
    val_acc   = v_correct / v_total
    history['train_loss'].append(t_loss / t_total)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(v_loss / v_total)
    history['val_acc'].append(val_acc)

    # Save best model (strict '>' keeps the earliest epoch on ties)
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'outputs/model/best_model.pt')

    # Log the first epoch and every tenth epoch.
    if epoch % 10 == 0 or epoch == 1:
        print(f'Epoch {epoch:3d}/{EPOCHS} | '
              f'Train loss {t_loss/t_total:.4f} acc {train_acc:.3f} | '
              f'Val loss {v_loss/v_total:.4f} acc {val_acc:.3f}')

print(f'\nBest validation accuracy: {best_val_acc:.4f}')

# Save training log
pd.DataFrame(history).to_csv('outputs/logs/training_log.csv', index=False)
print('Training log saved to outputs/logs/training_log.csv')

In [None]:
# ── Cell 9: Training curves ──────────────────────────────────────────────────
# Two side-by-side panels (loss, accuracy), each with a train and a val curve.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

_panels = (
    (axes[0], 'train_loss', 'val_loss', 'Loss',     'Cross-Entropy Loss'),
    (axes[1], 'train_acc',  'val_acc',  'Accuracy', 'Classification Accuracy'),
)
for _panel, _train_key, _val_key, _ylabel, _title in _panels:
    _panel.plot(history[_train_key], label='Train', lw=2)
    _panel.plot(history[_val_key],   label='Val',   lw=2)
    _panel.set_xlabel('Epoch')
    _panel.set_ylabel(_ylabel)
    _panel.set_title(_title)
    _panel.legend()

plt.tight_layout()
plt.savefig('outputs/figures/training_curves.png', dpi=150)
plt.show()
print('Saved: outputs/figures/training_curves.png')

## 6. Evaluation

In [None]:
# ── Cell 10: Load best model & evaluate ──────────────────────────────────────
# Restore the best checkpoint saved during training and switch to eval mode
# (disables dropout, uses BatchNorm running statistics).
model.load_state_dict(torch.load('outputs/model/best_model.pt', map_location=DEVICE))
model.eval()

# Collect predictions and ground-truth labels over the whole test loader.
all_preds, all_labels = [], []
with torch.no_grad():
    for xb, yb in test_loader:
        logits = model(xb.to(DEVICE))
        all_preds.extend(logits.argmax(1).cpu().numpy())
        all_labels.extend(yb.numpy())

all_preds  = np.array(all_preds)
all_labels = np.array(all_labels)

test_acc = accuracy_score(all_labels, all_preds)
print(f'Test accuracy: {test_acc:.4f}  ({test_acc*100:.1f}%)')
print()
# Pass the full label set explicitly: without `labels`, the report infers the
# classes from the data and raises when a class is absent from both the test
# labels and the predictions (len(target_names) would no longer match).
print(classification_report(all_labels, all_preds,
                            labels=np.arange(N_CLASSES),
                            target_names=CLASS_NAMES))

In [None]:
# ── Cell 11: Confusion matrix ─────────────────────────────────────────────────
# Fix the label set so the matrix is always N_CLASSES x N_CLASSES and lines up
# with CLASS_NAMES even when a class never appears in the test labels or the
# predictions (otherwise the display labels would not match the matrix).
cm_vals = confusion_matrix(all_labels, all_preds, labels=np.arange(N_CLASSES))

fig, ax = plt.subplots(figsize=(8, 7))
disp = ConfusionMatrixDisplay(confusion_matrix=cm_vals, display_labels=CLASS_NAMES)
disp.plot(ax=ax, colorbar=False, cmap='Blues')
ax.set_title('Confusion Matrix – Test Set', fontsize=13)
plt.xticks(rotation=30, ha='right')
plt.tight_layout()
plt.savefig('outputs/figures/confusion_matrix.png', dpi=150)
plt.show()
print('Saved: outputs/figures/confusion_matrix.png')

## 7. Integrated Gradients (Feature Attribution)

In [None]:
# ── Cell 12: Integrated Gradients implementation ──────────────────────────────
#
# IG(x) = (x - baseline) * ∫₀¹ ∂F(baseline + α*(x-baseline)) / ∂x  dα
# Approximated by a Riemann sum over `n_steps` evenly spaced values of α in [0, 1].

def integrated_gradients(model, input_tensor, target_class,
                          baseline=None, n_steps=50):
    """
    Compute Integrated Gradients for a single spectrum.

    The path integral from `baseline` to `input_tensor` is approximated by
    averaging the input-gradient of the target logit at `n_steps` evenly
    spaced points (both endpoints included) and multiplying by
    (input - baseline).

    Parameters
    ----------
    model        : callable mapping (n_steps, 1, L) -> (n_steps, n_classes)
                   logits, e.g. a trained RamanCNN1D. The model's train/eval
                   mode is not changed here; callers should set eval mode.
    input_tensor : torch.Tensor, shape (1, 1, L)
    target_class : int
    baseline     : torch.Tensor same shape as input_tensor (default: zeros)
    n_steps      : int >= 1, number of Riemann steps

    Returns
    -------
    ig : np.ndarray, shape (L,) — attribution per wavenumber

    Raises
    ------
    ValueError if n_steps < 1.
    """
    if n_steps < 1:
        raise ValueError('n_steps must be a positive integer')

    # Work on detached copies so the interpolated batch is a leaf tensor even
    # if the caller's tensors already track gradients.
    input_tensor = input_tensor.detach()
    if baseline is None:
        baseline = torch.zeros_like(input_tensor)
    else:
        baseline = baseline.detach().to(input_tensor.device)
    delta = input_tensor - baseline

    # Interpolate along the straight-line path from baseline to input. The
    # alphas live on the input's own device, so no global device is needed.
    alphas = torch.linspace(0, 1, n_steps,
                            device=input_tensor.device, dtype=input_tensor.dtype)
    # (n_steps, 1, 1) * (1, 1, L) broadcasts to (n_steps, 1, L)
    interpolated = baseline + alphas.view(-1, 1, 1) * delta
    interpolated.requires_grad_(True)

    # Forward pass, then gradient of the summed target logit w.r.t. the inputs
    # only. autograd.grad (unlike .backward()) leaves the model parameters'
    # .grad untouched, so attribution never pollutes optimizer state.
    logits = model(interpolated)       # (n_steps, n_classes)
    score  = logits[:, target_class].sum()
    grads, = torch.autograd.grad(score, interpolated)   # (n_steps, 1, L)

    avg_grads = grads.mean(dim=0)       # (1, L)

    ig = (delta.squeeze() * avg_grads.squeeze()).cpu().numpy()
    return ig


def compute_class_saliency(model, X_class, label, n_samples=10, n_steps=50):
    """
    Mean absolute Integrated-Gradients attribution for one class.

    Puts `model` in eval mode, draws up to `n_samples` rows of `X_class`
    without replacement (global NumPy RNG), computes Integrated Gradients for
    each against `label`, and returns the element-wise mean of the absolute
    attributions (one value per wavenumber).
    """
    model.eval()

    # Random subset for speed; never more rows than the class actually has.
    n_pick = min(n_samples, len(X_class))
    chosen = np.random.choice(len(X_class), n_pick, replace=False)

    def _abs_attribution(row):
        # (L,) -> (1, 1, L): prepend batch and channel axes, move to DEVICE.
        spectrum = torch.tensor(row, dtype=torch.float32)[None, None].to(DEVICE)
        return np.abs(integrated_gradients(model, spectrum, label, n_steps=n_steps))

    per_sample = [_abs_attribution(X_class[i]) for i in chosen]
    return np.mean(per_sample, axis=0)

print('Integrated Gradients function defined.')

In [None]:
# ── Cell 13: Compute per-class saliency maps ──────────────────────────────────
# Use the *original* (non-augmented) spectra for cleaner attributions
# Attributions are computed on the original (non-augmented) spectra.
N_SALIENCY_SAMPLES = 15   # per-class spectra used for averaging
IG_STEPS           = 50

saliency_maps = {}   # class_name -> mean |IG| array
class_spectra  = {}  # class_name -> mean spectrum array

# Encode all class names in one call, then walk name/label pairs together.
for cls_name, cls_idx in zip(CLASS_NAMES, le.transform(CLASS_NAMES)):
    mask  = y_raw == cls_idx
    X_cls = X_raw[mask]

    print(f'Computing IG for {cls_name} ({len(X_cls)} spectra)...', end=' ')
    ig_mean = compute_class_saliency(
        model, X_cls, cls_idx,
        n_samples=N_SALIENCY_SAMPLES, n_steps=IG_STEPS,
    )
    class_spectra[cls_name] = X_cls.mean(axis=0)
    saliency_maps[cls_name] = ig_mean
    print('done')

print('\nAll saliency maps computed.')

## 8. Saliency Maps Overlaid on Spectra

In [None]:
# ── Cell 14: Saliency overlay plots (one plot per class) ─────────────────────
# One colour per class for the mean-spectrum line.
COLORS = plt.cm.tab10(np.linspace(0, 1, N_CLASSES))

for i, cls_name in enumerate(CLASS_NAMES):
    sal   = saliency_maps[cls_name]
    spec  = class_spectra[cls_name]
    # Min-max normalise to [0, 1]; the epsilon guards against a flat map.
    sal_n = (sal - sal.min()) / (sal.max() - sal.min() + 1e-9)

    fig, ax1 = plt.subplots(figsize=(12, 4))

    # Mean spectrum (left y-axis)
    color_spec = COLORS[i]
    ax1.plot(wavenumbers, spec, color=color_spec, lw=1.5, label='Mean spectrum')
    ax1.set_xlabel('Wavenumber (cm⁻¹)', fontsize=12)
    ax1.set_ylabel('Intensity (normalised)', color=color_spec, fontsize=12)
    ax1.tick_params(axis='y', labelcolor=color_spec)

    # Saliency (right y-axis, shaded)
    ax2 = ax1.twinx()
    ax2.fill_between(wavenumbers, sal_n, alpha=0.35, color='crimson', label='IG saliency')
    ax2.set_ylabel('Normalised |IG| saliency', color='crimson', fontsize=12)
    ax2.tick_params(axis='y', labelcolor='crimson')

    # Annotate the (up to) three highest saliency peaks; if find_peaks returns
    # nothing, top_peaks is empty and no annotation is drawn.
    peaks, _ = find_peaks(sal_n, prominence=0.15, distance=20)
    top_peaks = peaks[np.argsort(sal_n[peaks])[::-1][:3]]
    for pk in top_peaks:
        ax1.axvline(wavenumbers[pk], color='grey', lw=0.8, ls='--')
        ax1.text(wavenumbers[pk] + 5, spec.max() * 0.9,
                 f'{wavenumbers[pk]:.0f}', fontsize=8, rotation=90)

    # Combined legend (handles from both y-axes)
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper right', fontsize=9)

    ax1.set_title(f'Saliency Map – {cls_name}', fontsize=14)
    plt.tight_layout()
    out_path = f'outputs/figures/saliency_{cls_name.lower()}.png'
    plt.savefig(out_path, dpi=150)
    plt.show()
    # Release the figure: one is created per class and pyplot otherwise keeps
    # every figure alive until the kernel exits.
    plt.close(fig)
    print(f'Saved: {out_path}')

In [None]:
# ── Cell 15: Aggregate saliency heatmap (all classes) ─────────────────────────
# Min-max normalise each class's saliency map independently, then stack the
# rows in CLASS_NAMES order -> (n_classes, n_wavenumbers).
_norm_rows = []
for _cls in CLASS_NAMES:
    _raw = saliency_maps[_cls]
    _norm_rows.append((_raw - _raw.min()) / (_raw.max() - _raw.min() + 1e-9))
sal_matrix = np.array(_norm_rows)

# Downsample wavenumber axis for readability (keep every `step`-th point)
step = 10
wn_ds  = wavenumbers[::step]
sal_ds = sal_matrix[:, ::step]

# Rows are centred on integer y positions so tick i labels CLASS_NAMES[i].
_n_rows = len(CLASS_NAMES)
_extent = [wn_ds[0], wn_ds[-1], _n_rows-0.5, -0.5]

fig, ax = plt.subplots(figsize=(14, 4))
im = ax.imshow(sal_ds, aspect='auto', cmap='hot', extent=_extent)
ax.set_title('Integrated-Gradient Saliency Heatmap (all classes)', fontsize=13)
ax.set_xlabel('Wavenumber (cm⁻¹)', fontsize=12)
ax.set_yticks(range(_n_rows))
ax.set_yticklabels(CLASS_NAMES, fontsize=11)
plt.colorbar(im, ax=ax, label='Normalised |IG|')
plt.tight_layout()
plt.savefig('outputs/figures/saliency_heatmap_all.png', dpi=150)
plt.show()
print('Saved: outputs/figures/saliency_heatmap_all.png')

## 9. Key Spectral Regions Per Class

In [None]:
# ── Cell 16: Identify top wavenumber windows per class ───────────────────────
WINDOW = 20   # ± cm⁻¹ around each peak to define a 'region'

key_regions = {}     # class_name -> list of region dicts (up to 5 per class)
summary_rows = []    # flat records for the CSV summary

for cls_name in CLASS_NAMES:
    sal   = saliency_maps[cls_name]
    # Min-max normalise to [0, 1]; the epsilon guards against a flat map.
    sal_n = (sal - sal.min()) / (sal.max() - sal.min() + 1e-9)

    peaks, _ = find_peaks(sal_n, prominence=0.10, distance=15)
    if len(peaks) == 0:
        # Fall back: take absolute max
        peaks = np.array([np.argmax(sal_n)])

    # Sort by saliency height (descending) and keep the five strongest
    ranked = peaks[np.argsort(sal_n[peaks])[::-1]]
    top5   = ranked[:5]

    regions = []
    for pk in top5:
        # Compute each reported value once and reuse it for both records.
        wn_center   = wavenumbers[pk]
        center_cm   = int(wn_center)
        range_label = f'{int(wn_center-WINDOW)}–{int(wn_center+WINDOW)} cm⁻¹'
        score       = float(sal_n[pk])

        regions.append({
            'center_cm': center_cm,
            'range': range_label,
            'saliency': score
        })
        summary_rows.append({
            'class': cls_name,
            'center_cm': center_cm,
            'range': range_label,
            'saliency_score': round(score, 4)
        })

    key_regions[cls_name] = regions

summary_df = pd.DataFrame(summary_rows)
summary_df.to_csv('outputs/logs/key_spectral_regions.csv', index=False)
print('Key spectral regions saved to outputs/logs/key_spectral_regions.csv')
print()
print(summary_df.to_string(index=False))

## 10. Save Final Model & Artefacts

In [None]:
# ── Cell 17: Save model weights, config, and saliency arrays ─────────────────

# 1. Weights currently held by `model`.
# NOTE(review): Cell 10 reloads best_model.pt into `model`, so when the
# notebook is run top to bottom this file holds the best-checkpoint weights,
# not the weights from the last training epoch.
_final_weights = model.state_dict()
torch.save(_final_weights, 'outputs/model/final_model.pt')

# 2. Model config (architecture inputs plus headline metrics)
_wn_first, _wn_last = int(wavenumbers[0]), int(wavenumbers[-1])
config = {
    'input_len': SEQ_LEN,
    'n_classes': N_CLASSES,
    'class_names': CLASS_NAMES,
    'wavenumber_range': [_wn_first, _wn_last],
    'epochs': EPOCHS,
    'best_val_acc': round(best_val_acc, 4),
    'test_acc': round(float(test_acc), 4)
}
with open('outputs/model/model_config.json', 'w') as f:
    f.write(json.dumps(config, indent=2))

# 3. Saliency arrays: the shared axis, the class names, and one array per
#    class stored under the class name as its key.
_arrays = {
    'wavenumbers': wavenumbers,
    'class_names': np.array(CLASS_NAMES),
}
for _cls in CLASS_NAMES:
    _arrays[_cls] = saliency_maps[_cls]
np.savez('outputs/logs/saliency_maps.npz', **_arrays)

print('Saved artefacts:')
print('  outputs/model/final_model.pt')
print('  outputs/model/best_model.pt')
print('  outputs/model/model_config.json')
print('  outputs/logs/training_log.csv')
print('  outputs/logs/key_spectral_regions.csv')
print('  outputs/logs/saliency_maps.npz')
print('  outputs/figures/  (all plots)')

print(f'\n===  FINAL TEST ACCURACY: {test_acc*100:.2f}%  ===')

## 11. Summary of Key Spectral Regions

| Class | Top Region 1 | Top Region 2 | Top Region 3 |
|---|---|---|---|
| **Proteins** | ~1650 cm⁻¹ (Amide I) | ~1250 cm⁻¹ (Amide III) | ~1003 cm⁻¹ (Phe ring) |
| **Lipids** | ~1300 cm⁻¹ (CH₂ twist) | ~1440 cm⁻¹ (CH₂ scissor) | ~1660 cm⁻¹ (C=C stretch) |
| **Saccharides** | ~1340 cm⁻¹ (C-H bending) | ~1460 cm⁻¹ (CH₂) | ~930 cm⁻¹ (C-O-C ring) |
| **AminoAcids** | ~1670 cm⁻¹ (C=O) | ~1200 cm⁻¹ (C-N) | ~850 cm⁻¹ (C-C) |
| **PrimaryMetabolites** | ~1380 cm⁻¹ | ~1620 cm⁻¹ | ~750 cm⁻¹ |
| **NucleicAcids** | ~790 cm⁻¹ (ring breathing) | ~1090 cm⁻¹ (PO₂⁻) | ~1580 cm⁻¹ (base C=N) |

> **Note:** The wavenumbers in the table above are illustrative placeholders, not output of this model. Run the notebook to obtain the model-derived values, which are written to `outputs/logs/key_spectral_regions.csv`.

See `spectral_regions_summary.md` for a detailed narrative summary.