# Exoplanet Spectrum Analysis Tutorial

This notebook demonstrates the core functionality of the Exoplanet Spectrum Recovery platform.

## Contents
1. Loading and visualizing spectral data
2. Applying the denoising model
3. Feature detection and identification
4. Next steps (model training and atmospheric retrieval)

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import torch

# Configure plotting
# Apply matplotlib's built-in dark theme to any matplotlib figures.
# (The Plotly figures in this notebook set template='plotly_dark' themselves.)
plt.style.use('dark_background')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

## 1. Loading Spectral Data

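We'll start with a sample transmission spectrum. To keep this tutorial self-contained, the cell below generates a synthetic one; in practice you would load real observational data. The platform supports FITS, CSV, and JSON formats.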

In [None]:
# Synthetic demo data: this cell stands in for loading a real observation.

# Seed NumPy's global RNG so the noise drawn later in this cell is reproducible.
np.random.seed(42)

# 500 evenly spaced wavelength samples from 0.5 to 5.0 microns (endpoints included)
wavelength = np.linspace(start=0.5, stop=5.0, num=500)

# Create a synthetic transmission spectrum with molecular features
def gaussian_absorption(x, center, width, depth):
    """Evaluate a Gaussian-shaped absorption feature at ``x``.

    Args:
        x: Scalar or NumPy array of positions (the notebook passes the
            wavelength grid).
        center: Position of the peak.
        width: Gaussian standard deviation, in the same units as ``x``.
        depth: Peak amplitude, reached exactly at ``x == center``.

    Returns:
        ``depth * exp(-(x - center)**2 / (2 * width**2))``, with the same
        shape as ``x``.
    """
    squared_offset = (x - center) ** 2
    two_variance = 2 * width ** 2
    return depth * np.exp(-squared_offset / two_variance)

# Flat continuum level of the transit depth (ppm)
base_depth = 100

# Absorption bands as (center, width, depth); summed in listed order on top of
# the continuum.
absorption_bands = [
    (1.4, 0.1, 20),    # H2O
    (1.9, 0.15, 15),   # H2O
    (2.3, 0.08, 12),   # CH4
    (4.3, 0.2, 25),    # CO2
]
clean_spectrum = base_depth + sum(
    gaussian_absorption(wavelength, center, width, depth)
    for center, width, depth in absorption_bands
)

# Gaussian noise with a constant standard deviation (ppm); the same value is
# used as the per-point error bar.
noise_level = 8
noisy_spectrum = clean_spectrum + np.random.normal(loc=0, scale=noise_level, size=len(wavelength))
error = np.full_like(wavelength, fill_value=noise_level)

# Quick summary of the generated data
print(f"Wavelength range: {wavelength.min():.2f} - {wavelength.max():.2f} μm")
print(f"Number of data points: {len(wavelength)}")
print(f"Mean transit depth: {noisy_spectrum.mean():.1f} ppm")

In [None]:
# Interactive Plotly view: noisy observations overlaid on the noise-free spectrum

# Observed points, drawn with their per-point error bars
observed_trace = go.Scatter(
    x=wavelength,
    y=noisy_spectrum,
    mode='markers',
    name='Observed',
    marker={'size': 4, 'color': '#818cf8'},
    error_y={
        'type': 'data',
        'array': error,
        'visible': True,
        'color': 'rgba(129, 140, 248, 0.3)',
    },
)

# Noise-free spectrum (ground truth of the synthetic data)
truth_trace = go.Scatter(
    x=wavelength,
    y=clean_spectrum,
    mode='lines',
    name='True Spectrum',
    line={'color': '#22c55e', 'width': 2},
)

fig = go.Figure(data=[observed_trace, truth_trace])

fig.update_layout(
    title='Synthetic Exoplanet Transmission Spectrum',
    xaxis_title='Wavelength (μm)',
    yaxis_title='Transit Depth (ppm)',
    template='plotly_dark',
    height=500,
)

fig.show()

## 2. Applying the Denoising Model

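Create the denoiser and run it on the noisy spectrum to recover the underlying signal. This demo instantiates the model without trained weights, so the output only illustrates the pipeline; load trained weights for meaningful results.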

In [None]:
import sys

# Put the parent directory first on the module search path; presumably that is
# where the project's `models` package lives relative to this notebook.
sys.path.insert(0, '..')

from models.architectures.denoiser import create_denoiser

# Instantiate the 'v1' denoiser (in practice, load trained weights) and switch
# it to evaluation mode.
model = create_denoiser(model_type='v1')
model.eval()

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
print(f"Using device: {device}")

# Move the model to the selected device and report its size
model = model.to(device)
print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")

In [None]:
# Prepare input tensor
# torch.as_tensor replaces the legacy torch.FloatTensor constructor: it converts
# the float64 NumPy spectrum to float32 and places it on `device` in one call.
input_tensor = torch.as_tensor(noisy_spectrum, dtype=torch.float32, device=device)
# Add two leading singleton axes -> (1, 1, 500); presumably (batch, channel,
# length) -- confirm against the denoiser's expected input layout.
input_tensor = input_tensor.unsqueeze(0).unsqueeze(0)

# Run inference without tracking gradients
with torch.no_grad():
    recovered = model(input_tensor)

# Drop singleton axes and bring the result back to the host as a NumPy array
recovered_spectrum = recovered.squeeze().cpu().numpy()

# Note: With untrained model, results won't be meaningful
# This is for demonstration of the pipeline only
print(f"Input shape: {input_tensor.shape}")
print(f"Output shape: {recovered.shape}")

## 3. Feature Detection

Identify molecular absorption features in the spectrum.

In [None]:
# Reference band positions per species, in the same units as the wavelength
# grid (microns); used to mark known features on the plots.
molecular_features = dict(
    H2O=[1.15, 1.4, 1.9, 2.7],
    CO2=[2.0, 4.3],
    CH4=[2.3, 3.3],
    CO=[4.6],
    Na=[0.589],
    K=[0.766],
)

# Simple feature detection (in practice, use ML model)
def detect_features(wavelength, flux, threshold_sigma=2.0, baseline_window=50):
    """Flag samples that deviate significantly from a median-filtered baseline.

    Args:
        wavelength: Wavelength grid matching ``flux``. It is not used in the
            computation; it is kept so the call signature stays stable.
        flux: 1-D array-like of flux / transit-depth samples.
        threshold_sigma: A sample is flagged when its absolute residual
            exceeds ``threshold_sigma`` times the standard deviation of all
            residuals.
        baseline_window: Width, in samples, of the running-median window used
            to estimate the baseline. Defaults to 50, the value that was
            previously hard-coded. It should be wider than the features of
            interest, otherwise the baseline follows them and they vanish
            from the residual.

    Returns:
        Tuple ``(baseline, residual, feature_mask)`` of arrays with the same
        shape as ``flux``: the median-filtered baseline, ``flux - baseline``,
        and a boolean mask of flagged samples.

    Raises:
        ValueError: If ``baseline_window`` is smaller than 1.
    """
    # Local import keeps this cell runnable without editing the import cell.
    from scipy.ndimage import median_filter

    if baseline_window < 1:
        raise ValueError(f"baseline_window must be >= 1, got {baseline_window}")

    # Accept lists and other array-likes as well as NumPy arrays
    flux = np.asarray(flux)

    # Fit baseline (simple running median)
    baseline = median_filter(flux, size=baseline_window)

    # Deviations from the baseline; their spread sets the detection scale.
    # Note that sigma is computed over all residuals, including the features.
    residual = flux - baseline
    sigma = np.std(residual)

    # Flag deviations of either sign beyond the threshold
    feature_mask = np.abs(residual) > threshold_sigma * sigma

    return baseline, residual, feature_mask

# Run detection on the noisy synthetic spectrum with the default threshold
baseline, residual, features = detect_features(wavelength=wavelength, flux=noisy_spectrum)

# `features` is a boolean mask, so its sum is the number of flagged samples
print(f"Detected {features.sum()} data points with significant features")

In [None]:
# Visualize feature detection: spectrum + baseline on top, residuals below
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.1,
                    subplot_titles=['Spectrum with Baseline', 'Residuals'])

# Top plot: spectrum and baseline
fig.add_trace(go.Scatter(x=wavelength, y=noisy_spectrum, mode='markers',
                         name='Observed', marker=dict(size=3, color='#818cf8')), row=1, col=1)
fig.add_trace(go.Scatter(x=wavelength, y=baseline, mode='lines',
                         name='Baseline', line=dict(color='#f43f5e', width=2)), row=1, col=1)

# Bottom plot: residuals with detected features
fig.add_trace(go.Scatter(x=wavelength, y=residual, mode='markers',
                         name='Residual', marker=dict(size=3, color='#818cf8')), row=2, col=1)
fig.add_trace(go.Scatter(x=wavelength[features], y=residual[features], mode='markers',
                         name='Detected Features', marker=dict(size=6, color='#22c55e')), row=2, col=1)

# Add molecular feature annotations on the top panel. Each line is labelled
# with its species; previously the lines were drawn without any label.
wl_min, wl_max = wavelength.min(), wavelength.max()  # loop-invariant bounds
for molecule, wavelengths in molecular_features.items():
    for w in wavelengths:
        # Skip reference bands that fall outside the plotted wavelength range
        if wl_min <= w <= wl_max:
            fig.add_vline(x=w, line_dash='dash', line_color='rgba(255,255,255,0.3)',
                          annotation_text=molecule, annotation_font_size=10,
                          row=1, col=1)

fig.update_layout(height=600, template='plotly_dark', showlegend=True)
fig.update_xaxes(title_text='Wavelength (μm)', row=2, col=1)
fig.update_yaxes(title_text='Transit Depth (ppm)', row=1, col=1)
fig.update_yaxes(title_text='Residual (ppm)', row=2, col=1)

fig.show()

## 4. Next Steps

This notebook demonstrated basic spectrum loading, visualization, the denoising pipeline, and feature detection.

For more advanced usage:
- See `02_model_training.ipynb` for training custom models
- See `03_atmospheric_retrieval.ipynb` for Bayesian parameter estimation
- See the API documentation for integrating with the web interface