# SETUP

In [1]:
import os

# Both environment variables below are assigned before `import tensorflow`
# further down in this cell; keep that order when editing.

# TF_CPP_MIN_LOG_LEVEL filters TensorFlow's native (C++) log output:
# 0 = show everything, 1 = hide INFO, 2 = also hide WARNING, 3 = also hide ERROR.
# '3' keeps the notebook output as quiet as possible.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# TF_ENABLE_ONEDNN_OPTS = '0' turns off the oneDNN/MKL optimizations in TensorFlow.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

import math
import numpy as np
import matplotlib.pyplot as plt

import json

import keras_tuner as kt
import tensorflow as tf

from geexhp import datavis as dvis
# Project helper; presumably applies the project's matplotlib style -- see geexhp.datavis.
dvis.configure_matplotlib()

In [2]:
# Number of examples in the training and validation splits.
TRAIN_SAMPLES = 818_862
VAL_SAMPLES = 79_031

# Examples per batch, shared by the training, validation and test pipelines.
BATCH_SIZE = 256

# Batches needed to see each split once. Ceiling division (-(-a // b)) so the
# final, possibly partial, batch is counted as well.
steps_per_epoch = -(-TRAIN_SAMPLES // BATCH_SIZE)
validation_steps = -(-VAL_SAMPLES // BATCH_SIZE)

# DATA PREPROCESSING PIPELINE

In [3]:
def compute_normalization_stats(train_tfrecord_path, output_path='normalization_stats.json'):
    """Compute mean/std normalization statistics from a TFRecord file.

    The records are streamed in chunks of 1000 examples. For each albedo
    region (UV, Vis, NIR) and each regression target, the running sum, sum of
    squares and element count are accumulated, then turned into a mean and a
    population standard deviation (sqrt(E[x^2] - E[x]^2)).

    Input statistics are a single scalar mean/std per region, pooled over all
    spectral points of all examples.

    All accumulation is done in float64: the records are float32, and summing
    hundreds of millions of float32 values (or their squares) in float32 loses
    enough precision to distort the variance or even make it negative.

    Args:
        train_tfrecord_path: Path forwarded to ``tf.data.TFRecordDataset``.
        output_path: File the statistics are written to as JSON. Defaults to
            ``'normalization_stats.json'`` in the working directory; pass
            ``None`` to skip writing.

    Returns:
        dict: ``{'inputs': {region: {'mean', 'std'}},
        'outputs': {target: {'mean', 'std'}}}`` with plain Python floats.

    Raises:
        ValueError: If a feature received no values (e.g. empty dataset).
    """
    regions = ['UV', 'Vis', 'NIR']
    target_names = [
        'OBJECT-RADIUS-REL-EARTH', 'OBJECT-GRAVITY',
        'ATMOSPHERE-TEMPERATURE', 'ATMOSPHERE-PRESSURE',
        'log_C2H6', 'log_CH4', 'log_CO', 'log_CO2', 'log_H2O',
        'log_N2', 'log_N2O', 'log_O2', 'log_O3',
    ]

    # Statistics are computed on the noise-free 'ALBEDO_B-*' spectra.
    # NOTE(review): the records presumably also hold 'NOISY_ALBEDO_B-*'
    # variants; switch the key prefix here if those should be used instead.
    feature_spec = {
        f'ALBEDO_B-{region}': tf.io.VarLenFeature(tf.float32) for region in regions
    }
    feature_spec.update(
        {name: tf.io.FixedLenFeature([], tf.float32) for name in target_names}
    )

    def parse_fn(example):
        """Decode one serialized example into a dict of raw tensors."""
        return tf.io.parse_single_example(example, feature_spec)

    def new_accumulator():
        """Return a fresh running-moments accumulator."""
        return {'sum': 0.0, 'sq_sum': 0.0, 'count': 0}

    def accumulate(acc, values):
        """Add a chunk of values to an accumulator using float64 arithmetic."""
        values = np.asarray(values, dtype=np.float64)
        # float()/int() keep the accumulators plain Python numbers so no NumPy
        # scalar-promotion rule can silently narrow them back to float32.
        acc['sum'] += float(values.sum())
        acc['sq_sum'] += float(np.square(values).sum())
        acc['count'] += int(values.size)

    def finalize(name, acc):
        """Convert accumulated moments into a {'mean', 'std'} dict."""
        if acc['count'] == 0:
            raise ValueError(
                f"No values found for '{name}' in {train_tfrecord_path!r}; "
                "cannot compute normalization statistics."
            )
        mean = acc['sum'] / acc['count']
        # Rounding can push E[x^2] - mean^2 marginally below zero for
        # (near-)constant features; clamp so sqrt never yields NaN.
        variance = max(acc['sq_sum'] / acc['count'] - mean ** 2, 0.0)
        return {'mean': float(mean), 'std': float(np.sqrt(variance))}

    stats = {
        'inputs': {region: new_accumulator() for region in regions},
        'outputs': {name: new_accumulator() for name in target_names},
    }

    dataset = tf.data.TFRecordDataset(train_tfrecord_path).map(parse_fn)

    for batch in dataset.batch(1000):  # Process in chunks
        for region in regions:
            # Densifying pads shorter rows with zeros, and those zeros are
            # counted too. Spectra are presumably fixed-length per region, in
            # which case no padding occurs -- TODO confirm.
            dense = tf.sparse.to_dense(batch[f'ALBEDO_B-{region}']).numpy()
            accumulate(stats['inputs'][region], dense)

        for name in target_names:
            accumulate(stats['outputs'][name], batch[name].numpy())

    final_stats = {
        'inputs': {
            region: finalize(region, stats['inputs'][region]) for region in regions
        },
        'outputs': {
            name: finalize(name, stats['outputs'][name]) for name in target_names
        },
    }

    if output_path is not None:
        with open(output_path, 'w') as f:
            json.dump(final_stats, f)

    return final_stats

# Run this once on your training data
# stats = compute_normalization_stats("../data/train.tfrecord")

# DATA LOADING PIPELINE IMPLEMENTATION

In [4]:
# Read back the statistics file produced by compute_normalization_stats.
with open('normalization_stats.json') as f:
    stats = json.load(f)

# Flatten the nested JSON into {name: (mean, std)} lookups, which is the form
# the parsing function expects.
input_stats = {
    region: (stats['inputs'][region]['mean'], stats['inputs'][region]['std'])
    for region in ('UV', 'Vis', 'NIR')
}

output_stats = {
    name: (entry['mean'], entry['std'])
    for name, entry in stats['outputs'].items()
}

In [5]:
def parse_example(example_proto, input_stats, output_stats):
    """Decode one serialized example into standardized inputs and targets.

    Args:
        example_proto: A serialized ``tf.train.Example``.
        input_stats: Mapping region ('UV' / 'Vis' / 'NIR') -> (mean, std).
        output_stats: Mapping target name -> (mean, std).

    Returns:
        A pair ``(inputs, targets)`` of dicts. ``inputs`` maps
        'ALBEDO_B-<region>' to a z-scored spectrum of shape [n_points, 1];
        ``targets`` maps each target name to its z-scored scalar.
    """
    # Spectral points per region; iteration order fixes the input dict order.
    region_lengths = {'UV': 8, 'Vis': 94, 'NIR': 49}

    # Planetary parameters first, then the (already log-scaled) abundances.
    target_names = [
        'OBJECT-RADIUS-REL-EARTH',
        'OBJECT-GRAVITY',
        'ATMOSPHERE-TEMPERATURE',
        'ATMOSPHERE-PRESSURE',
        'log_C2H6', 'log_CH4', 'log_CO', 'log_CO2', 'log_H2O',
        'log_N2', 'log_N2O', 'log_O2', 'log_O3',
    ]

    spectrum_spec = {
        f'ALBEDO_B-{region}': tf.io.VarLenFeature(tf.float32)
        for region in ('NIR', 'UV', 'Vis')
    }
    target_spec = {
        name: tf.io.FixedLenFeature([], tf.float32) for name in target_names
    }
    parsed = tf.io.parse_single_example(
        example_proto, {**spectrum_spec, **target_spec}
    )

    # ----------------------
    # Inputs: densify, z-score, add a trailing channel axis
    # ----------------------
    normalized_inputs = {}
    for region, n_points in region_lengths.items():
        feature_name = f'ALBEDO_B-{region}'

        values = parsed[feature_name]
        if isinstance(values, tf.SparseTensor):
            values = tf.sparse.to_dense(values, default_value=0.0)

        region_mean = input_stats[region][0]
        region_std = input_stats[region][1]
        standardized = (values - region_mean) / region_std

        normalized_inputs[feature_name] = tf.reshape(standardized, [n_points, 1])

    # ----------------------
    # Targets: z-score every scalar with its own statistics
    # ----------------------
    processed_outputs = {}
    for name in target_names:
        target_mean = output_stats[name][0]
        target_std = output_stats[name][1]
        processed_outputs[name] = (parsed[name] - target_mean) / target_std

    return normalized_inputs, processed_outputs

In [6]:
def read_tfrecord(
    file_path, 
    input_stats, 
    output_stats, 
    batch_size=256, 
    shuffle_buffer=None,  # None or 0 disables shuffling
    repeat=False
):
    """Build a batched, prefetched tf.data pipeline from a TFRecord file.

    Pipeline order: read -> shuffle (optional) -> repeat (optional) ->
    parse/normalize -> batch -> prefetch.

    Shuffling is applied BEFORE repeating. A shuffle placed after a repeat
    fills its buffer across epoch boundaries, so examples from consecutive
    epochs get mixed and an example may be seen twice before another is seen
    once. Shuffling first keeps each pass over the data complete, and the
    data is reshuffled on every repetition.

    Args:
        file_path: Path forwarded to ``tf.data.TFRecordDataset``.
        input_stats: Mapping region -> (mean, std), passed to ``parse_example``.
        output_stats: Mapping target -> (mean, std), passed to ``parse_example``.
        batch_size: Number of examples per batch. The last batch of a finite
            dataset may be smaller (``drop_remainder=False``).
        shuffle_buffer: Size of the shuffle buffer; ``None`` or a value <= 0
            disables shuffling.
        repeat: If True, the dataset repeats indefinitely.

    Returns:
        tf.data.Dataset yielding ``(inputs, targets)`` dict pairs.
    """
    dataset = tf.data.TFRecordDataset(file_path, num_parallel_reads=tf.data.AUTOTUNE)

    # Shuffle the (small) serialized records rather than the parsed tensors.
    if shuffle_buffer is not None and shuffle_buffer > 0:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer)

    if repeat:
        dataset = dataset.repeat()

    parsed_dataset = dataset.map(
        lambda x: parse_example(x, input_stats, output_stats), 
        num_parallel_calls=tf.data.AUTOTUNE
    )

    parsed_dataset = parsed_dataset.batch(batch_size, drop_remainder=False)
    parsed_dataset = parsed_dataset.prefetch(tf.data.AUTOTUNE)

    return parsed_dataset

In [7]:
# Pass BATCH_SIZE explicitly: steps_per_epoch / validation_steps are derived
# from it, so relying on read_tfrecord's own default would silently break the
# epoch length if BATCH_SIZE were ever changed.
train_ds = read_tfrecord(
    "../data/train.tfrecord", input_stats, output_stats,
    batch_size=BATCH_SIZE, shuffle_buffer=10000, repeat=True
)
val_ds = read_tfrecord("../data/val.tfrecord", input_stats, output_stats, batch_size=BATCH_SIZE)
test_ds = read_tfrecord("../data/test.tfrecord", input_stats, output_stats, batch_size=BATCH_SIZE)

# Inspect a batch
sample_inputs, sample_outputs = next(iter(train_ds.take(1)))

print("\nInput Shapes:")
for k,v in sample_inputs.items():
    print(f"{k}: {v.shape}")  # Should be (batch_size, n_spectral_points, 1)

# Per-batch mean/std of the standardized targets; values are only expected to
# be roughly 0 +/- 1 since a single batch is a small sample.
print("\nOutput Ranges (standardized):")
for k,v in sample_outputs.items():
    print(f"{k}: {tf.reduce_mean(v):.3f} ± {tf.math.reduce_std(v):.3f}")


Input Shapes:
ALBEDO_B-UV: (256, 8, 1)
ALBEDO_B-Vis: (256, 94, 1)
ALBEDO_B-NIR: (256, 49, 1)

Output Ranges (standardized):
OBJECT-RADIUS-REL-EARTH: -0.029 ± 1.016
OBJECT-GRAVITY: -0.026 ± 1.023
ATMOSPHERE-TEMPERATURE: 0.031 ± 1.058
ATMOSPHERE-PRESSURE: -0.019 ± 1.032
log_C2H6: 0.694 ± 0.000
log_CH4: -1.259 ± 0.484
log_CO: -0.865 ± 0.546
log_CO2: -0.910 ± 0.690
log_H2O: -0.197 ± 0.776
log_N2: -0.711 ± 1.549
log_N2O: -0.454 ± 0.205
log_O2: 0.260 ± 0.529
log_O3: -0.413 ± 0.248


# MODEL ARCHITECTURE DESIGN

In [8]:
def build_model(
    *,
    l2_factor=1e-4,
    dense_dropout=0.4,
    head_dropout=0.3,
    learning_rate=1e-5,
    weight_decay=1e-5,
    global_clipnorm=1.0,
):
    """Construct and compile the multi-input, multi-output regression network.

    Architecture:
        1. Three 1-D convolutional branches, one per spectral region
           (UV: 8 points, Vis: 94 points, NIR: 49 points).
        2. A squeeze-and-excitation gate on each branch, followed by global
           average pooling and concatenation of the three branch vectors.
        3. A shared dense trunk (256 -> 128 units).
        4. One small regression head per target, each trained with MSE.

    All arguments are keyword-only and default to the values that were
    previously hard-coded, so ``build_model()`` builds the same model as before.

    Args:
        l2_factor: L2 penalty applied to every regularized kernel.
        dense_dropout: Dropout rate in the shared dense trunk.
        head_dropout: Dropout rate inside each output head.
        learning_rate: Initial AdamW learning rate.
        weight_decay: AdamW decoupled weight decay.
        global_clipnorm: Global gradient-norm clipping threshold.

    Returns:
        A compiled ``tf.keras.Model`` whose inputs are named 'ALBEDO_B-UV',
        'ALBEDO_B-Vis', 'ALBEDO_B-NIR' and whose outputs are a dict keyed by
        target name.
    """
    # Target variables; each gets its own output head named after it.
    outputs_list = [
        "OBJECT-RADIUS-REL-EARTH",
        "OBJECT-GRAVITY",
        "ATMOSPHERE-TEMPERATURE",
        "ATMOSPHERE-PRESSURE",
        'log_C2H6',
        'log_CH4',
        'log_CO',
        'log_CO2',
        'log_H2O',
        'log_N2',
        'log_N2O',
        'log_O2',
        'log_O3'
    ]

    # ----------------------
    # 1. Input Processing
    # ----------------------
    # One input per spectral range, shaped (n_spectral_points, 1). The layer
    # names must match the keys of the input dict produced by the data pipeline.
    input_uv = tf.keras.layers.Input(shape=(8, 1), name='ALBEDO_B-UV')
    input_vis = tf.keras.layers.Input(shape=(94, 1), name='ALBEDO_B-Vis')
    input_nir = tf.keras.layers.Input(shape=(49, 1), name='ALBEDO_B-NIR')

    # ----------------------
    # 2. Spectral Feature Extraction Branches
    # ----------------------
    # A single regularizer instance shared by all regularized layers.
    l2_reg = tf.keras.regularizers.l2(l2_factor)

    def create_branch(input_layer, filters, kernel_size, pool_size=2):
        """Stack one residual conv stage per entry of ``filters``.

        Each stage: Conv1D(3) + BN  ->  Conv1D(kernel_size) + BN + swish,
        added back onto the first conv's output, then average pooling.
        """
        x = input_layer
        for f in filters:
            # Entry convolution (kernel size 3); its output also serves as the
            # skip path, which brings the channel count to `f` so the Add works.
            x_init = tf.keras.layers.Conv1D(f, 3, padding='same', 
                                        kernel_initializer='he_normal', kernel_regularizer=l2_reg)(x)
            x_init = tf.keras.layers.BatchNormalization()(x_init)
            
            # Main path: convolution with the branch-specific kernel size.
            x = tf.keras.layers.Conv1D(f, kernel_size, padding='same',
                                    kernel_initializer='he_normal', kernel_regularizer=l2_reg)(x_init)
            x = tf.keras.layers.BatchNormalization()(x)
            x = tf.keras.layers.Activation('swish')(x)
            
            # Residual connection between the entry conv and the main path.
            x = tf.keras.layers.Add()([x_init, x])
            
            # With strides=1 and padding='same' this pooling does NOT shorten
            # the sequence: it is a moving average over `pool_size` points
            # that smooths the feature maps while keeping their length.
            x = tf.keras.layers.AveragePooling1D(
                pool_size=pool_size, 
                strides=1,  
                padding='same'
            )(x)
        return x

    # Branch depth and kernel size differ per spectral range.
    uv_branch = create_branch(input_uv, filters=[16, 32], kernel_size=3)
    vis_branch = create_branch(input_vis, filters=[32, 64, 128], kernel_size=7, pool_size=3)
    nir_branch = create_branch(input_nir, filters=[32, 64], kernel_size=5)

    # ----------------------
    # 3. Channel Attention and Fusion
    # ----------------------
    def squeeze_excite(x, ratio=8):
        """Squeeze-and-excitation: re-weight channels with a learned gate."""
        filters = x.shape[-1]  # Channel count of the incoming feature map.
        se = tf.keras.layers.GlobalAveragePooling1D()(x)  # Squeeze: one value per channel.
        se = tf.keras.layers.Dense(filters//ratio, activation='swish', kernel_regularizer=l2_reg)(se)  # Bottleneck.
        se = tf.keras.layers.Dense(filters, activation='sigmoid', kernel_regularizer=l2_reg)(se)  # Per-channel gate in (0, 1).
        # The gate has no sequence axis; this relies on the Multiply layer
        # broadcasting it across the sequence dimension of `x`.
        return tf.keras.layers.Multiply()([x, se])

    # Each branch is gated independently.
    uv_branch = squeeze_excite(uv_branch)
    vis_branch = squeeze_excite(vis_branch)
    nir_branch = squeeze_excite(nir_branch)

    # Collapse every branch to a fixed-size vector and join them.
    merged = tf.keras.layers.concatenate([
        tf.keras.layers.GlobalAveragePooling1D()(uv_branch),
        tf.keras.layers.GlobalAveragePooling1D()(vis_branch),
        tf.keras.layers.GlobalAveragePooling1D()(nir_branch)
    ])

    # ----------------------
    # 4. Shared Dense Trunk
    # ----------------------
    x = tf.keras.layers.Dense(256, activation='swish', kernel_regularizer=l2_reg)(merged)
    x = tf.keras.layers.Dropout(dense_dropout)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    
    x = tf.keras.layers.Dense(128, activation='swish', kernel_regularizer=l2_reg)(x)
    x = tf.keras.layers.Dropout(dense_dropout)(x)
    x = tf.keras.layers.BatchNormalization()(x)

    # ----------------------
    # 5. Output Heads
    # ----------------------
    outputs = {}
    loss_weights = {}
    for output_name in outputs_list:
        # Per-target head: 64 swish units, dropout, then a single linear unit.
        head = tf.keras.layers.Dense(64, activation='swish', kernel_regularizer=l2_reg)(x)
        head = tf.keras.layers.Dropout(head_dropout)(head)
        # The final layer carries the target's name so that losses and targets
        # can be matched by key.
        outputs[output_name] = tf.keras.layers.Dense(1, name=output_name)(head)
        
        # Log-abundance targets get twice the loss weight of the four
        # planetary/atmospheric parameters.
        if 'log_' in output_name:
            loss_weights[output_name] = 1.0
        else:
            loss_weights[output_name] = 0.5

    # ----------------------
    # 6. Model Compilation
    # ----------------------
    model = tf.keras.Model(
        inputs=[input_uv, input_vis, input_nir],
        outputs=outputs
    )

    # Every target is a standardized scalar regressed with mean squared error.
    losses = {output_name: tf.keras.losses.MeanSquaredError() for output_name in outputs_list}

    # AdamW with decoupled weight decay; gradients are clipped by global norm.
    optimizer = tf.keras.optimizers.AdamW(
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        global_clipnorm=global_clipnorm
    )

    model.compile(
        optimizer=optimizer,
        loss=losses, 
        loss_weights=loss_weights,
    )

    return model

In [9]:
# Build the compiled network and print its layer-by-layer summary as a quick
# sanity check before training.
model = build_model()
model.summary()

# TRAINING STRATEGY & OPTIMIZATION

In [10]:
# Upper bound on the number of epochs; early stopping may end training sooner.
EPOCHS = 100

# Stop when val_loss has not improved by at least 0.001 for 10 consecutive
# epochs, and roll the model back to its best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
)

# Halve the learning rate after 5 epochs without val_loss improvement,
# never going below 1e-7.
reduce_lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-7,
)

callbacks = [early_stopping, reduce_lr_on_plateau]

# train_ds repeats indefinitely, so steps_per_epoch is what delimits an epoch;
# val_ds is finite and is consumed in full at the end of every epoch.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    callbacks=callbacks,
)

Epoch 1/100
[1m3199/3199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m662s[0m 202ms/step - ATMOSPHERE-PRESSURE_loss: 0.8202 - ATMOSPHERE-TEMPERATURE_loss: 1.0015 - OBJECT-GRAVITY_loss: 0.8152 - OBJECT-RADIUS-REL-EARTH_loss: 0.7957 - log_C2H6_loss: 2.0808 - log_CH4_loss: 2.0327 - log_CO2_loss: 1.5893 - log_CO_loss: 1.2486 - log_H2O_loss: 1.5447 - log_N2O_loss: 1.0436 - log_N2_loss: 2.3636 - log_O2_loss: 1.4644 - log_O3_loss: 1.3258 - loss: 18.4332 - val_ATMOSPHERE-PRESSURE_loss: 0.3273 - val_ATMOSPHERE-TEMPERATURE_loss: 0.5122 - val_OBJECT-GRAVITY_loss: 0.3497 - val_OBJECT-RADIUS-REL-EARTH_loss: 0.3429 - val_log_C2H6_loss: 0.6485 - val_log_CH4_loss: 0.8097 - val_log_CO2_loss: 0.8243 - val_log_CO_loss: 0.8887 - val_log_H2O_loss: 0.6927 - val_log_N2O_loss: 0.6999 - val_log_N2_loss: 1.0828 - val_log_O2_loss: 0.8179 - val_log_O3_loss: 0.7995 - val_loss: 9.1433 - learning_rate: 1.0000e-05
Epoch 2/100


  self.gen.throw(typ, value, traceback)


[1m3199/3199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m650s[0m 203ms/step - ATMOSPHERE-PRESSURE_loss: 0.4567 - ATMOSPHERE-TEMPERATURE_loss: 0.6691 - OBJECT-GRAVITY_loss: 0.4538 - OBJECT-RADIUS-REL-EARTH_loss: 0.4629 - log_C2H6_loss: 0.8562 - log_CH4_loss: 1.2454 - log_CO2_loss: 1.1121 - log_CO_loss: 0.9244 - log_H2O_loss: 0.9599 - log_N2O_loss: 0.6646 - log_N2_loss: 1.7841 - log_O2_loss: 0.8740 - log_O3_loss: 0.7263 - loss: 11.4933 - val_ATMOSPHERE-PRESSURE_loss: 0.3215 - val_ATMOSPHERE-TEMPERATURE_loss: 0.5331 - val_OBJECT-GRAVITY_loss: 0.3005 - val_OBJECT-RADIUS-REL-EARTH_loss: 0.2965 - val_log_C2H6_loss: 0.6462 - val_log_CH4_loss: 0.8800 - val_log_CO2_loss: 0.7694 - val_log_CO_loss: 0.6720 - val_log_H2O_loss: 0.7205 - val_log_N2O_loss: 0.5522 - val_log_N2_loss: 0.9383 - val_log_O2_loss: 0.7811 - val_log_O3_loss: 0.5693 - val_loss: 8.3143 - learning_rate: 1.0000e-05
Epoch 3/100
[1m3199/3199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m650s[0m 203ms/step - ATMOSPHERE-PRESSU

KeyboardInterrupt: 