# LEAP - Atmospheric Physiscs using AI
1. Data Pipeline
2. Model
3. Training

## Set up the Notebook:

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install lightning

Collecting lightning
  Downloading lightning-2.5.0.post0-py3-none-any.whl.metadata (40 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/40.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.4/40.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities<2.0,>=0.10.0 (from lightning)
  Downloading lightning_utilities-0.12.0-py3-none-any.whl.metadata (5.6 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning)
  Downloading torchmetrics-1.6.1-py3-none-any.whl.metadata (21 kB)
Collecting pytorch-lightning (from lightning)
  Downloading pytorch_lightning-2.5.0.post0-py3-none-any.whl.metadata (21 kB)
Downloading lightning-2.5.0.post0-py3-none-any.whl (815 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m815.2/815.2 kB[0m [31m50.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.12.0-py3-none-any.whl (28 kB)
Downloading torchmetrics-1.6.1-py3-none-any

In [3]:
import os
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, transforms
import lightning as L
import torch.nn.functional as F
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from torchmetrics import MeanSquaredError
from lightning.pytorch import Trainer
#from mapping_dict import input_to_target_mapping
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import pandas as pd

train_data_path = "/content/drive/MyDrive/LEAP_project/train_dataset.parquet"
test_data_path = "/content/drive/MyDrive/LEAP_project/test_dataset.parquet"

# Setting the seed
L.seed_everything(114)

INFO: Seed set to 114
INFO:lightning.fabric.utilities.seed:Seed set to 114


114

## 1. Data Module to load, normalize and transform the data:

In [9]:
class LEAPDataset(Dataset):
    def __init__(self, data_file, target_name, seed=42):
        if not os.path.exists(data_file):
            raise FileNotFoundError(f"Data file '{data_file}' not found.")
        if target_name not in input_to_target_mapping:
            raise ValueError(f"Target '{target_name}' not found in mapping.")


        # Step 1: Load the dataset
        self.data = pd.read_parquet(data_file)

        # Step 2: Pick corresponding inputs and targets
        self.inputs = self.data.iloc[:,input_to_target_mapping[target_name][0]]
        self.targets = self.data.iloc[:,input_to_target_mapping[target_name][1]]

        # Step 3: Compute column-wise max and min for normalization
        self.col_max = self.inputs.max(axis=0)  # Max of each column
        self.col_min = self.inputs.min(axis=0) # Min of each column
        self.col_max_target = self.targets.max(axis=0)
        self.col_min_target = self.targets.min(axis=0)



    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        # Extract features and targets for the given index
        features = self.inputs.iloc[idx].values
        targets = self.targets.iloc[idx].values

        # Normalize features and targets

        epsilon = 1e-8
        features = 2 * (features - self.col_min) / (self.col_max - self.col_min + epsilon) - 1
        targets = 2 * (targets - self.col_min_target) / (self.col_max_target - self.col_min_target + epsilon) - 1
        # Convert to tensors
        features = torch.tensor(features, dtype=torch.float32)
        targets = torch.tensor(targets, dtype=torch.float32)

        return features, targets

# Data Module

class LEAPDataModule(L.LightningDataModule):
    def __init__(self, data_file, target_name, batch_size=64, val_size=0.2, test_data_file=None):
        super().__init__()
        self.data_file = data_file
        self.target_name = target_name
        self.batch_size = batch_size
        self.val_size = val_size
        self.test_data_file = test_data_file

    def setup(self, stage=None):
        # Load the dataset
        specific_dataset = LEAPDataset(self.data_file, self.target_name)

        # Split into train and validation sets
        val_size = int(self.val_size * len(specific_dataset))
        train_size = len(specific_dataset) - val_size

        if stage == 'fit' or stage is None:
            self.LEAP_train, self.LEAP_val = random_split(specific_dataset, [train_size, val_size])

        if stage == 'test' or stage is None:
            self.LEAP_test = LEAPDataset(self.test_data_file, self.target_name)

    def train_dataloader(self):
        return DataLoader(self.LEAP_train, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.LEAP_val, batch_size=self.batch_size)

    def test_dataloader(self):
        return DataLoader(self.LEAP_test, batch_size=self.batch_size)

## 2. Model Definition:

In [5]:
# setting up a MLP
class MLP(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dims=[512, 64]):
        super(MLP, self).__init__()
        layers = []
        prev_dim = input_dim

        # Hidden layers
        for hidden_dim in hidden_dims:
            layers.append(nn.Linear(prev_dim, hidden_dim))
            layers.append(nn.ReLU())
            prev_dim = hidden_dim

        # Output layer
        layers.append(nn.Linear(prev_dim, output_dim))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

In [6]:
#setting up a transformer:
class TransformerModel(nn.Module):
    def __init__(self, input_dim, output_dim, d_model=128, num_heads=4, num_layers=2, dim_feedforward=512, dropout=0.1):
        super(TransformerModel, self).__init__()

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.d_model = d_model

        # Linear projection of input features to d_model
        self.input_projection = nn.Linear(input_dim, d_model)

        # Transformer Encoder Layer
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads, dim_feedforward=dim_feedforward, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Output Projection
        self.output_projection = nn.Linear(d_model, output_dim)

    def forward(self, x):
        # Project input to d_model dimensions
        x = self.input_projection(x)

        # Pass through Transformer Encoder
        x = self.transformer_encoder(x)

        # Project to output dimension
        x = self.output_projection(x)

        return x

In [7]:
#building a LEAP family of models:
class LEAP_fam(L.LightningModule):
    def __init__(self, target_name, input_to_target_mapping, model_type="mlp", learning_rate=1e-3):
        super(LEAP_fam, self).__init__()

        # Fetch input-output sizes from the mapping for the target
        input_indices, output_indices = input_to_target_mapping[target_name]
        input_dim = len(input_indices)
        output_dim = len(output_indices)

        # Choose model type
        if model_type == "transformer":
            self.model = TransformerModel(input_dim=input_dim, output_dim=output_dim)
        else:
            self.model = MLP(input_dim=input_dim, output_dim=output_dim, hidden_dims=[512, 64])  # Default to MLP


        # Define loss function and metric
        self.loss_fn = nn.MSELoss()  # Or another loss function depending on the task
        self.metric = MeanSquaredError()

        # For logging
        self.target_name = target_name

        # Initialize storage for test outputs
        self.test_outputs = []  # Ensure this attribute is initialized here
        self.train_losses = []  # To store batch-level training losses

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        inputs, targets = batch
        outputs = self(inputs)
        loss = self.loss_fn(outputs, targets)

        # Store the training loss for this batch
        self.train_losses.append(loss.item())
        self.log(f"{self.target_name}_train_loss", loss)

        return loss

    def on_training_epoch_end(self):
        # Compute the average loss for the epoch
        avg_train_loss = sum(self.train_losses) / len(self.train_losses)

        # Log the average training loss for the epoch
        self.log(f"{self.target_name}_train_loss_epoch", avg_train_loss)

        # Reset the list for the next epoch
        self.train_losses = []

    def validation_step(self, batch, batch_idx):
        inputs, targets = batch
        outputs = self(inputs)
        loss = self.loss_fn(outputs, targets)
        self.log(f"{self.target_name}_val_loss", loss)
        self.metric(outputs, targets)
        return loss

    def on_validation_epoch_end(self):
        # Log metric value for the entire validation epoch
        self.log(f"{self.target_name}_val_loss_epoch", self.metric.compute())
        # Reset the metric for the next epoch
        self.metric.reset()

    def test_step(self, batch, batch_idx):
        inputs, targets = batch
        outputs = self(inputs)
        loss = self.loss_fn(outputs, targets)
        self.log(f"{self.target_name}_test_loss", loss)

        # Accumulate predictions and targets for further processing
        self.test_outputs.append({"preds": outputs.detach(), "targets": targets.detach()})

        return {"preds": outputs.detach(), "targets": targets.detach()}

    def on_test_epoch_end(self):
        # Gather all predictions and targets from the accumulated outputs
        all_preds = torch.cat([out["preds"] for out in self.test_outputs], dim=0)
        all_targets = torch.cat([out["targets"] for out in self.test_outputs], dim=0)

        # Save predictions and targets as attributes for later access
        self.test_predictions = all_preds
        self.test_targets = all_targets

        # Clear the accumulated outputs
        self.test_outputs = []

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

## 3. Training

### Temperature:
- heating tendency  
(not shown here, training done in another notebook. Results: see project_summary.png)

### Clouds:
- moistening tendency
- change in liquid cloud mixing ratio
- change in ice cloud mixing ratio

In [None]:
#pick the target and model type to train here:
target_to_train = "moistening_and_clouds"
model_type = "transformer"
if __name__ == '__main__':
    batch_size = 1024 #choose batch size
    target_name = target_to_train

    # Data Module
    data_module = LEAPDataModule(
        data_file=train_data_path,
        target_name=target_name,
        batch_size=batch_size,
        val_size=0.2
    )

    # Model
    model = LEAP_fam(target_name=target_name, input_to_target_mapping=input_to_target_mapping, model_type=model_type)

    # Callbacks
    early_stopping = EarlyStopping(
        monitor=f"{target_name}_val_loss",
        patience=5,
        mode="min",
        verbose=True
    )
    model_checkpoint = ModelCheckpoint(
        monitor=f"{target_name}_val_loss",
        save_top_k=1,
        mode='min',
        dirpath="/content/drive/MyDrive/LEAP_Project/saved_models",
        filename=f"LEAPfam-{target_name}_best_model"
    )

    # TensorBoard Logger
    logger = TensorBoardLogger(
        save_dir="/content/drive/MyDrive/LEAP_Project/logs",
        name=f"LEAP_model_logs_{target_name}",
        default_hp_metric=False
    )

    # Trainer
    trainer = L.Trainer(
        accelerator="auto",
        devices="auto",
        max_epochs=50,
        logger=logger,
        callbacks=[early_stopping, model_checkpoint]
    )

    # Train
    trainer.fit(model, data_module)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | TransformerModel | 438 K  | train
1 | loss_fn | MSELoss          | 0      | train
2 | metric  | MeanSquaredError | 0      | train
-----------------------------------------------------
438 K     Trainable params
0         Non-trainable params
438 K     Total params
1.753     Total estimated model params size (MB)
27        Modules in train mode
0         Modules in 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

  features = torch.tensor(features, dtype=torch.float32)
  targets = torch.tensor(targets, dtype=torch.float32)


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric moistening_and_clouds_val_loss improved. New best score: 0.004
INFO:lightning.pytorch.callbacks.early_stopping:Metric moistening_and_clouds_val_loss improved. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric moistening_and_clouds_val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.003
INFO:lightning.pytorch.callbacks.early_stopping:Metric moistening_and_clouds_val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric moistening_and_clouds_val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.003
INFO:lightning.pytorch.callbacks.early_stopping:Metric moistening_and_clouds_val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.003


### Wind:
- zonal acceleration
- meridional acceleration
(not shown here, training done in another notebook. Results: see project_summary.png)

### Radiation:
LW radiation

In [10]:
# Train the models
#pick the target and model type to train here:
target_to_train = "FLWDS"
model_type = "mlp"
if __name__ == '__main__':
    batch_size = 1024 #choose batch size
    target_name = target_to_train

    # Data Module
    data_module = LEAPDataModule(
        data_file=train_data_path,
        target_name=target_name,
        batch_size=batch_size,
        val_size=0.2
    )

    # Model
    model = LEAP_fam(target_name=target_name, input_to_target_mapping=input_to_target_mapping, model_type=model_type)

    # Callbacks
    early_stopping = EarlyStopping(
        monitor=f"{target_name}_val_loss",
        patience=5,
        mode="min",
        verbose=True
    )
    model_checkpoint = ModelCheckpoint(
        monitor=f"{target_name}_val_loss",
        save_top_k=1,
        mode='min',
        dirpath="/content/drive/MyDrive/LEAP_Project/saved_models",
        filename=f"LEAPfam-{target_name}_best_model"
    )

    # TensorBoard Logger
    logger = TensorBoardLogger(
        save_dir="/content/drive/MyDrive/LEAP_Project/logs",
        name=f"LEAP_model_logs_{target_name}",
        default_hp_metric=False
    )

    # Trainer
    trainer = L.Trainer(
        accelerator="auto",
        devices="auto",
        max_epochs=50,
        logger=logger,
        callbacks=[early_stopping, model_checkpoint]
    )

    # Train
    trainer.fit(model, data_module)

INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: 
  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MLP              | 251 K  | train
1 | loss_fn | MSELoss          | 0      | train
2 | metric  | MeanSquaredError | 0      | train
-----------------------------------------------------
251 K     Trainable params
0         Non-trainable params
251 K     Total params
1.006     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type             | Params | Mode 
-----------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

  features = torch.tensor(features, dtype=torch.float32)
  targets = torch.tensor(targets, dtype=torch.float32)


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric FLWDS_val_loss improved. New best score: 0.006
INFO:lightning.pytorch.callbacks.early_stopping:Metric FLWDS_val_loss improved. New best score: 0.006


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric FLWDS_val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.005
INFO:lightning.pytorch.callbacks.early_stopping:Metric FLWDS_val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.005


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric FLWDS_val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.003
INFO:lightning.pytorch.callbacks.early_stopping:Metric FLWDS_val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric FLWDS_val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.003
INFO:lightning.pytorch.callbacks.early_stopping:Metric FLWDS_val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.003


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric FLWDS_val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.002
INFO:lightning.pytorch.callbacks.early_stopping:Metric FLWDS_val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric FLWDS_val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.002
INFO:lightning.pytorch.callbacks.early_stopping:Metric FLWDS_val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric FLWDS_val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.002
INFO:lightning.pytorch.callbacks.early_stopping:Metric FLWDS_val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric FLWDS_val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.002
INFO:lightning.pytorch.callbacks.early_stopping:Metric FLWDS_val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Monitored metric FLWDS_val_loss did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.
INFO:lightning.pytorch.callbacks.early_stopping:Monitored metric FLWDS_val_loss did not improve in the last 5 records. Best score: 0.002. Signaling Trainer to stop.


### Precipitation:
- rain rate
- snow rate
(not shown here, training done in another notebook. Results: see project_summary.png)