# Example of how to use our modules

## PreProcess

In [1]:
from preprocess import ProcessingConfig, HDF5Processor
from neuralop.data.transforms.normalizers import UnitGaussianNormalizer

# Open the raw snapshot file that will be preprocessed
source_file = "/Users/anthonypoole/Repositories/hw_snapshots.h5"
processor = HDF5Processor(source_file)

# Preprocessing options. With these settings the recorded run below produced
# 20 chunks of 100 timesteps in total (16 train / 4 test).
config = ProcessingConfig(
    test_split=0.2,
    chunk_size=100,
    subsample_rate=2,
    start_idx=1000,  # start from the 1000th timestep
    end_idx=5000,    # end at the 5000th timestep
    normalizer=UnitGaussianNormalizer(dim=[0, 2, 3]),
    normalizer_size=100,
    output_performance_report=True,
)

# Write the train/test splits to separate HDF5 files and show the returned metadata
metadata = processor.process_file(
    train_output_file="train_processed.h5",
    test_output_file="test_processed.h5",
    config=config,
)
print(f"Processed file metadata: {metadata}")

Original data - Mean: -0.0000, Std: 0.6839
Normalized data - Mean: -0.0000, Std: 0.6539

Performance Report:
normalization_time: 61.83 seconds
subsample_time: 53.34 seconds
train_save_time: 8.36 seconds
test_save_time: 2.26 seconds
split_and_save_time: 10.63 seconds
total_processing_time: 125.90 seconds
Processed file metadata: {'train': {'n_chunks': 16, 'chunk_size': 100, 'processed_shape': (16, 3, 512, 512, 100), 'actual_timesteps': 1600, 'split': 'train', 'invariants': ['$\\Gamma_c$', '$\\Gamma_n$', '$\\mathcal{D}^E$', '$\\mathcal{D}^U$', 'energy', 'enstrophy', 'time']}, 'test': {'n_chunks': 4, 'chunk_size': 100, 'processed_shape': (4, 3, 512, 512, 100), 'actual_timesteps': 400, 'split': 'test', 'invariants': ['$\\Gamma_c$', '$\\Gamma_n$', '$\\mathcal{D}^E$', '$\\mathcal{D}^U$', 'energy', 'enstrophy', 'time']}, 'total_chunks': 20, 'subsample_rate': 2, 'invariants_processed': ['$\\Gamma_c$', '$\\Gamma_n$', '$\\mathcal{D}^E$', '$\\mathcal{D}^U$', 'energy', 'enstrophy', 'time']}


## S3 utils

In [3]:
import os
from s3_utils import check_env_variables, upload, download

# The AWS credentials are expected in the environment. For a local demonstration
# they can be set in-process (never commit real credentials):
# os.environ['AWS_ACCESS_KEY_ID'] = 'your_access_key_id'
# os.environ['AWS_SECRET_ACCESS_KEY'] = 'your_secret_access_key'
# os.environ['AWS_REGION'] = 'us-east-2'

# Check AWS environment variables
try:
    check_env_variables()
except EnvironmentError as e:
    print(e)
    # Re-raise rather than calling exit(1): exit() is meant for scripts and, inside
    # a notebook, acts on the kernel session instead of just failing this cell.
    raise

# Upload example: in the recorded run the file lands at s3://fnobucket/test2.txt
upload("/Users/anthonypoole/Repositories/test2.txt", "fnobucket")

# Download example: fetch the same object into the current directory
download("test2.txt", "fnobucket", ".")

All required AWS environment variables are set.
Uploaded /Users/anthonypoole/Repositories/test2.txt to s3://fnobucket/test2.txt
Downloaded s3://fnobucket/test2.txt to ./test2.txt


## Create Dataset and Loaders

In [2]:
from data_utils import CustomDataset, create_dataloaders

# Build the train/test loaders from the two files written by the preprocessing cell
train_loader, test_loader = create_dataloaders(
    train_file="train_processed.h5",
    test_file="test_processed.h5",
    batch_size=2,
    input_size=20,
    drop_last=True,
)

# Inspect the first test batch only. Shapes as seen in the recorded output of this
# cell (chunk_size=100, input_size=20, 7 invariants):
#   x     -> (batch_size, channels, x_dim, y_dim, input_size)
#   y     -> (batch_size, channels, x_dim, y_dim, chunk_size - input_size)
#   der_x -> (batch_size, n_invariants, input_size)
#   der_y -> (batch_size, n_invariants, chunk_size - input_size)
for batch in test_loader:
    x, y = batch['x'], batch['y']
    der_x, der_y = batch['der_x'], batch['der_y']

    for tensor in (x, y, der_x, der_y):
        print(tensor.shape)
    break

torch.Size([2, 3, 512, 512, 20])
torch.Size([2, 3, 512, 512, 80])
torch.Size([2, 7, 20])
torch.Size([2, 7, 80])


## Model Training

In [None]:
import os
import torch
import matplotlib.pyplot as plt
import sys
from datetime import datetime
from neuralop.models import TFNO
from neuralop import Trainer
from neuralop.training import AdamW
from neuralop.data.datasets import load_darcy_flow_small
from neuralop.utils import count_model_params
from neuralop import LpLoss, H1Loss
import wandb

# Clear CUDA cache and set device
torch.cuda.empty_cache()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Device: {device}')

# Model parameters
# NOTE(review): the recorded dataloader output shows batches with 3 channels
# (torch.Size([2, 3, 512, 512, 20])) while in/out channels are 1 here, and the
# target has 80 timesteps versus 20 in the input — confirm the model configuration
# matches the data before launching a run.
model_params = {
    'n_modes': (1, 16, 16),
    'in_channels': 1,
    'out_channels': 1,
    'hidden_channels': 16,
    'projection_channels': 64,
    'factorization': 'tucker',
    'rank': 0.42,
    # Training-loop setting, not a model argument; kept in this dict because the
    # Trainer setup and the saved checkpoint read it from here.
    'epochs': 150,
}
optimizer_params = {
    # Was 1e-44 (almost certainly a typo): a step size that small leaves the
    # weights effectively unchanged, so the model would never train.
    "lr": 1e-4,
    "weight_decay": 1e-4,
    "betas": (0.95, 0.999),
}

# Create model; 'epochs' is filtered out so only model arguments reach the constructor
model_kwargs = {key: value for key, value in model_params.items() if key != 'epochs'}
model = TFNO(**model_kwargs)
model = model.to(device)
n_params = count_model_params(model)
print(f'\nOur model has {n_params:,} parameters.')
sys.stdout.flush()

# Training setup
optimizer = AdamW(model.parameters(), **optimizer_params)

# Cosine schedule with a half-period of 30 scheduler steps
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30)

# Loss setup: train on H1, report both H1 and L2 at evaluation time
l2loss = LpLoss(d=3, p=2)
h1loss = H1Loss(d=3)
train_loss = h1loss
eval_losses = {'h1': h1loss, 'l2': l2loss}

# Print setup information
print('\n### MODEL ###\n', model)
print('\n### OPTIMIZER ###\n', optimizer)
print('\n### SCHEDULER ###\n', scheduler)
print('\n### LOSSES ###')
print(f'\n * Train: {train_loss}')
print(f'\n * Test: {eval_losses}')
sys.stdout.flush()

# Require a wandb API key in the environment before starting a run. An explicit
# membership test replaces the former bare `except:`, and the KeyError now says
# which variable is missing.
if 'WANDB_API_KEY' not in os.environ:
    raise KeyError("WANDB_API_KEY environment variable is not set; it is required for wandb logging")

# Initialize wandb
wandb.init(
    project="fno",
)

# Create trainer
trainer = Trainer(
    model=model,
    n_epochs=model_params['epochs'],
    device=device,
    wandb_log=True,
    eval_interval=3,
    use_distributed=False,
    verbose=True,
    # mixed_precision=True
)

# Train model
try:
    trainer.train(
        train_loader=train_loader,
        # The trainer expects a {name: loader} mapping for evaluation loaders,
        # so the single test loader is wrapped in a dict.
        test_loaders={'test': test_loader},
        optimizer=optimizer,
        scheduler=scheduler,
        regularizer=False,
        training_loss=train_loss,
        eval_losses=eval_losses,
    )
finally:
    # Finish the run even if training raises, so the wandb run is not left open
    wandb.finish()

# Function to generate unique save path
def get_unique_save_path(base_path, params=None):
    """Return a ``.pt`` checkpoint path under ``base_path`` that does not exist yet.

    The file name encodes the model hyperparameters and the current local time:
    ``TFNO_ch{in}to{out}_hidden{h}_proj{p}_modes{AxBxC}_{YYYYmmdd_HHMMSS}.pt``.
    If that file already exists, ``_1``, ``_2``, ... is appended before the
    extension until an unused name is found.

    Args:
        base_path: Directory the checkpoint should be placed in. It is not created here.
        params: Mapping with the keys ``in_channels``, ``out_channels``,
            ``hidden_channels``, ``projection_channels`` and ``n_modes``.
            Defaults to the notebook-level ``model_params``.

    Returns:
        The checkpoint path as a string, ending in ``.pt``.

    Raises:
        KeyError: If ``params`` lacks one of the keys listed above.
    """
    if params is None:
        # Backward-compatible default: use the notebook's global hyperparameters
        params = model_params

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    modes = 'x'.join(map(str, params['n_modes']))
    model_name = (
        f"TFNO_ch{params['in_channels']}to{params['out_channels']}_"
        f"hidden{params['hidden_channels']}_"
        f"proj{params['projection_channels']}_"
        f"modes{modes}"
    )
    stem = os.path.join(base_path, f"{model_name}_{timestamp}")

    # Ensure uniqueness: append an increasing counter while the name is taken
    candidate = f"{stem}.pt"
    counter = 1
    while os.path.exists(candidate):
        candidate = f"{stem}_{counter}.pt"
        counter += 1

    return candidate


# Pick a fresh checkpoint path inside the save directory
save_base_path = '/content/drive/MyDrive'
save_path = get_unique_save_path(save_base_path)

# Create the target directory if it is missing
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Bundle weights, optimizer state and hyperparameters into a single checkpoint
checkpoint = dict(
    model_state_dict=model.state_dict(),
    optimizer_state_dict=optimizer.state_dict(),
    model_params=model_params,
)
torch.save(checkpoint, save_path)

print(f"Model saved to {save_path}")