<center><h1>Deep Learning Pipeline</h1></center>

In [1]:
# Data Manipulation
import pandas as pd
import json
from datetime import datetime

pd.set_option('display.max_columns', None)

# System Settings
import warnings
import sys
import os

# Extend the import search path with the parent of the current working directory
# plus ../atmoseer and ../utils. The relative paths are resolved against the
# working directory at the time this cell runs.
# NOTE(review): presumably required by the `utils.*` / `atmoseer.*` imports further
# down in this cell — confirm the notebook is always launched from its own folder.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
sys.path.append(os.path.abspath('../atmoseer'))
sys.path.append(os.path.abspath('../utils'))
# Suppresses every Python warning for the rest of the kernel session.
warnings.filterwarnings("ignore")

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Custom Database Operations
from utils.postgres_processor import load_table

# Deep Learning Operations
import torch
from atmoseer.atmoseer_core import BayesianTuner
from atmoseer.preprocessors.atmoseer_preprocessor import AtmoSeerPreprocessor
from atmoseer.configs.atmoseer_config import BayesianTunerConfig
from atmoseer.evaluation.atmoseer_eval import EvaluateAtmoSeer

<center><h1>Carbon Dioxide (CO<sub>2</sub>)</h1></center>

In [3]:
# Load the CO2 table into a DataFrame.
# Credentials are read from the environment so the password is never stored in
# (or committed with) the notebook. A missing POSTGRES_PASSWORD raises KeyError
# instead of silently falling back to a baked-in secret.
# NOTE(review): the 2nd/3rd positional arguments of load_table look like the DB
# user and password — confirm against utils.postgres_processor.
co2_df = load_table(
    "CO2DataNOAA",
    os.environ.get("POSTGRES_USER", "postgres"),
    os.environ["POSTGRES_PASSWORD"],
)
print(co2_df.shape)
# Styled preview of the first rows (2-decimal formatting, blue borders, coolwarm gradient).
co2_df.head().style.format(precision=2).set_properties(**{'border': '1.5px solid blue'}).background_gradient(cmap='coolwarm')

(201850, 17)


Unnamed: 0,date,site,ppm,latitude,longitude,altitude,year,month,day,season,co2_change_rate,month_sin,month_cos,ppm_lag_14,ppm_lag_30,ppm_lag_365,biomass_density
0,1968-01-16,NWR,324.38,40.05,-105.63,3526.0,1968,1,16,Winter,1.5,0.5,0.87,324.38,324.38,324.38,328352893.44
1,1968-01-17,NWR,323.69,40.05,-105.63,3526.0,1968,1,17,Winter,-0.16,0.5,0.87,324.38,324.38,324.38,328352893.44
2,1968-02-29,NWR,325.53,40.05,-105.63,3526.0,1968,2,29,Winter,1.84,0.87,0.5,324.38,324.38,324.38,736124172.48
3,1968-03-07,NWR,326.49,40.05,-105.63,3526.0,1968,3,7,Spring,0.96,1.0,0.0,324.38,324.38,324.38,1143895451.52
4,1968-03-14,NWR,326.09,40.05,-105.63,3526.0,1968,3,14,Spring,-0.4,1.0,0.0,324.38,324.38,324.38,1143895451.52


## Preprocessing

In [3]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [None]:
# Build the data loaders from the CO2 frame. The returned object is indexed by
# 'train_loader' and 'val_loader' below, and by 'test_loader' in later cells.
preprocessor = AtmoSeerPreprocessor()
co2_dataloaders = preprocessor.prepare_data(co2_df)

print("Initializing tuner")

# Tuner settings for the CO2 model; the training cell reads n_trials from this object.
tuner_config = BayesianTunerConfig(gas_type='co2')

# NOTE(review): the `device` chosen in the previous cell is not passed in here —
# presumably BayesianTuner picks its own device; confirm.
tuner = BayesianTuner(
    train_loader=co2_dataloaders['train_loader'],
    val_loader=co2_dataloaders['val_loader'],
    config=tuner_config
)

Missing value counts:
biomass_density    6090
dtype: int64
Initializing tuner


## Train & Tune

In [4]:
# Run the hyperparameter search. optimize() returns a pair: a mapping of the best
# parameters and the matching validation loss (formatted as a float below).
# This is the long-running cell of the notebook: each trial trains for several epochs.
print(f"Starting optimization process ({tuner_config.n_trials} trials)...")
best_params, best_loss = tuner.optimize()

# Report the winning configuration, one "name: value" line per parameter.
print("\nOptimization completed!")
print(f"Best validation loss: {best_loss:.6f}")
print("\nBest parameters:")
for param, value in best_params.items():
    print(f"{param}: {value}")

Starting optimization process (16 trials)...

Trial 1/16
Epoch 1/30
Epoch 0: New best validation loss: 0.151055
Epoch 2/30
Epoch 1: New best validation loss: 0.102529
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Early stopping triggered at epoch 8. Best epoch was 1 with validation loss 0.102529

Trial 2/16
Epoch 1/30
Epoch 0: New best validation loss: 0.104868
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Early stopping triggered at epoch 7. Best epoch was 0 with validation loss 0.104868

Trial 3/16
Epoch 1/30
Epoch 0: New best validation loss: 0.114491
Epoch 2/30
Epoch 1: New best validation loss: 0.102654
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Early stopping triggered at epoch 8. Best epoch was 1 with validation loss 0.102654

Trial 4/16
Epoch 1/30
Epoch 0: New best validation loss: 0.106843
Epoch 2/30
Epoch 3/30
Epoch 2: New best validation loss: 0.106198
Epoch 4/30
Epoch 3: New best

## Test

In [6]:
# Rebuild the loaders from the CO2 frame; only 'test_loader' is used further down in this cell.
preprocessor = AtmoSeerPreprocessor()
co2_dataloaders = preprocessor.prepare_data(co2_df)

# Load the best CO2 model via the tuner class (no tuner instance is needed here).
# NOTE(review): presumably this reads the checkpoint written by the Train & Tune
# cell, so that cell must have completed at least once — confirm.
best_co2_model = BayesianTuner.load_best_model(gas_type='co2')
best_co2_model.prepare_prediction_defaults(co2_df)

# Month/year strings ("MM/YYYY") to predict for: a fixed historical month, the
# current month, and the same month one year ahead.
# The clock is read once so the derived entries cannot disagree across a month
# boundary, and the future entry is zero-padded like the other two (the previous
# f-string produced e.g. "6/2026" next to "06/2025").
# NOTE(review): this list is not consumed in the visible cells — confirm it is still needed.
now = datetime.now()
dates_to_predict = [
    "06/1996",
    now.strftime("%m/%Y"),
    f"{now:%m}/{now.year + 1}",
]

def test_model_responses(model, test_loader):
    """Test if model responds to different inputs"""
    model.eval()
    device = next(model.parameters()).device
    
    # Get two different batches
    batch1 = next(iter(test_loader))
    batch2 = next(iter(test_loader))
    
    with torch.no_grad():
        # Test first batch
        X1, y1 = batch1
        X1 = X1.to(device)
        out1 = model(X1)
        
        # Test second batch
        X2, y2 = batch2
        X2 = X2.to(device)
        out2 = model(X2)
        
        print("\nModel Response Test:")
        print(f"Input 1 mean: {X1.mean().item():.6f}")
        print(f"Input 2 mean: {X2.mean().item():.6f}")
        print(f"Output 1 mean: {out1.mean().item():.6f}")
        print(f"Output 2 mean: {out2.mean().item():.6f}")
        print(f"Output 1 std: {out1.std().item():.6f}")
        print(f"Output 2 std: {out2.std().item():.6f}")
        
        # Check if model weights seem normal
        print("\nModel Weight Check:")
        total_params = 0
        zero_params = 0
        for name, param in model.named_parameters():
            param_data = param.data
            zeros = (param_data == 0).sum().item()
            total = param_data.numel()
            print(f"{name}: mean={param_data.mean().item():.6f}, std={param_data.std().item():.6f}, zeros={zeros}/{total}")
            total_params += total
            zero_params += zeros
            
        print(f"\nTotal zero weights: {zero_params}/{total_params} ({zero_params/total_params*100:.2f}%)")

# Run the response/weight sanity check on the best CO2 model using the test loader.
print("Testing model responses...")
test_model_responses(best_co2_model, co2_dataloaders['test_loader'])

Missing value counts:
biomass_density    6090
dtype: int64
Testing model responses...

Model Response Test:
Input 1 mean: 5.487273
Input 2 mean: 5.487273
Output 1 mean: 1.045468
Output 2 mean: 1.045468
Output 1 std: 0.017346
Output 2 std: 0.017346

Model Weight Check:
input_norm.weight: mean=2.453863, std=0.941377, zeros=0/12
input_norm.bias: mean=-0.009101, std=0.545603, zeros=0/12
lstm.weight_ih_l0: mean=0.090957, std=0.713716, zeros=0/10560
lstm.weight_hh_l0: mean=0.003904, std=0.348899, zeros=0/193600
lstm.bias_ih_l0: mean=-0.088282, std=0.425470, zeros=0/880
lstm.bias_hh_l0: mean=-0.088282, std=0.425470, zeros=0/880
lstm.weight_ih_l0_reverse: mean=-0.002981, std=0.466908, zeros=0/10560
lstm.weight_hh_l0_reverse: mean=-0.000116, std=0.214788, zeros=0/193600
lstm.bias_ih_l0_reverse: mean=0.116542, std=0.407518, zeros=0/880
lstm.bias_hh_l0_reverse: mean=0.116542, std=0.407518, zeros=0/880
attention.0.weight: mean=-0.000936, std=0.403263, zeros=0/96800
attention.0.bias: mean=-0.087158

## Evaluation

In [None]:
# NOTE(review): this repeats the model-load and data-preparation steps of the Test
# cell — presumably so the evaluation can be run on its own; confirm, or reuse
# the objects already in memory to avoid preparing the data twice.
best_co2_model = BayesianTuner.load_best_model(gas_type='co2')
preprocessor = AtmoSeerPreprocessor()
co2_dataloaders = preprocessor.prepare_data(co2_df)
co2_test_loader = co2_dataloaders['test_loader']

# Evaluator for the CO2 model on the test split.
evaluator = EvaluateAtmoSeer('co2', best_co2_model, co2_test_loader)

# The metrics are stored in a variable; this cell's own code does not display them.
metrics = evaluator.get_metrics()

<center><h1>Methane (CH<sub>4</sub>)</h1></center>

In [None]:
# Load the CH4 table into a DataFrame.
# Credentials are read from the environment so the password is never stored in
# (or committed with) the notebook. A missing POSTGRES_PASSWORD raises KeyError
# instead of silently falling back to a baked-in secret.
# NOTE(review): the 2nd/3rd positional arguments of load_table look like the DB
# user and password — confirm against utils.postgres_processor.
ch4_df = load_table(
    "CH4DataNOAA",
    os.environ.get("POSTGRES_USER", "postgres"),
    os.environ["POSTGRES_PASSWORD"],
)
print(ch4_df.shape)
# Styled preview of the first rows (2-decimal formatting, blue borders, coolwarm gradient).
ch4_df.head().style.format(precision=2).set_properties(**{'border': '1.5px solid blue'}).background_gradient(cmap='coolwarm')

## Preprocessing

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

## Train & Tune

## Test

## Evaluation