<center><h1>Deep Learning Pipeline</h1></center>

In [1]:
# Data Manipulation
import pandas as pd
from datetime import date

pd.set_option('display.max_columns', None)

# System Settings
import warnings
import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
sys.path.append(os.path.abspath('../atmoseer'))
sys.path.append(os.path.abspath('../utils'))
warnings.filterwarnings("ignore")

# Custom Database Operations
from utils.postgres_processor import load_table
from utils.ppm_lookup import NOAALookup

# Deep Learning Operations
import torch
from atmoseer.atmoseer_core import BayesianTuner
from atmoseer.preprocessors.atmoseer_preprocessor import AtmoSeerPreprocessor
from atmoseer.configs.atmoseer_config import BayesianTunerConfig
from atmoseer.preprocessors.forecast_setup import CO2_CH4ForecastHelper, N2O_SF6ForecastHelper

# Initialize the GPU if available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


<center><h1>Carbon Dioxide (CO<sub>2</sub>)</h1></center>

In [None]:
# Database credentials are read from the environment so that no secret is stored
# in the notebook. PGPASSWORD must be exported before Jupyter is launched (a
# KeyError is raised otherwise); PGUSER falls back to the previous "postgres" user.
# NOTE(review): the 2nd/3rd positional arguments of load_table are assumed to be
# the database user and password, as in the original call - confirm.
db_user = os.environ.get("PGUSER", "postgres")
db_password = os.environ["PGPASSWORD"]

# Load the NOAA CO2 table, report its (rows, columns) shape and preview the first rows
co2_df = load_table("CO2DataNOAA", db_user, db_password)
print(co2_df.shape)
(
    co2_df.head()
    .style.format(precision=2)
    .set_properties(**{'border': '1.5px solid blue'})
    .background_gradient(cmap='coolwarm')
)

(201850, 17)


Unnamed: 0,date,site,ppm,latitude,longitude,altitude,year,month,day,season,co2_change_rate,month_sin,month_cos,ppm_lag_14,ppm_lag_30,ppm_lag_365,biomass_density
0,1968-01-16,NWR,324.38,40.05,-105.63,3526.0,1968,1,16,Winter,1.5,0.5,0.87,324.38,324.38,324.38,328352893.44
1,1968-01-17,NWR,323.69,40.05,-105.63,3526.0,1968,1,17,Winter,-0.16,0.5,0.87,324.38,324.38,324.38,328352893.44
2,1968-02-29,NWR,325.53,40.05,-105.63,3526.0,1968,2,29,Winter,1.84,0.87,0.5,324.38,324.38,324.38,736124172.48
3,1968-03-07,NWR,326.49,40.05,-105.63,3526.0,1968,3,7,Spring,0.96,1.0,0.0,324.38,324.38,324.38,1143895451.52
4,1968-03-14,NWR,326.09,40.05,-105.63,3526.0,1968,3,14,Spring,-0.4,1.0,0.0,324.38,324.38,324.38,1143895451.52


In [11]:
# Wrap the CO2 table in the NOAA lookup helper used by the query cells below
lookup = NOAALookup(co2_df)

# Overall time span covered by the table
print(f"Data available from {lookup.earliest_date} to {lookup.latest_date}")

# One entry per measurement site: name, coordinates, altitude and date range
sites = lookup.get_available_sites()
print("\nAvailable measurement sites:")
for site in sites:
    print(
        f"Site: {site['site']}",
        f"Location: {site['latitude']}, {site['longitude']}, altitude: {site['altitude']}m",
        f"Data range: {site['date_range']}\n",
        sep="\n",
    )

Data available from 1968-01-16 to 2024-05-31

Available measurement sites:
Site: ABP
Location: -12.76, -38.16, altitude: 6.0m
Data range: 2006-10-27 to 2010-01-13

Site: ALT
Location: 82.4508, -62.5072, altitude: 190.0m
Data range: 1985-06-17 to 2023-12-27

Site: AMS
Location: -37.95, 77.53, altitude: 153.0m
Data range: 1982-03-07 to 1990-11-12

Site: AMT
Location: 45.0345, -68.6821, altitude: 160.4m
Data range: 2003-09-19 to 2024-05-31

Site: AMY
Location: 36.5389, 126.3295, altitude: 87.0m
Data range: 2013-12-03 to 2023-11-30

Site: ASC
Location: -7.9667, -14.4, altitude: 87.0m
Data range: 1979-08-27 to 2023-12-31

Site: ASK
Location: 23.2625, 5.6322, altitude: 2715.0m
Data range: 1995-09-12 to 2023-12-31

Site: AVI
Location: 17.75, -64.75, altitude: 5.0m
Data range: 1979-03-03 to 1990-08-29

Site: AZR
Location: 38.75, -27.08, altitude: 22.0m
Data range: 1979-12-31 to 2022-03-21

Site: BAL
Location: 55.5, 16.67, altitude: 28.0m
Data range: 1992-09-03 to 2011-06-22

Site: BAO
Location

In [None]:
# Query a single date; the recorded output shows the helper falling back to the
# nearest available date when the requested one has no measurements
date_records = lookup.lookup_date('1968-02-01')
print("\nMeasurements for 1968-02-01 (or nearest date):")
for record in date_records:
    print(
        f"Site: {record.site}",
        f"PPM: {record.ppm}",
        f"Location: {record.latitude}, {record.longitude}\n",
        sep="\n",
    )

Note: No data for 1968-02-01. Using nearest available date: 1968-01-17

Measurements for 1968-02-01 (or nearest date):
Site: NWR
PPM: 323.69
Location: 40.05, -105.63



In [None]:
# Same single-date query, but asking for plain dictionaries instead of record objects
dict_records = lookup.lookup_date(
    '1968-02-01',
    as_dict=True,
)

print("\nData as dictionary:")
for record in dict_records:
    print(record)

Note: No data for 1968-02-01. Using nearest available date: 1968-01-17

Data as dictionary:
{'date': datetime.date(1968, 1, 17), 'site': 'NWR', 'ppm': 323.69, 'latitude': 40.05, 'longitude': -105.63, 'altitude': 3526.0}


In [None]:
# Query window, kept in variables so the lookup call and the printed heading
# always describe the same range
range_start, range_end = '1968-02-01', '1969-05-31'
range_records = lookup.lookup_range(range_start, range_end)

# The result maps each date in the range to the list of records measured that day
print(f"\nMeasurements for {range_start} - {range_end}:")
for d, records in range_records.items():
    print(f"\nDate: {d}")
    for record in records:
        print(f"Site: {record.site}, PPM: {record.ppm}")


Measurements for 1968-02-01' - '1969-05-31:

Date: 1968-02-29
Site: NWR, PPM: 325.53

Date: 1968-03-07
Site: NWR, PPM: 326.49

Date: 1968-03-14
Site: NWR, PPM: 326.09

Date: 1968-03-22
Site: NWR, PPM: 325.98

Date: 1968-03-28
Site: NWR, PPM: 326.26

Date: 1968-04-11
Site: NWR, PPM: 326.75

Date: 1968-04-30
Site: NWR, PPM: 326.65

Date: 1968-05-14
Site: NWR, PPM: 326.16

Date: 1968-05-21
Site: NWR, PPM: 327.41

Date: 1968-05-28
Site: NWR, PPM: 326.47

Date: 1968-06-06
Site: NWR, PPM: 323.94

Date: 1968-06-13
Site: NWR, PPM: 325.25

Date: 1968-07-03
Site: NWR, PPM: 320.33

Date: 1968-08-07
Site: NWR, PPM: 318.2

Date: 1968-10-03
Site: NWR, PPM: 320.09

Date: 1968-11-07
Site: NWR, PPM: 322.49

Date: 1968-11-26
Site: NWR, PPM: 323.76

Date: 1968-11-29
Site: STC, PPM: 324.32

Date: 1968-12-10
Site: NWR, PPM: 324.6

Date: 1968-12-17
Site: NWR, PPM: 324.48

Date: 1968-12-21
Site: STC, PPM: 324.88

Date: 1968-12-23
Site: NWR, PPM: 323.7

Date: 1969-01-01
Site: NWR, PPM: 324.6

Date: 1969-01-0

## Preprocessing

In [3]:
# Turn the CO2 table into data loaders.
# NOTE(review): no gas_type is passed here, unlike the CH4/N2O/SF6 cells -
# presumably the preprocessor defaults to CO2; confirm.
preprocessor = AtmoSeerPreprocessor()
co2_dataloaders = preprocessor.prepare_data(co2_df)

print("Initializing tuner")

# CO2 tuner settings, then the tuner itself wired to the train/validation loaders
tuner_config = BayesianTunerConfig(gas_type='co2')
tuner = BayesianTuner(
    config=tuner_config,
    val_loader=co2_dataloaders['val_loader'],
    train_loader=co2_dataloaders['train_loader'],
)

Missing value counts:
biomass_density    6090
dtype: int64
Initializing tuner


## Train & Tune

In [4]:
# Run the hyper-parameter search; optimize() yields the best parameters and their loss
print(f"Starting optimization process ({tuner_config.n_trials} trials)...")
best_params, best_loss = tuner.optimize()

# Report the winning configuration, one "name: value" line per parameter
print(
    "\nOptimization completed!",
    f"Best validation loss: {best_loss:.6f}",
    "\nBest parameters:",
    sep="\n",
)
for param, value in best_params.items():
    print(f"{param}: {value}")

Starting optimization process (25 trials)...

Trial 1/25
Epoch: 0 
New best validation loss: 0.098067 for current trial
Epoch: 1 
New best validation loss: 0.092317 for current trial
Epoch: 3 
New best validation loss: 0.091035 for current trial
Epoch: 4 
New best validation loss: 0.079872 for current trial
Epoch: 5 
New best validation loss: 0.079418 for current trial
Epoch: 6 
New best validation loss: 0.076297 for current trial
Epoch: 7 
New best validation loss: 0.075524 for current trial
Epoch: 9 
New best validation loss: 0.070546 for current trial
Epoch: 16 
New best validation loss: 0.065076 for current trial
Early stopping triggered at epoch 31. Best epoch was 16 with validation loss 0.065076

Trial 2/25
Epoch: 0 
New best validation loss: 0.154402 for current trial
Epoch: 1 
New best validation loss: 0.109914 for current trial
Epoch: 2 
New best validation loss: 0.102618 for current trial
Epoch: 4 
New best validation loss: 0.102540 for current trial
Early stopping triggered 

## Forecasting

This method implements an iterative forecasting process where each prediction becomes part of the input for the next 
prediction. It uses Monte Carlo sampling with added Gaussian noise to estimate prediction uncertainty, which naturally 
grows over time as predictions are chained together. Essentially, the Monte Carlo sampling accounts for the inherent randomness in time series data, as real-life values often fluctuate within a certain range rather than following an exact y=x relationship, even if the overall trend is linear. Instead of making a prediction based on one single point, this 
forecast method will create a normal distribution around a specific prediction point using 100 normally distributed values, 
where the mean is the prediction point and the standard deviation is the noise_scale. This will create a range of possible 
values that the prediction could be, which will be used to create the uncertainty bounds. The further out into the future 
that the predictions go, the wider the uncertainty bounds become. The Bayesian Tuner will go through many trials to find 
the optimal sequence length (lookback window in days) and then this forecast method will take that sequence length and use 
it to generate predictions. For dates that are past this sequence length, the predicted values will be entirely based on 
other predicted values (not trained data points), which will increase the uncertainty by a larger and larger amount.

In [None]:
# Load the best CO2 model via the tuner's loader, placed on the selected device
atmoseer_co2 = BayesianTuner.load_best_model(gas_type='co2', device=device)

# Database credentials are read from the environment so that no secret is stored
# in the notebook. PGPASSWORD must be exported before Jupyter is launched (a
# KeyError is raised otherwise); PGUSER falls back to the previous "postgres" user.
# NOTE(review): the 2nd/3rd positional arguments of load_table are assumed to be
# the database user and password, as in the original call - confirm.
db_user = os.environ.get("PGUSER", "postgres")
db_password = os.environ["PGPASSWORD"]
co2_df = load_table("CO2DataNOAA", db_user, db_password)

forecaster = CO2_CH4ForecastHelper(atmoseer_co2, co2_df)

# Forecast a single target date; the helper also returns the intermediate
# per-date results, which the next cell prints
target_date = date(2025, 4, 15)
prediction, historical = forecaster.predict_for_date(target_date)

print(f"\nPrediction for {prediction.date}:")
print(f"CO2 Level: {prediction.ppm:.2f} ppm")
print(f"Confidence Interval: ({prediction.lower_bound:.2f} - {prediction.upper_bound:.2f} ppm)")

Missing value counts:
biomass_density    6090
dtype: int64

Prediction for 2025-04-15:
CO2 Level: 428.02 ppm
Confidence Interval: (400.47 - 455.56 ppm)


In [3]:
# Print every per-date forecast with its lower/upper uncertainty bounds
for d, result in historical.items():
    interval = f"({result.lower_bound:.2f} - {result.upper_bound:.2f})"
    print(f"{d}: {result.ppm:.2f} ppm {interval}")

2024-06-01: 422.02 ppm (419.98 - 424.06)
2024-06-02: 422.03 ppm (419.95 - 424.11)
2024-06-03: 422.04 ppm (419.92 - 424.16)
2024-06-04: 422.04 ppm (419.88 - 424.20)
2024-06-05: 422.05 ppm (419.85 - 424.25)
2024-06-06: 422.05 ppm (419.81 - 424.29)
2024-06-07: 422.06 ppm (419.78 - 424.34)
2024-06-08: 422.06 ppm (419.74 - 424.38)
2024-06-09: 422.07 ppm (419.71 - 424.43)
2024-06-10: 422.07 ppm (419.67 - 424.47)
2024-06-11: 422.08 ppm (419.64 - 424.52)
2024-06-12: 422.08 ppm (419.60 - 424.56)
2024-06-13: 422.09 ppm (419.57 - 424.61)
2024-06-14: 422.10 ppm (419.54 - 424.66)
2024-06-15: 422.10 ppm (419.50 - 424.70)
2024-06-16: 422.11 ppm (419.47 - 424.75)
2024-06-17: 422.11 ppm (419.43 - 424.79)
2024-06-18: 422.12 ppm (419.40 - 424.84)
2024-06-19: 422.12 ppm (419.36 - 424.88)
2024-06-20: 422.13 ppm (419.33 - 424.93)
2024-06-21: 422.13 ppm (419.29 - 424.97)
2024-06-22: 422.14 ppm (419.26 - 425.02)
2024-06-23: 422.15 ppm (419.23 - 425.07)
2024-06-24: 422.15 ppm (419.19 - 425.11)
2024-06-25: 422.

<center><h1>Methane (CH<sub>4</sub>)</h1></center>

In [None]:
# Database credentials are read from the environment so that no secret is stored
# in the notebook. PGPASSWORD must be exported before Jupyter is launched (a
# KeyError is raised otherwise); PGUSER falls back to the previous "postgres" user.
# NOTE(review): the 2nd/3rd positional arguments of load_table are assumed to be
# the database user and password, as in the original call - confirm.
db_user = os.environ.get("PGUSER", "postgres")
db_password = os.environ["PGPASSWORD"]

# Load the NOAA CH4 table, report its (rows, columns) shape and preview the first rows
ch4_df = load_table("CH4DataNOAA", db_user, db_password)
print(ch4_df.shape)
(
    ch4_df.head()
    .style.format(precision=2)
    .set_properties(**{'border': '1.5px solid blue'})
    .background_gradient(cmap='coolwarm')
)

(146240, 17)


Unnamed: 0,date,site,ppm,latitude,longitude,altitude,year,month,day,season,ch4_change_rate,month_sin,month_cos,ppm_lag_14,ppm_lag_30,ppm_lag_365,biomass_density
0,1983-01-01,PSA,1556.13,-64.77,-64.05,13.0,1983,1,1,Winter,0.0,0.5,0.87,1556.13,1556.13,1556.13,50.0
1,1983-01-11,PSA,1549.04,-64.77,-64.05,13.0,1983,1,11,Winter,-7.09,0.5,0.87,1556.13,1556.13,1556.13,50.0
2,1983-01-17,HBA,1560.18,-75.56,-27.02,13.0,1983,1,17,Winter,0.0,0.5,0.87,1556.13,1556.13,1556.13,50.0
3,1983-01-21,AMS,1548.03,-37.95,77.53,153.0,1983,1,21,Winter,0.0,0.5,0.87,1556.13,1556.13,1556.13,2274643146.23
4,1983-01-24,HBA,1564.23,-75.56,-27.02,13.0,1983,1,24,Winter,4.05,0.5,0.87,1556.13,1556.13,1556.13,50.0


## Preprocessing

In [3]:
# Turn the CH4 table into data loaders; the recorded output shows the preprocessor
# dropping 'biomass_density' and capping outliers for this gas type
preprocessor = AtmoSeerPreprocessor(gas_type='ch4')
ch4_dataloaders = preprocessor.prepare_data(ch4_df)

print("Initializing tuner")

# CH4 tuner settings, then the tuner itself wired to the train/validation loaders
tuner_config = BayesianTunerConfig(gas_type='ch4')
tuner = BayesianTuner(
    config=tuner_config,
    val_loader=ch4_dataloaders['val_loader'],
    train_loader=ch4_dataloaders['train_loader'],
)

Dropped 'biomass_density' column for ch4 gas type
Capping 5 outliers in column 'altitude'
Capping 294 outliers in column 'ch4_change_rate'
Initializing tuner


## Train & Tune

Since the tuner did not improve past trial 7, the tuner will now only do 10 trials instead of 25.

In [4]:
# Run the hyper-parameter search; optimize() yields the best parameters and their loss
print(f"Starting optimization process ({tuner_config.n_trials} trials)...")
best_params, best_loss = tuner.optimize()

# Report the winning configuration, one "name: value" line per parameter
print(
    "\nOptimization completed!",
    f"Best validation loss: {best_loss:.6f}",
    "\nBest parameters:",
    sep="\n",
)
for param, value in best_params.items():
    print(f"{param}: {value}")

Starting optimization process (10 trials)...

Trial 1/10
Adjusted input_dim to 11 for ch4 gas type
Initializing AtmoSeer for ch4 with input_dim=11
Epoch: 0 
New best validation loss: 0.621477 for current trial
Epoch: 1 
New best validation loss: 0.579731 for current trial
Epoch: 2 
New best validation loss: 0.508537 for current trial
Epoch: 3 
New best validation loss: 0.475276 for current trial
Early stopping triggered at epoch 18. Best epoch was 3 with validation loss 0.475276

Trial 2/10
Adjusted input_dim to 11 for ch4 gas type
Initializing AtmoSeer for ch4 with input_dim=11
Epoch: 0 
New best validation loss: 0.653091 for current trial
Epoch: 1 
New best validation loss: 0.596076 for current trial
Epoch: 2 
New best validation loss: 0.560468 for current trial
Epoch: 3 
New best validation loss: 0.534408 for current trial
Epoch: 4 
New best validation loss: 0.527964 for current trial
Epoch: 5 
New best validation loss: 0.523983 for current trial
Epoch: 6 
New best validation loss: 

## Forecasting

In [None]:
# Load the best CH4 model via the tuner's loader, placed on the selected device
atmoseer_ch4 = BayesianTuner.load_best_model(gas_type='ch4', device=device)

# Database credentials are read from the environment so that no secret is stored
# in the notebook. PGPASSWORD must be exported before Jupyter is launched (a
# KeyError is raised otherwise); PGUSER falls back to the previous "postgres" user.
# NOTE(review): the 2nd/3rd positional arguments of load_table are assumed to be
# the database user and password, as in the original call - confirm.
db_user = os.environ.get("PGUSER", "postgres")
db_password = os.environ["PGPASSWORD"]
ch4_df = load_table("CH4DataNOAA", db_user, db_password)

forecaster = CO2_CH4ForecastHelper(atmoseer_ch4, ch4_df)

# Forecast a single target date; the helper also returns the intermediate
# per-date results, which the next cell prints
target_date = date(2025, 4, 15)
prediction, historical = forecaster.predict_for_date(target_date)

# NOTE(review): values around 2000 are labelled "ppm" below, but atmospheric
# methane is normally reported in ppb - confirm the unit label.
print(f"\nPrediction for {prediction.date}:")
print(f"CH4 Level: {prediction.ppm:.2f} ppm")
print(f"Confidence Interval: ({prediction.lower_bound:.2f} - {prediction.upper_bound:.2f} ppm)")

Initializing AtmoSeer for ch4 with input_dim=11
Capping 5 outliers in column 'altitude'
Dropped 'biomass_density' column for ch4 gas type
Capping 5 outliers in column 'altitude'
Capping 294 outliers in column 'ch4_change_rate'

Prediction for 2025-04-15:
CH4 Level: 2049.02 ppm
Confidence Interval: (2021.48 - 2076.57 ppm)


In [3]:
# Print every per-date forecast with its lower/upper uncertainty bounds
for d, result in historical.items():
    interval = f"({result.lower_bound:.2f} - {result.upper_bound:.2f})"
    print(f"{d}: {result.ppm:.2f} ppm {interval}")

2024-06-01: 2030.31 ppm (2028.27 - 2032.35)
2024-06-02: 2030.32 ppm (2028.24 - 2032.40)
2024-06-03: 2030.32 ppm (2028.20 - 2032.44)
2024-06-04: 2030.33 ppm (2028.17 - 2032.49)
2024-06-05: 2030.33 ppm (2028.13 - 2032.53)
2024-06-06: 2030.34 ppm (2028.10 - 2032.58)
2024-06-07: 2030.34 ppm (2028.06 - 2032.62)
2024-06-08: 2030.35 ppm (2028.03 - 2032.67)
2024-06-09: 2030.36 ppm (2028.00 - 2032.72)
2024-06-10: 2030.36 ppm (2027.96 - 2032.76)
2024-06-11: 2030.37 ppm (2027.93 - 2032.81)
2024-06-12: 2030.37 ppm (2027.89 - 2032.85)
2024-06-13: 2030.38 ppm (2027.86 - 2032.90)
2024-06-14: 2030.38 ppm (2027.82 - 2032.94)
2024-06-15: 2030.39 ppm (2027.79 - 2032.99)
2024-06-16: 2030.39 ppm (2027.75 - 2033.03)
2024-06-17: 2030.40 ppm (2027.72 - 2033.08)
2024-06-18: 2030.40 ppm (2027.68 - 2033.12)
2024-06-19: 2030.41 ppm (2027.65 - 2033.17)
2024-06-20: 2030.42 ppm (2027.62 - 2033.22)
2024-06-21: 2030.42 ppm (2027.58 - 2033.26)
2024-06-22: 2030.43 ppm (2027.55 - 2033.31)
2024-06-23: 2030.43 ppm (2027.51

<center><h1>Nitrous Oxide (N<sub>2</sub>O)</h1></center>

In [None]:
# Database credentials are read from the environment so that no secret is stored
# in the notebook. PGPASSWORD must be exported before Jupyter is launched (a
# KeyError is raised otherwise); PGUSER falls back to the previous "postgres" user.
# NOTE(review): the 2nd/3rd positional arguments of load_table are assumed to be
# the database user and password, as in the original call - confirm.
db_user = os.environ.get("PGUSER", "postgres")
db_password = os.environ["PGPASSWORD"]

# Load the NOAA N2O table, report its (rows, columns) shape and preview the first rows
n2o_df = load_table("N2ODataNOAA", db_user, db_password)
print(n2o_df.shape)
(
    n2o_df.head()
    .style.format(precision=2)
    .set_properties(**{'border': '1.5px solid blue'})
    .background_gradient(cmap='coolwarm')
)

(93927, 17)


Unnamed: 0,date,site,ppm,latitude,longitude,altitude,year,month,day,season,n2o_change_rate,month_sin,month_cos,ppm_lag_14,ppm_lag_30,ppm_lag_365,biomass_density
0,1996-02-15,HBA,310.36,-75.61,-26.21,35.0,1996,2,15,Winter,0.0,0.87,0.5,310.36,310.36,310.36,55.0
1,1996-03-03,HBA,310.16,-75.61,-26.21,35.0,1996,3,3,Spring,-0.2,1.0,0.0,310.36,310.36,310.36,58.66
2,1996-03-15,HBA,310.43,-75.61,-26.21,35.0,1996,3,15,Spring,0.27,1.0,0.0,310.36,310.36,310.36,58.66
3,1996-03-27,HBA,310.74,-75.61,-26.21,35.0,1996,3,27,Spring,0.31,1.0,0.0,310.36,310.36,310.36,58.66
4,1996-05-15,HBA,310.24,-75.61,-26.21,35.0,1996,5,15,Spring,-0.5,0.5,-0.87,310.36,310.36,310.36,58.66


## Preprocessing

In [3]:
# Turn the N2O table into data loaders; the recorded output shows the preprocessor
# dropping 'biomass_density' and capping outliers for this gas type
preprocessor = AtmoSeerPreprocessor(gas_type='n2o')
n2o_dataloaders = preprocessor.prepare_data(n2o_df)

print("Initializing tuner")

# N2O tuner settings, then the tuner itself wired to the train/validation loaders
tuner_config = BayesianTunerConfig(gas_type='n2o')
tuner = BayesianTuner(
    config=tuner_config,
    val_loader=n2o_dataloaders['val_loader'],
    train_loader=n2o_dataloaders['train_loader'],
)

Dropped 'biomass_density' column for n2o gas type
Capping 5 outliers in column 'altitude'
Capping 188 outliers in column 'n2o_change_rate'
Initializing tuner


## Train & Tune

In [None]:
# Run the hyper-parameter search; optimize() yields the best parameters and their loss
print(f"Starting optimization process ({tuner_config.n_trials} trials)...")
best_params, best_loss = tuner.optimize()

# Report the winning configuration, one "name: value" line per parameter
print(
    "\nOptimization completed!",
    f"Best validation loss: {best_loss:.6f}",
    "\nBest parameters:",
    sep="\n",
)
for param, value in best_params.items():
    print(f"{param}: {value}")

Starting optimization process (10 trials)...

Trial 1/10
Adjusted input_dim to 11 for n2o gas type
Initializing AtmoSeer for n2o with input_dim=11
Epoch: 0 
New best validation loss: 0.053443 for current trial
Early stopping triggered at epoch 15. Best epoch was 0 with validation loss 0.053443

Trial 2/10
Adjusted input_dim to 11 for n2o gas type
Initializing AtmoSeer for n2o with input_dim=11
Epoch: 0 
New best validation loss: 0.055879 for current trial
Epoch: 12 
New best validation loss: 0.053676 for current trial
Epoch: 21 
New best validation loss: 0.051918 for current trial
Early stopping triggered at epoch 36. Best epoch was 21 with validation loss 0.051918

Trial 3/10
Adjusted input_dim to 11 for n2o gas type
Initializing AtmoSeer for n2o with input_dim=11
Epoch: 0 
New best validation loss: 0.063126 for current trial
Epoch: 1 
New best validation loss: 0.058150 for current trial
Epoch: 11 
New best validation loss: 0.057001 for current trial
Epoch: 12 
New best validation los

## Forecasting

In [None]:
# Load the best N2O model via the tuner's loader, placed on the selected device
atmoseer_n2o = BayesianTuner.load_best_model(gas_type='n2o', device=device)

# Database credentials are read from the environment so that no secret is stored
# in the notebook. PGPASSWORD must be exported before Jupyter is launched (a
# KeyError is raised otherwise); PGUSER falls back to the previous "postgres" user.
# NOTE(review): the 2nd/3rd positional arguments of load_table are assumed to be
# the database user and password, as in the original call - confirm.
db_user = os.environ.get("PGUSER", "postgres")
db_password = os.environ["PGPASSWORD"]
n2o_df = load_table("N2ODataNOAA", db_user, db_password)

forecaster = N2O_SF6ForecastHelper(atmoseer_n2o, n2o_df)

# Forecast a single target date; the helper also returns the intermediate
# per-date results, which the next cell prints
target_date = date(2025, 4, 15)
prediction, historical = forecaster.predict_for_date(target_date)

# NOTE(review): values around 310 are labelled "ppm" below, but atmospheric
# N2O is normally reported in ppb - confirm the unit label.
print(f"\nPrediction for {prediction.date}:")
print(f"N2O Level: {prediction.ppm:.2f} ppm")
print(f"Confidence Interval: ({prediction.lower_bound:.2f} - {prediction.upper_bound:.2f} ppm)")

In [None]:
# Print every per-date forecast with its lower/upper uncertainty bounds
for d, result in historical.items():
    interval = f"({result.lower_bound:.2f} - {result.upper_bound:.2f})"
    print(f"{d}: {result.ppm:.2f} ppm {interval}")

<center><h1>Sulfur Hexafluoride (SF<sub>6</sub>)</h1></center>

In [2]:
# Database credentials are read from the environment so that no secret is stored
# in the notebook. PGPASSWORD must be exported before Jupyter is launched (a
# KeyError is raised otherwise); PGUSER falls back to the previous "postgres" user.
# NOTE(review): the 2nd/3rd positional arguments of load_table are assumed to be
# the database user and password, as in the original call - confirm.
db_user = os.environ.get("PGUSER", "postgres")
db_password = os.environ["PGPASSWORD"]

# Load the NOAA SF6 table, report its (rows, columns) shape and preview the first rows
sf6_df = load_table("SF6DataNOAA", db_user, db_password)
print(sf6_df.shape)
(
    sf6_df.head()
    .style.format(precision=2)
    .set_properties(**{'border': '1.5px solid blue'})
    .background_gradient(cmap='coolwarm')
)

(94954, 17)


Unnamed: 0,date,site,ppm,latitude,longitude,altitude,year,month,day,season,sf6_change_rate,month_sin,month_cos,ppm_lag_14,ppm_lag_30,ppm_lag_365,biomass_density
0,1996-12-04,ASK,3.97,23.26,5.63,2715.0,1996,12,4,Winter,0.0,-0.0,1.0,3.97,3.97,3.97,524249231.29
1,1996-12-19,ASK,3.96,23.26,5.63,2715.0,1996,12,19,Winter,-0.01,-0.0,1.0,3.97,3.97,3.97,524249231.29
2,1997-01-04,ASK,3.9,23.26,5.63,2715.0,1997,1,4,Winter,-0.06,0.5,0.87,3.97,3.97,3.97,535639470.79
3,1997-01-16,SPO,3.8,-89.98,-24.8,2815.0,1997,1,16,Winter,0.08,0.5,0.87,3.97,3.97,3.97,50.0
4,1997-01-18,ASK,4.06,23.26,5.63,2715.0,1997,1,18,Winter,0.16,0.5,0.87,3.97,3.97,3.97,535639470.79


## Preprocessing

In [3]:
# Turn the SF6 table into data loaders; the recorded output shows the preprocessor
# dropping 'biomass_density' and capping outliers for this gas type
preprocessor = AtmoSeerPreprocessor(gas_type='sf6')
sf6_dataloaders = preprocessor.prepare_data(sf6_df)

print("Initializing tuner")

# SF6 tuner settings, then the tuner itself wired to the train/validation loaders
tuner_config = BayesianTunerConfig(gas_type='sf6')
tuner = BayesianTuner(
    config=tuner_config,
    val_loader=sf6_dataloaders['val_loader'],
    train_loader=sf6_dataloaders['train_loader'],
)

Dropped 'biomass_density' column for sf6 gas type
Capping 59 outliers in column 'altitude'
Capping 189 outliers in column 'sf6_change_rate'
Initializing tuner


## Train & Tune

In [4]:
# Run the hyper-parameter search; optimize() yields the best parameters and their loss
print(f"Starting optimization process ({tuner_config.n_trials} trials)...")
best_params, best_loss = tuner.optimize()

# Report the winning configuration, one "name: value" line per parameter
print(
    "\nOptimization completed!",
    f"Best validation loss: {best_loss:.6f}",
    "\nBest parameters:",
    sep="\n",
)
for param, value in best_params.items():
    print(f"{param}: {value}")

Starting optimization process (10 trials)...

Trial 1/10
Adjusted input_dim to 11 for sf6 gas type
Initializing AtmoSeer for sf6 with input_dim=11
Epoch: 0 
New best validation loss: 0.038062 for current trial
Epoch: 1 
New best validation loss: 0.036542 for current trial
Early stopping triggered at epoch 16. Best epoch was 1 with validation loss 0.036542

Trial 2/10
Adjusted input_dim to 11 for sf6 gas type
Initializing AtmoSeer for sf6 with input_dim=11
Epoch: 0 
New best validation loss: 0.029088 for current trial
Early stopping triggered at epoch 15. Best epoch was 0 with validation loss 0.029088

Trial 3/10
Adjusted input_dim to 11 for sf6 gas type
Initializing AtmoSeer for sf6 with input_dim=11
Epoch: 0 
New best validation loss: 0.031703 for current trial
Early stopping triggered at epoch 15. Best epoch was 0 with validation loss 0.031703

Trial 4/10
Adjusted input_dim to 11 for sf6 gas type
Initializing AtmoSeer for sf6 with input_dim=11
Epoch: 0 
New best validation loss: 0.03

## Forecasting

In [None]:
# Load the best SF6 model via the tuner's loader, placed on the selected device
atmoseer_sf6 = BayesianTuner.load_best_model(gas_type='sf6', device=device)

# Database credentials are read from the environment so that no secret is stored
# in the notebook. PGPASSWORD must be exported before Jupyter is launched (a
# KeyError is raised otherwise); PGUSER falls back to the previous "postgres" user.
# NOTE(review): the 2nd/3rd positional arguments of load_table are assumed to be
# the database user and password, as in the original call - confirm.
db_user = os.environ.get("PGUSER", "postgres")
db_password = os.environ["PGPASSWORD"]
sf6_df = load_table("SF6DataNOAA", db_user, db_password)

forecaster = N2O_SF6ForecastHelper(atmoseer_sf6, sf6_df)

# Same target date as the CO2, CH4 and N2O forecasts. The previous value
# (2024-04-15) differed from them by one year.
# NOTE(review): assumes the SF6 table ends around mid-2024 like the CO2 data - confirm.
target_date = date(2025, 4, 15)
prediction, historical = forecaster.predict_for_date(target_date)

# NOTE(review): values around 4 are labelled "ppm" below, but atmospheric
# SF6 is normally reported in ppt - confirm the unit label.
print(f"\nPrediction for {prediction.date}:")
print(f"SF6 Level: {prediction.ppm:.2f} ppm")
print(f"Confidence Interval: ({prediction.lower_bound:.2f} - {prediction.upper_bound:.2f} ppm)")

In [None]:
# Print every per-date forecast with its lower/upper uncertainty bounds
for d, result in historical.items():
    interval = f"({result.lower_bound:.2f} - {result.upper_bound:.2f})"
    print(f"{d}: {result.ppm:.2f} ppm {interval}")