<center><h1>Deep Learning Pipeline</h1></center>

In [1]:
# Data Manipulation
import pandas as pd
import json
from datetime import datetime

# Show every column when a DataFrame is displayed (no truncation of wide frames).
pd.set_option('display.max_columns', None)

# System Settings
import warnings
import sys
import os

# Extend the import search path with the parent of the current working
# directory and with `../atmoseer` and `../utils`. All three are resolved
# relative to the working directory, so the project imports below presumably
# expect the notebook to be launched from a subfolder of the repo — TODO confirm.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
sys.path.append(os.path.abspath('../atmoseer'))
sys.path.append(os.path.abspath('../utils'))
# NOTE(review): this silences every warning for the rest of the session,
# including any raised by the libraries imported below.
warnings.filterwarnings("ignore")

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Custom Database Operations
from utils.postgres_processor import load_table

# Deep Learning Operations
import torch
from atmoseer.atmoseer_core import BayesianTuner
from atmoseer.preprocessors.atmoseer_preprocessor import AtmoSeerPreprocessor
from atmoseer.configs.atmoseer_config import BayesianTunerConfig
from atmoseer.evaluation.atmoseer_eval import EvaluateAtmoSeer

<center><h1>Carbon Dioxide (CO<sub>2</sub>)</h1></center>

In [2]:
# Read the database credentials from the environment so that no secret is
# stored in the notebook or its version-control history. PGUSER / PGPASSWORD
# are the standard libpq variable names.
# NOTE(review): the password that used to be hardcoded here has been exposed
# and should be rotated.
# Assumes load_table's 2nd and 3rd positional arguments are the user name and
# password, matching the original call — TODO confirm against utils.postgres_processor.
pg_user = os.environ.get("PGUSER", "postgres")
pg_password = os.environ.get("PGPASSWORD")
if pg_password is None:
    raise RuntimeError(
        "Set the PGPASSWORD environment variable before loading the CO2 table."
    )

co2_df = load_table("CO2DataNOAA", pg_user, pg_password)
print(co2_df.shape)
co2_df.head().style.format(precision=2).set_properties(**{'border': '1.5px solid blue'}).background_gradient(cmap='coolwarm')

Unnamed: 0,date,site,ppm,latitude,longitude,altitude,year,month,day,season,co2_change_rate,month_sin,month_cos,ppm_lag_14,ppm_lag_30,ppm_lag_365,biomass_density
0,1968-01-16,NWR,324.38,40.05,-105.63,3526.0,1968,1,16,Winter,1.5,0.5,0.87,324.38,324.38,324.38,328352893.44
1,1968-01-17,NWR,323.69,40.05,-105.63,3526.0,1968,1,17,Winter,-0.16,0.5,0.87,324.38,324.38,324.38,328352893.44
2,1968-02-29,NWR,325.53,40.05,-105.63,3526.0,1968,2,29,Winter,1.84,0.87,0.5,324.38,324.38,324.38,736124172.48
3,1968-03-07,NWR,326.49,40.05,-105.63,3526.0,1968,3,7,Spring,0.96,1.0,0.0,324.38,324.38,324.38,1143895451.52
4,1968-03-14,NWR,326.09,40.05,-105.63,3526.0,1968,3,14,Spring,-0.4,1.0,0.0,324.38,324.38,324.38,1143895451.52


## Preprocessing

In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Build the data loaders for the CO2 frame; the 'train_loader' and
# 'val_loader' entries feed the tuner below.
preprocessor = AtmoSeerPreprocessor()
co2_dataloaders = preprocessor.prepare_data(co2_df)

print("Initializing tuner...")

# Tuning configuration: 50 trials for the 'co2' gas type.
tuner_config = BayesianTunerConfig(n_trials=50, gas_type='co2')

# The tuner receives the training and validation loaders plus the configuration.
tuner = BayesianTuner(
    config=tuner_config,
    train_loader=co2_dataloaders['train_loader'],
    val_loader=co2_dataloaders['val_loader'],
)

## Train & Tune

In [None]:
# Run the hyperparameter search and keep the best parameters and loss it returns.
print(f"Starting optimization process ({tuner_config.n_trials} trials)...")
best_params, best_loss = tuner.optimize()

# Summary header, emitted in one call (the output text is unchanged).
print(
    "\nOptimization completed!",
    f"Best validation loss: {best_loss:.6f}",
    "\nBest parameters:",
    sep="\n",
)
# One "name: value" line per tuned parameter.
for param, value in best_params.items():
    print(f"{param}: {value}")

In [None]:
# Load the recorded trial history from the tuner's per-gas output directory.
with open(tuner_config.gas_dir / 'optimization_results.json', 'r') as f:
    opt_history = json.load(f)

trials = opt_history['trials']
trial_numbers = range(1, len(trials) + 1)
losses = [-trial['target'] for trial in trials]  # Convert back to loss

# Per-trial loss together with the running best (cumulative minimum).
plt.figure(figsize=(12, 6))
plt.plot(trial_numbers, losses, 'b-', label='Trial Loss')
plt.plot(trial_numbers, pd.Series(losses).cummin(), 'r-', label='Best Loss')
plt.xlabel('Trial Number')
plt.ylabel('Validation Loss')
plt.title('Optimization Progress')
plt.legend()
plt.grid(True)
plt.show()

# Plot parameter distributions for top 10 trials
# Sorting on -target ascending puts the lowest-loss trials first.
top_10_trials = sorted(trials, key=lambda x: -x['target'])[:10]
param_values = {param: [trial['params'][param] for trial in top_10_trials]
                for param in best_params.keys()}

# Size the grid from the number of tuned parameters rather than assuming
# exactly six: a fixed 2x3 grid raises IndexError with more parameters and
# leaves empty panels with fewer.
n_params = len(param_values)
if n_params:
    n_cols = min(3, n_params)
    n_rows = (n_params + n_cols - 1) // n_cols  # ceiling division
    # squeeze=False keeps `axes` a 2-D array even for a single row or column.
    fig, axes = plt.subplots(n_rows, n_cols,
                             figsize=(5 * n_cols, 5 * n_rows),
                             squeeze=False)
    axes = axes.ravel()

    for i, (param, values) in enumerate(param_values.items()):
        sns.boxplot(y=values, ax=axes[i])
        axes[i].set_title(f'Top 10 {param} Values')
        axes[i].set_ylabel(param)

    # Hide any leftover panels in the last row.
    for unused_ax in axes[n_params:]:
        unused_ax.set_visible(False)

    plt.tight_layout()
    plt.show()

## Test

In [None]:
# Load the best CO2 model via the tuner's class-level loader.
best_model = BayesianTuner.load_best_model(gas_type='co2')

# Rebuild the data loaders so this cell can obtain the test loader without
# relying on variables from the tuning cells.
preprocessor = AtmoSeerPreprocessor()
co2_dataloaders = preprocessor.prepare_data(co2_df)
co2_test_loader = co2_dataloaders['test_loader']

best_model.prepare_prediction_defaults(co2_df)

# Take a single timestamp so the "current" and "future" entries cannot disagree
# about the month, and format every entry as zero-padded MM/YYYY.
now = datetime.now()
dates_to_predict = [
    "06/1996",                          # Past date
    now.strftime("%m/%Y"),              # Current date
    f"{now.month:02d}/{now.year + 1}",  # Future date (same month, next year)
]

for date in dates_to_predict:
    prediction = best_model.predict(date, co2_test_loader)
    # The confidence interval is read as a (lower, upper) pair.
    ci_low, ci_high = prediction['confidence_interval'][0], prediction['confidence_interval'][1]
    print(f"\nPrediction for {date}:")
    print(f"PPM: {prediction['prediction']:.2f}")
    print(f"Confidence Interval: ({ci_low:.2f}, {ci_high:.2f})")

## Evaluation

In [None]:
# Load the best CO2 model and rebuild the data loaders to obtain the test split.
# NOTE(review): the previous cell performs the same load and preprocessing
# under different names — consider reusing those results if prepare_data is expensive.
best_co2_model = BayesianTuner.load_best_model(gas_type='co2')
preprocessor = AtmoSeerPreprocessor()
co2_dataloaders = preprocessor.prepare_data(co2_df)
co2_test_loader = co2_dataloaders['test_loader']

# The evaluator is given the gas type, the loaded model and the test loader.
evaluator = EvaluateAtmoSeer('co2', best_co2_model, co2_test_loader)

# The result is stored in `metrics`; these lines do not display it.
metrics = evaluator.get_metrics()