This notebook applies KIM to predict ET from selected predictors using the SRDB database.

In [3]:
# Libraries
from pathlib import Path
import pandas as pd
import numpy as np

from kim.map import KIM
from kim.data import Data
from kim.mapping_model import MLP


# Read the data

In [2]:
# File and folder paths
# The data directory and the shared file-name prefix are named once here so the
# notebook can be pointed at another location or file set by editing two lines
# instead of four path literals.
DATA_DIR = Path("./data")
FILE_PREFIX = "US-Bi1"

f_forcing = DATA_DIR / f"{FILE_PREFIX}-forcings.csv"            # loaded below as `x`
f_forcing_test = DATA_DIR / f"{FILE_PREFIX}-forcings-test.csv"  # loaded below as `x_eval`
f_obs = DATA_DIR / f"{FILE_PREFIX}-fluxes.csv"                  # loaded below as `y`
f_obs_test = DATA_DIR / f"{FILE_PREFIX}-fluxes-test.csv"        # loaded below as `y_eval`


In [13]:
# Columns pulled from the observation CSVs. Named once so the training and
# evaluation targets are guaranteed to use the same columns in the same order.
target_columns = ['LE_F_MDS', 'NETRAD', 'H_F_MDS']

# Forcing files: comma-separated text parsed by np.loadtxt (every field as a float).
x = np.loadtxt(f_forcing, delimiter=',')
x_eval = np.loadtxt(f_forcing_test, delimiter=',')

# Observation files: read with a header row, then reduced to the target columns.
y = pd.read_csv(f_obs)[target_columns].values
y_eval = pd.read_csv(f_obs_test)[target_columns].values

# Fail fast if inputs and targets are not row-aligned; a mismatch would
# otherwise only surface later, inside the KIM data handling.
assert len(x) == len(y), f"train rows differ: x has {len(x)}, y has {len(y)}"
assert len(x_eval) == len(y_eval), (
    f"eval rows differ: x_eval has {len(x_eval)}, y_eval has {len(y_eval)}"
)


In [9]:
# Shapes of the training and evaluation arrays, in the order x, y, x_eval, y_eval
tuple(arr.shape for arr in (x, y, x_eval, y_eval))

((35088, 14), (35088, 3), (26304, 14), (26304, 3))

# Configurations

In [14]:
# Number of training samples (forwarded as 'num_train_sample' in map_configs)
Ns_train = 30_000

# Activations (forwarded as "hidden_activation" / "final_activation" in map_configs)
hidden_activation, final_activation = 'sigmoid', 'leaky_relu'

# Random seeds
seed = 1024          # forwarded as 'ens_seed' in map_configs
seed_model = 100     # forwarded as "model_seed" in map_configs
seed_dl = 10         # forwarded as 'dl_seed' in map_configs
seed_shuffle = 1234  # forwarded as "seed_shuffle" in sensitivity_params
seed_predict = 3636  # not referenced in the cells shown here

# Verbosity setting; not referenced in the cells shown here
training_verbose = 1


In [20]:
# Data configuration
# Scaler type for the first (x) and second (y) array handed to Data.
# Empty strings ("") were previously tried for both settings -- presumably that
# turns scaling off; TODO confirm against kim.data.Data.
data_params = dict(
    xscaler_type="standard",
    yscaler_type="standard",
)

# Sensitivity analysis configuration
# Unpacked as keyword arguments into Data.calculate_sensitivity further down.
sensitivity_params = {
    "method": "pc",
    "metric": "corr",
    "sst": True,
    "ntest": 100,
    "alpha": 0.05,
    "k": 3,
    "seed_shuffle": seed_shuffle,
}

# Mapping parameters for each test below
# Passed unchanged as the second argument to every KIM(...) constructed later.
# The "*_choices" / "*_fixed" pairs presumably separate values that vary across
# ensemble members from values shared by all of them -- TODO confirm in kim.map.KIM.
map_configs = {
    # --- ensemble definition ---
    "model_type": MLP,
    "n_model": 10,
    "ensemble_type": "ens_random",
    # --- model hyperparameters ---
    "model_hp_choices": {
        "depth": [1, 3, 5, 6],
        "width_size": [3, 6, 10],
    },
    "model_hp_fixed": {
        "hidden_activation": hidden_activation,
        "final_activation": final_activation,
        "model_seed": seed_model,
    },
    # --- optimizer hyperparameters ---
    "optax_hp_choices": {
        "learning_rate": [0.01, 0.005, 0.003],
    },
    "optax_hp_fixed": {
        "nsteps": 100,
        "optimizer_type": "adam",
    },
    # --- data-loader hyperparameters ---
    "dl_hp_choices": {},
    "dl_hp_fixed": {
        "dl_seed": seed_dl,
        "num_train_sample": Ns_train,
        "batch_size": 64,
    },
    # --- ensemble seed and execution settings ---
    "ens_seed": seed,
    "training_parallel": True,
    "parallel_config": {
        "n_jobs": 5,
        "backend": "loky",
        "verbose": 0,
    },
    "device": None,
}

# Exploratory data analysis

In [16]:
# Build the KIM Data object from the training arrays, with the scaler settings
# from data_params unpacked as keyword arguments.
data = Data(x, y, **data_params)
# Run the sensitivity analysis with the options from sensitivity_params.
# NOTE(review): the recorded output of this cell is a run of repeated
# `c /= stddev[:, None]` / `c /= stddev[None, :]` warning lines. That looks like
# a correlation computation dividing by a zero or invalid standard deviation --
# TODO confirm which columns trigger it and whether the resulting masks are affected.
data.calculate_sensitivity(**sensitivity_params)


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


In [17]:
# Display the sensitivity mask stored on `data`
# (presumably filled in by calculate_sensitivity -- TODO confirm).
# The recorded output is a 14 x 3 boolean array.
data.sensitivity_mask

array([[False,  True,  True],
       [False, False, False],
       [ True,  True, False],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [False, False, False],
       [False, False,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [False, False, False],
       [False, False, False],
       [ True,  True, False]])

In [18]:
# Display the conditional sensitivity mask stored on `data`
# (presumably filled in by calculate_sensitivity -- TODO confirm).
# The recorded output is a 14 x 3 boolean array.
data.cond_sensitivity_mask

array([[False,  True,  True],
       [False, False, False],
       [ True, False, False],
       [ True, False, False],
       [ True,  True,  True],
       [ True,  True, False],
       [False, False, False],
       [False, False, False],
       [False, False,  True],
       [ True, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [ True, False, False]])

# Train the inverse mapping

In [21]:
# Initialize three different KIMs on the same data and mapping configuration;
# they differ only in the mask/map options passed:
#   kim1 - map_option 'many2many', no mask_option given
#   kim2 - map_option 'many2one', mask_option "sensitivity"
#   kim3 - map_option 'many2one', mask_option "cond_sensitivity"
kim1 = KIM(data, map_configs, map_option='many2many')
kim2 = KIM(data, map_configs, map_option='many2one', mask_option="sensitivity")
kim3 = KIM(data, map_configs, map_option='many2one', mask_option="cond_sensitivity")

# Train the mappings one after another, in construction order
for _kim in (kim1, kim2, kim3):
    _kim.train()



 Performing ensemble training in parallel with 10 model configurations...



  pid = os.fork()
  4%|▍         | 4/100 [01:21<32:25, 20.27s/it]

KeyboardInterrupt: 