This notebook applies KIM to predict soil respiration from selected predictors using the SRDB database.

In [7]:
# Libraries
from pathlib import Path
import pandas as pd

from kim.map import KIM
from kim.data import Data
from kim.mapping_model import MLP


# Read the data

In [2]:
# Input file: the pre-selected SRDB subset, resolved relative to the
# directory the notebook is run from.
f_data = Path("./selected_SRDB.csv")

In [8]:
# Load the selected SRDB records.
# The first CSV column is an unnamed, previously saved row index (it appears
# as "Unnamed: 0" when read without index_col), so use it as the index
# instead of keeping it as a spurious data column.
df = pd.read_csv(f_data, index_col=0)
df.head()

Unnamed: 0,Site_ID,Latitude,Longitude,MAT,MAP,Annual_coverage,Soil_BD,Rs_annual
0,AU-HOS,-12.485,130.94,31.5,1700.0,1.0,1.41,1430.0
1,AU-RMK-ICBURN,-34.01,140.983,17.5,242.0,1.0,1.6,337.0
2,AU-RMK-ICCK,-34.0,140.966,17.5,242.0,1.0,1.6,375.0
3,AU-RMK-UCBURN,-34.01,140.983,17.5,242.0,1.0,1.6,571.0
4,AU-RMK-UCCK,-34.0,140.966,17.5,242.0,1.0,1.6,598.0


In [9]:
# Input columns fed to the mapping (one column name per entry).
# Other candidate columns that are currently disabled:
#   "Study_temp", "Study_precip", "Chamber_area", "Time_of_day", "LAI",
#   "Age_disturbance", "Elevation", "Soil_BD", "Soil_CN",
#   "Soil_sand", "Soil_silt", "Soil_clay", "C_AG", "C_BG"
predictors = [
    "Latitude",
    "Longitude",
    "MAT",
    "MAP",
    "Annual_coverage",
    "Soil_BD",
]

# Target columns to predict.
# Other candidate targets that are currently disabled:
#   "Ra_annual", "Rh_annual", "GPP"
predictands = [
    "Rs_annual",
]


In [10]:
# Pull the selected columns out of the frame as 2-D NumPy arrays
# (rows = records, columns = the listed predictors / predictands).
x = df[predictors].to_numpy()
y = df[predictands].to_numpy()


In [12]:
# Quick sanity check of the array shapes: (n_records, n_predictors) and
# (n_records, n_predictands).
x.shape, y.shape

((823, 6), (823, 1))

# Configurations

In [5]:
# --- Training-set size (passed below as 'num_train_sample') ---
Ns_train = 600

# --- Network activations (passed below as fixed model hyperparameters) ---
hidden_activation = 'sigmoid'
final_activation = 'leaky_relu'

# --- Random seeds, one per source of randomness ---
seed = 1024          # used as 'ens_seed' in map_configs
seed_model = 100     # used as 'model_seed' in map_configs
seed_dl = 10         # used as 'dl_seed' in map_configs
seed_shuffle = 1234  # used as 'seed_shuffle' in sensitivity_params
seed_predict = 3636  # NOTE(review): not referenced by the visible cells

# --- Verbosity ---
training_verbose = 1  # NOTE(review): not referenced by the visible cells


In [42]:
# ---------------------------------------------------------------------------
# Data configuration: scaler types forwarded to Data(...) for x and y.
# An empty string "" was the alternative tried for both entries
# (presumably "no scaling" -- TODO confirm against kim.data.Data).
# ---------------------------------------------------------------------------
data_params = dict(
    xscaler_type="standard",
    yscaler_type="standard",
)

# ---------------------------------------------------------------------------
# Sensitivity analysis configuration: forwarded unchanged as keyword
# arguments to data.calculate_sensitivity(...). See the kim documentation for
# the meaning of each option.
# ---------------------------------------------------------------------------
sensitivity_params = dict(
    method="pc",
    metric="corr",
    sst=True,
    ntest=100,
    alpha=0.05,
    k=3,
    seed_shuffle=seed_shuffle,
)

# ---------------------------------------------------------------------------
# Mapping configuration shared by every KIM instance created below.
# "*_hp_choices" entries list candidate values and "*_hp_fixed" entries hold
# single values (presumably sampled / held constant per ensemble member --
# TODO confirm against kim.map.KIM).
# ---------------------------------------------------------------------------
map_configs = {
    # Model class and ensemble size/type.
    "model_type": MLP,
    "n_model": 10,
    "ensemble_type": "ens_random",
    # Network architecture hyperparameters.
    "model_hp_choices": {
        "depth": [1, 3, 5, 6],
        "width_size": [3, 6, 10],
    },
    "model_hp_fixed": {
        "hidden_activation": hidden_activation,
        "final_activation": final_activation,
        "model_seed": seed_model,
    },
    # Optimizer hyperparameters.
    "optax_hp_choices": {
        "learning_rate": [0.01, 0.005, 0.003],
    },
    "optax_hp_fixed": {
        "nsteps": 100,
        "optimizer_type": "adam",
    },
    # Data-loader hyperparameters.
    "dl_hp_choices": {
        "batch_size": [8, 16],
    },
    "dl_hp_fixed": {
        "dl_seed": seed_dl,
        "num_train_sample": Ns_train,
    },
    # Ensemble seed and execution settings.
    "ens_seed": seed,
    "training_parallel": True,
    "parallel_config": {
        "n_jobs": 5,
        "backend": "loky",
        "verbose": 0,
    },
    "device": None,
}

# Exploratory data analysis

In [39]:
# Wrap the predictor/predictand arrays in a kim Data object, using the scaler
# settings from data_params.
data = Data(x, y, **data_params)
# Run the sensitivity analysis with the options from sensitivity_params; the
# resulting mask is inspected in the next cell via data.sensitivity_mask.
data.calculate_sensitivity(**sensitivity_params)


In [40]:
# Display the boolean mask stored on `data` after calculate_sensitivity().
# NOTE(review): rows presumably follow the order of `predictors` and the
# single column corresponds to the one predictand -- confirm.
data.sensitivity_mask

array([[False],
       [False],
       [ True],
       [ True],
       [False],
       [False]])

# Train the inverse mapping

In [43]:
# Build three different KIMs on the same data and ensemble configuration:
#   kim1 -- map_option="many2many", default mask_option
#   kim2 -- map_option="many2one", mask_option="sensitivity"
#   kim3 -- map_option="many2one", mask_option="cond_sensitivity"
kim1 = KIM(data, map_configs, map_option="many2many")
kim2 = KIM(data, map_configs, map_option="many2one", mask_option="sensitivity")
kim3 = KIM(data, map_configs, map_option="many2one", mask_option="cond_sensitivity")

# Train the mappings one after another (kim1, then kim2, then kim3).
for kim_model in (kim1, kim2, kim3):
    kim_model.train()



 Performing ensemble training in parallel with 10 model configurations...



100%|██████████| 100/100 [00:51<00:00,  1.95it/s]
100%|██████████| 100/100 [00:55<00:00,  1.82it/s]
100%|██████████| 100/100 [00:55<00:00,  1.81it/s]
100%|██████████| 100/100 [01:02<00:00,  1.59it/s]
100%|██████████| 100/100 [01:04<00:00,  1.54it/s]
100%|██████████| 100/100 [00:49<00:00,  2.03it/s]
100%|██████████| 100/100 [00:53<00:00,  1.88it/s]
100%|██████████| 100/100 [00:57<00:00,  1.75it/s]
100%|██████████| 100/100 [00:52<00:00,  1.92it/s]
100%|██████████| 100/100 [01:00<00:00,  1.67it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

Training completes.

 Performing ensemble training in parallel with 10 model configurations...



100%|██████████| 100/100 [00:48<00:00,  2.04it/s]
100%|██████████| 100/100 [00:52<00:00,  1.90it/s]
100%|██████████| 100/100 [00:56<00:00,  1.75it/s]
100%|██████████| 100/100 [00:59<00:00,  1.69it/s]
100%|██████████| 100/100 [01:00<00:00,  1.66it/s]
100%|██████████| 100/100 [00:51<00:00,  1.93it/s]
100%|██████████| 100/100 [00:51<00:00,  1.93it/s]
100%|██████████| 100/100 [00:50<00:00,  1.97it/s]
100%|██████████| 100/100 [00:55<00:00,  1.79it/s]
100%|██████████| 100/100 [00:54<00:00,  1.82it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

Training completes.

 Performing ensemble training in parallel with 10 model configurations...



100%|██████████| 100/100 [00:48<00:00,  2.05it/s]
100%|██████████| 100/100 [00:48<00:00,  2.05it/s]
100%|██████████| 100/100 [00:53<00:00,  1.88it/s]
100%|██████████| 100/100 [01:02<00:00,  1.60it/s]
100%|██████████| 100/100 [01:03<00:00,  1.58it/s]
100%|██████████| 100/100 [00:47<00:00,  2.09it/s]
100%|██████████| 100/100 [00:51<00:00,  1.95it/s]
100%|██████████| 100/100 [00:51<00:00,  1.93it/s]
100%|██████████| 100/100 [00:53<00:00,  1.86it/s]
 99%|█████████▉| 99/100 [00:57<00:00,  1.76it/s]

Training completes.


100%|██████████| 100/100 [00:57<00:00,  1.72it/s]


In [66]:
# Inspect the test-loss values stored at index 4 of `loss_test_ens` for the
# last map of kim3.
# NOTE(review): index 4 presumably selects the fifth ensemble member -- confirm.
kim3.maps[-1].loss_test_ens[4]

Array([1.3408536, 1.3440315, 1.3477818, 1.3521355, 1.356592 , 1.3608091,
       1.3648275, 1.368861 , 1.3728536, 1.3765261, 1.3797904, 1.3826338,
       1.3849363, 1.386616 , 1.3877108, 1.3880396, 1.3884785, 1.389326 ,
       1.3905051, 1.3920324, 1.3938538, 1.3958851, 1.3981603, 1.4005944,
       1.4030664, 1.4057751, 1.4084688, 1.4108762, 1.4137963, 1.4164336,
       1.4190885, 1.421689 , 1.4242334, 1.4267199, 1.429139 , 1.4314883,
       1.4337643, 1.4359657, 1.4380914, 1.4401406, 1.4421135, 1.4440106,
       1.4458326, 1.4475803, 1.449255 , 1.4508581, 1.452391 , 1.4538561,
       1.455254 , 1.4565872, 1.4578574, 1.4590665, 1.4602165, 1.4613092,
       1.4623463, 1.4633301, 1.4642622, 1.4651449, 1.4659795, 1.466768 ,
       1.4675125, 1.468214 , 1.4688748, 1.4694966, 1.470081 , 1.4706291,
       1.4711428, 1.367012 , 1.3615686, 1.3609289, 1.3623009, 1.3638952,
       1.3650914, 1.3661805, 1.3681632, 1.3712499, 1.370545 , 1.3783376,
       1.3685814, 1.3704404, 1.3712581, 1.3687035, 