This notebook uses KIM to predict ET from selected predictors, based on eddy covariance data.

In [1]:
# Libraries
from pathlib import Path
import pandas as pd
import numpy as np

from kim.map import KIM
from kim.data import Data
from kim.mapping_model import MLP

import jax

%load_ext autoreload
%autoreload 2


In [2]:
# Optional CPU override, currently disabled: uncommenting these two lines would
# select the first CPU device and set JAX's platform name to 'cpu'.
# device = jax.devices("cpu")[0]
# jax.config.update('jax_platform_name', 'cpu')

In [3]:
# Show the devices JAX will use (the recorded run reports a single CPU device)
jax.devices()

An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.


[CpuDevice(id=0)]

# Read the data

In [4]:
# Locations of the two CSV tables: input forcings (x) and output fluxes (y)
f_x = Path("./data") / "Input_forcings.csv"
f_y = Path("./data") / "Output_fluxes.csv"


In [5]:
# Load both tables, treating the first CSV column as the row index
df_x = pd.read_csv(f_x, index_col=0)
df_y = pd.read_csv(f_y, index_col=0)

# Column labels of each table
x_keys = df_x.columns
y_keys = df_y.columns

# Underlying arrays, later passed to Data(x, y, ...)
x = df_x.values
y = df_y.values

In [6]:
# Dimensions of the input-forcing array (x) and the output-flux array (y)
x.shape, y.shape


((35088, 14), (35088, 3))

# Configurations

## Preliminary analysis configuration

In [7]:
# Seed forwarded to the sensitivity analysis via sensitivity_params["seed_shuffle"]
seed_shuffle = 1234
# Destination passed to data.save() after the sensitivity analysis has run
f_data_save = Path("./results/data")


In [8]:
# Data configuration: scaler choice for the inputs (x) and the outputs (y)
data_params = dict(
    xscaler_type="minmax",
    yscaler_type="minmax",
)

# Sensitivity analysis configuration, unpacked into data.calculate_sensitivity()
sensitivity_params = dict(
    method="pc",
    metric="it-knn",  # the run log reports kNN-based information-theoretic metrics
    # NOTE(review): sst/ntest/alpha look like significance-test settings -- confirm in kim.data
    sst=True,
    ntest=100,
    alpha=0.05,
    k=3,
    n_jobs=50,
    seed_shuffle=seed_shuffle,
    verbose=1,
)


## Ensemble learning configuration

In [9]:
# Number of samples reserved for training and for validation
Ns_train, Ns_val = 25000, 5000

# Activation functions referenced by the fixed model hyperparameters
hidden_activation, final_activation = 'sigmoid', 'leaky_relu'

# Random seeds
seed_ens, seed_predict = 1024, 3636
seed_dl, seed_model = 10, 100

# Verbosity, ensemble size, and number of parallel workers
training_verbose = 1
n_models = 100
n_jobs = 50

# Save locations, one per KIM variant trained in this notebook
f_kim_save1, f_kim_save2, f_kim_save3 = (
    Path("./results") / stem
    for stem in ("map_many2many", "map_many2one", "map_many2one_cond")
)


In [10]:
# Ensemble-mapping settings shared by every KIM instance created below.
# NOTE(review): the *_choices entries list candidate values and the *_fixed
# entries hold single values; how KIM draws from the candidates is not
# visible here -- confirm in kim.map.
map_configs = dict(
    model_type=MLP,
    n_model=n_models,
    ensemble_type='ens_random',
    # Network architecture
    model_hp_choices=dict(
        depth=[1, 3, 5, 6],
        width_size=[3, 6, 10],
    ),
    model_hp_fixed=dict(
        hidden_activation=hidden_activation,
        final_activation=final_activation,
        model_seed=seed_model,
    ),
    # Optimizer
    optax_hp_choices=dict(
        learning_rate=[0.01, 0.005, 0.003],
    ),
    optax_hp_fixed=dict(
        nsteps=300,
        optimizer_type='adam',
    ),
    # Data loader
    dl_hp_choices=dict(),
    dl_hp_fixed=dict(
        dl_seed=seed_dl,
        num_train_sample=Ns_train,
        num_val_sample=Ns_val,
        batch_size=64,
    ),
    ens_seed=seed_ens,
    # Parallel execution of the ensemble training
    training_parallel=True,
    parallel_config=dict(
        n_jobs=n_jobs,
        backend='loky',
        verbose=1,
    ),
    device=None,
)

# Exploratory data analysis

In [11]:
# Wrap the raw arrays; data_params passes xscaler_type / yscaler_type = "minmax"
data = Data(x, y, **data_params)
# Long-running step: the progress bar in the recorded output shows roughly 15 minutes
data.calculate_sensitivity(**sensitivity_params)
# Save the sensitivity analysis to disk
data.save(f_data_save)


Using the kNN-based information theoretic metrics ...
Performing pairwise analysis to remove insensitive inputs ...


  pid = os.fork()
100%|██████████| 14/14 [15:37<00:00, 66.98s/it]  


Performing conditional independence testing to remove redundant inputs ...


In [12]:
# Boolean mask; the recorded output has 14 rows and 3 columns, matching the
# column counts of x and y.
# NOTE(review): presumably True marks an input kept as sensitive -- confirm in kim.data
data.sensitivity_mask

array([[False,  True, False],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [False, False, False],
       [ True,  True,  True]])

In [13]:
# Boolean mask with the same 14 x 3 layout in the recorded output.
# NOTE(review): presumably the result of the conditional-independence stage
# reported in the run log -- confirm in kim.data
data.cond_sensitivity_mask

array([[False,  True, False],
       [ True,  True,  True],
       [False,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [False,  True,  True],
       [ True,  True,  True],
       [False, False, False],
       [ True,  True,  True]])

# Train the inverse mapping

In [14]:
# Sanity check before training: create a small JAX array and show which
# device(s) it was placed on.
d = jax.numpy.array([0,3,5,7])
d.devices()

{CpuDevice(id=0)}

In [15]:
# Initialize three different KIMs on the same data and ensemble settings:
#   kim1 - many-to-many mapping, no mask_option given
#   kim2 - many-to-one mapping with the "sensitivity" mask
#   kim3 - many-to-one mapping with the "cond_sensitivity" mask
kim1 = KIM(data, map_configs, map_option='many2many')
kim2 = KIM(data, map_configs, map_option='many2one', mask_option="sensitivity")
kim3 = KIM(data, map_configs, map_option='many2one', mask_option="cond_sensitivity")

# Train the mappings one after another, in the order they were created
for kim_variant in (kim1, kim2, kim3):
    kim_variant.train()



 Performing ensemble training in parallel with 100 model configurations...



[Parallel(n_jobs=50)]: Using backend LokyBackend with 50 concurrent workers.
  pid = os.fork()
100%|██████████| 300/300 [02:15<00:00,  2.22it/s]]
100%|██████████| 300/300 [02:16<00:00,  2.21it/s]
100%|██████████| 300/300 [02:16<00:00,  2.19it/s]
100%|██████████| 300/300 [02:17<00:00,  2.19it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.16it/s]
100%|██████████| 300/300 [02:19<00:00,  2.16it/s]
100%|██████████| 300/300 [02:19<00:00,  2.15it/s]
100%|██████████| 300/300 [02:20<00:00,  2.14it/s]
100%|██████████| 300/300 [02:20<00:00,  2.13it/s]
100%|██████████| 300/300 [02:33<00:00,  1.96it/s]
100%|██████████| 300/300 [02:33<00:00,  1.96it/s]
100%|██████████| 300/300 [02:34<00:00,  1.95it/s]
100%|██████████| 300/300 [02:34<00:00,  1.94it/s]
100%|██████████| 300/300 [02:35<00:00,  1.93it/s]
100%

Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [02:14<00:00,  2.22it/s]
100%|██████████| 300/300 [02:15<00:00,  2.21it/s]
100%|██████████| 300/300 [02:16<00:00,  2.19it/s]
100%|██████████| 300/300 [02:16<00:00,  2.19it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.16it/s]
100%|██████████| 300/300 [02:20<00:00,  2.13it/s]
100%|██████████| 300/300 [02:34<00:00,  1.95it/s]
100%|██████████| 300/300 [02:34<00:00,  1.94it/s]
100%|██████████| 300/300 [02:35<00:00,  1.93it/s]
100%|██████████| 300/300 [02:35<00:00,  1.93it/s]
100%|██████████| 300/300 [02:36<00:00,  1.92it/s]
100%|██████████| 300/300 [02:36<00:00,  1.92it/s]
100%|██████████| 300/300 [02:36<00:00,  1.92it/s]


Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [02:15<00:00,  2.21it/s]
100%|██████████| 300/300 [02:15<00:00,  2.21it/s]
100%|██████████| 300/300 [02:17<00:00,  2.19it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.16it/s]
100%|██████████| 300/300 [02:18<00:00,  2.16it/s]
100%|██████████| 300/300 [02:18<00:00,  2.16it/s]
100%|██████████| 300/300 [02:19<00:00,  2.16it/s]
100%|██████████| 300/300 [02:33<00:00,  1.96it/s]
100%|██████████| 300/300 [02:33<00:00,  1.96it/s]
100%|██████████| 300/300 [02:33<00:00,  1.95it/s]
100%|██████████| 300/300 [02:34<00:00,  1.94it/s]
100%|██████████| 300/300 [02:34<00:00,  1.94it/s]
100%|██████████| 300/300 [02:35<00:00,  1.93it/s]
100%|██████████| 300/300 [02:35<00:00,  1.93it/s]


Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [02:15<00:00,  2.22it/s]
100%|██████████| 300/300 [02:15<00:00,  2.21it/s]
100%|██████████| 300/300 [02:16<00:00,  2.20it/s]
100%|██████████| 300/300 [02:16<00:00,  2.19it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:17<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.16it/s]
100%|██████████| 300/300 [02:18<00:00,  2.16it/s]
100%|██████████| 300/300 [02:19<00:00,  2.16it/s]
100%|██████████| 300/300 [02:19<00:00,  2.15it/s]
100%|██████████| 300/300 [02:19<00:00,  2.15it/s]
100%|██████████| 300/300 [02:34<00:00,  1.94it/s]
100%|██████████| 300/300 [02:34<00:00,  1.94it/s]
100%|██████████| 300/300 [02:35<00:00,  1.93it/s]
100%|██████████| 300/300 [02:35<00:00,  1.93it/s]
100%|██████████| 300/300 [02:35<00:00,  1.93it/s]
100%|██████████| 300/300 [02:36<00:00,  1.92it/s]
100%|██████████| 300/300 [02:37<00:00,  1.91it/s]


Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [02:15<00:00,  2.21it/s]
100%|██████████| 300/300 [02:16<00:00,  2.20it/s]
100%|██████████| 300/300 [02:16<00:00,  2.20it/s]
100%|██████████| 300/300 [02:16<00:00,  2.20it/s]
100%|██████████| 300/300 [02:16<00:00,  2.20it/s]
100%|██████████| 300/300 [02:17<00:00,  2.19it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:17<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.16it/s]
100%|██████████| 300/300 [02:19<00:00,  2.15it/s]
100%|██████████| 300/300 [02:33<00:00,  1.95it/s]
100%|██████████| 300/300 [02:34<00:00,  1.95it/s]
100%|██████████| 300/300 [02:34<00:00,  1.95it/s]
100%|██████████| 300/300 [02:34<00:00,  1.94it/s]
100%|██████████| 300/300 [02:35<00:00,  1.94it/s]
100%|██████████| 300/300 [02:35<00:00,  1.93it/s]
100%|██████████| 300/300 [02:35<00:00,  1.92it/s]


Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [02:14<00:00,  2.23it/s]
100%|██████████| 300/300 [02:15<00:00,  2.22it/s]
100%|██████████| 300/300 [02:16<00:00,  2.20it/s]
100%|██████████| 300/300 [02:16<00:00,  2.19it/s]
100%|██████████| 300/300 [02:17<00:00,  2.19it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.16it/s]
100%|██████████| 300/300 [02:19<00:00,  2.16it/s]
100%|██████████| 300/300 [02:20<00:00,  2.13it/s]
100%|██████████| 300/300 [02:32<00:00,  1.97it/s]
100%|██████████| 300/300 [02:33<00:00,  1.95it/s]
100%|██████████| 300/300 [02:34<00:00,  1.94it/s]
100%|██████████| 300/300 [02:34<00:00,  1.94it/s]
100%|██████████| 300/300 [02:35<00:00,  1.94it/s]
100%|██████████| 300/300 [02:35<00:00,  1.93it/s]
100%|██████████| 300/300 [02:36<00:00,  1.92it/s]


Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [02:14<00:00,  2.23it/s]
100%|██████████| 300/300 [02:16<00:00,  2.20it/s]
100%|██████████| 300/300 [02:16<00:00,  2.20it/s]
100%|██████████| 300/300 [02:16<00:00,  2.19it/s]
100%|██████████| 300/300 [02:16<00:00,  2.19it/s]
100%|██████████| 300/300 [02:17<00:00,  2.18it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.17it/s]
100%|██████████| 300/300 [02:18<00:00,  2.16it/s]
100%|██████████| 300/300 [02:18<00:00,  2.16it/s]
100%|██████████| 300/300 [02:18<00:00,  2.16it/s]
100%|██████████| 300/300 [02:20<00:00,  2.14it/s]
100%|██████████| 300/300 [02:20<00:00,  2.13it/s]
100%|██████████| 300/300 [02:34<00:00,  1.95it/s]
100%|██████████| 300/300 [02:34<00:00,  1.94it/s]
100%|██████████| 300/300 [02:34<00:00,  1.94it/s]
100%|██████████| 300/300 [02:35<00:00,  1.93it/s]
100%|██████████| 300/300 [02:35<00:00,  1.93it/s]
100%|██████████| 300/300 [02:35<00:00,  1.92it/s]
100%|██████████| 300/300 [02:36<00:00,  1.92it/s]


Training completes.


100%|██████████| 300/300 [03:02<00:00,  1.64it/s]
[Parallel(n_jobs=50)]: Done 100 out of 100 | elapsed:  6.2min finished


In [16]:
# Save each trained KIM to its own location
for trained_kim, f_kim_save in zip(
    (kim1, kim2, kim3),
    (f_kim_save1, f_kim_save2, f_kim_save3),
):
    trained_kim.save(f_kim_save)
