This notebook uses KIM to predict ET (evapotranspiration) from selected predictors, based on eddy covariance data.

In [1]:
# Libraries
from pathlib import Path
import pandas as pd
import numpy as np

from kim.map import KIM
from kim.data import Data
from kim.mapping_model import MLP

import jax

%load_ext autoreload
%autoreload 2


In [2]:
# Optional: uncomment the two lines below to force JAX to run on the CPU.
# device = jax.devices("cpu")[0]
# jax.config.update('jax_platform_name', 'cpu')

In [3]:
# List the devices visible to JAX in this kernel.
jax.devices()

[CpuDevice(id=0)]

# Read the data

In [4]:
# Input CSV files: daily forcings (read into x) and daily fluxes (read into y).
f_x = Path("./data") / "Input_forcings_daily.csv"
f_y = Path("./data") / "Output_fluxes_daily.csv"


In [5]:
# Load both tables; the first CSV column becomes the DataFrame index.
df_x = pd.read_csv(f_x, index_col=0)
df_y = pd.read_csv(f_y, index_col=0)

# Column labels of the outputs and inputs.
y_keys = df_y.keys()
x_keys = df_x.keys()

# Plain NumPy arrays of the table values (rows x columns).
y = df_y.values
x = df_x.values

In [6]:
# Display the (rows, columns) shapes of the input and output arrays.
x.shape, y.shape


((1279, 10), (1279, 3))

# Configurations

## Preliminary analysis configuration

In [9]:
# Path the Data object is saved to after the sensitivity analysis.
f_data_save = Path("./results") / "data_daily"

# Seed forwarded to the sensitivity analysis under the "seed_shuffle" key.
seed_shuffle = 1234


In [10]:
# Keyword arguments for Data(...): the same "minmax" scaler type for inputs and outputs.
data_params = {"xscaler_type": "minmax", "yscaler_type": "minmax"}

# Keyword arguments unpacked into data.calculate_sensitivity(...).
sensitivity_params = {
    # NOTE(review): "pc" presumably selects pairwise analysis followed by
    # conditional independence testing (both appear in the run log) -- confirm.
    "method": "pc",
    # The run log reports "kNN-based information theoretic metrics" for this choice.
    "metric": "it-knn",
    # NOTE(review): presumably toggles a statistical significance test that
    # uses `ntest` and `alpha` -- confirm against the KIM documentation.
    "sst": True,
    "ntest": 100,
    "alpha": 0.05,
    # NOTE(review): presumably the neighbour count for the kNN estimator -- confirm.
    "k": 3,
    "n_jobs": 50,
    "seed_shuffle": seed_shuffle,
    "verbose": 1,
}


## Ensemble learning configuration

In [16]:
# Sample counts passed to the data loader as 'num_train_sample' / 'num_val_sample'.
Ns_train, Ns_val = 365, 365

# Activation names passed to the model as fixed hyperparameters.
hidden_activation, final_activation = 'sigmoid', 'leaky_relu'

# Random seeds: ensemble sampling, data loader, model, and prediction.
seed_ens, seed_dl, seed_model = 1024, 10, 100
seed_predict = 3636

# Ensemble size, number of parallel workers, and training verbosity.
n_models, n_jobs = 100, 50
training_verbose = 1

# Save paths for the three KIM variants.
f_kim_save1 = Path("./results") / "map_many2many_daily"
f_kim_save2 = Path("./results") / "map_many2one_daily"
f_kim_save3 = Path("./results") / "map_many2one_cond_daily"


In [17]:
# Shared mapping configuration handed to every KIM instance below.
# "*_choices" entries list candidate values and "*_fixed" entries hold single values;
# NOTE(review): presumably each ensemble member draws one value per "*_choices"
# key ('ens_random') -- confirm against the KIM documentation.
map_configs = {
    # --- Ensemble definition ---
    "model_type": MLP,
    "n_model": n_models,
    "ensemble_type": "ens_random",

    # --- Network hyperparameters ---
    "model_hp_choices": {
        "depth": [1, 3, 5, 6],
        "width_size": [3, 6, 10],
    },
    "model_hp_fixed": {
        "hidden_activation": hidden_activation,
        "final_activation": final_activation,
        "model_seed": seed_model,
    },

    # --- Optimizer hyperparameters ---
    "optax_hp_choices": {
        "learning_rate": [0.01, 0.005, 0.003],
    },
    "optax_hp_fixed": {
        "nsteps": 300,  # matches the 300-step progress bars in the training log
        "optimizer_type": "adam",
    },

    # --- Data loader hyperparameters ---
    "dl_hp_choices": {},
    "dl_hp_fixed": {
        "dl_seed": seed_dl,
        "num_train_sample": Ns_train,
        "num_val_sample": Ns_val,
        "batch_size": 64,
    },

    # --- Execution ---
    "ens_seed": seed_ens,
    "training_parallel": True,
    "parallel_config": {
        "n_jobs": n_jobs,
        "backend": "loky",
        "verbose": 1,
    },
    "device": None,
}

# Exploratory data analysis

In [18]:
# Wrap the raw arrays in a KIM Data object, configured by data_params.
data = Data(x, y, **data_params)
# Run the sensitivity analysis. This is the slow step: in the recorded run the
# progress bar under the pairwise stage shows 10 iterations taking 07:43.
data.calculate_sensitivity(**sensitivity_params)
# Save the sensitivity analysis to disk
data.save(f_data_save)


Using the kNN-based information theoretic metrics ...
Performing pairwise analysis to remove insensitive inputs ...


  pid = os.fork()
100%|██████████| 10/10 [07:43<00:00, 46.40s/it]


Performing conditional independence testing to remove redundant inputs ...


In [21]:
# Boolean mask, presumably from the pairwise analysis stage (rows look like inputs,
# columns like outputs, given the 10 x 3 result) -- TODO confirm.
data.sensitivity_mask

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [22]:
# Boolean mask, presumably from the conditional independence testing stage
# (same 10 x 3 layout as the sensitivity mask) -- TODO confirm.
data.cond_sensitivity_mask

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True, False],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

# Train the inverse mapping

In [23]:
# Sanity check: create a small JAX array and show the device(s) it is placed on.
d = jax.numpy.array([0,3,5,7])
d.devices()

{CpuDevice(id=0)}

In [24]:
# Build three different KIMs from the same data and mapping configuration:
#   kim1 - 'many2many' mapping, no mask_option given
#   kim2 - 'many2one' mapping with the "sensitivity" mask
#   kim3 - 'many2one' mapping with the "cond_sensitivity" mask
kim1 = KIM(data, map_configs, map_option='many2many')
kim2 = KIM(data, map_configs, map_option='many2one', mask_option="sensitivity")
kim3 = KIM(data, map_configs, map_option='many2one', mask_option="cond_sensitivity")

# Train the mappings one after another, in creation order.
for mapper in (kim1, kim2, kim3):
    mapper.train()



 Performing ensemble training in parallel with 100 model configurations...



[Parallel(n_jobs=50)]: Using backend LokyBackend with 50 concurrent workers.
100%|██████████| 300/300 [00:04<00:00, 72.85it/s]
100%|██████████| 300/300 [00:04<00:00, 71.48it/s]
100%|██████████| 300/300 [00:04<00:00, 72.99it/s]
100%|██████████| 300/300 [00:04<00:00, 71.23it/s]
100%|██████████| 300/300 [00:04<00:00, 72.87it/s]
100%|██████████| 300/300 [00:04<00:00, 71.86it/s]
100%|██████████| 300/300 [00:04<00:00, 74.59it/s]
100%|██████████| 300/300 [00:04<00:00, 71.61it/s]
100%|██████████| 300/300 [00:04<00:00, 71.09it/s]
100%|██████████| 300/300 [00:04<00:00, 71.33it/s]
100%|██████████| 300/300 [00:04<00:00, 71.24it/s]
100%|██████████| 300/300 [00:04<00:00, 70.93it/s]
100%|██████████| 300/300 [00:04<00:00, 73.38it/s]
100%|██████████| 300/300 [00:04<00:00, 65.37it/s]
100%|██████████| 300/300 [00:04<00:00, 65.03it/s]
100%|██████████| 300/300 [00:04<00:00, 66.70it/s]
100%|██████████| 300/300 [00:04<00:00, 65.64it/s]
100%|██████████| 300/300 [00:04<00:00, 65.05it/s]
100%|██████████| 300/30

Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [00:03<00:00, 79.41it/s]
100%|██████████| 300/300 [00:03<00:00, 78.68it/s]
100%|██████████| 300/300 [00:03<00:00, 78.56it/s]
100%|██████████| 300/300 [00:03<00:00, 77.90it/s]
100%|██████████| 300/300 [00:03<00:00, 77.50it/s]
100%|██████████| 300/300 [00:03<00:00, 78.69it/s]
100%|██████████| 300/300 [00:03<00:00, 75.69it/s]
100%|██████████| 300/300 [00:03<00:00, 79.14it/s]
100%|██████████| 300/300 [00:04<00:00, 74.45it/s]
100%|██████████| 300/300 [00:04<00:00, 74.21it/s]
100%|██████████| 300/300 [00:03<00:00, 76.63it/s]
100%|██████████| 300/300 [00:04<00:00, 74.49it/s]
100%|██████████| 300/300 [00:03<00:00, 75.90it/s]
100%|██████████| 300/300 [00:04<00:00, 68.31it/s]
100%|██████████| 300/300 [00:04<00:00, 68.19it/s]
100%|██████████| 300/300 [00:04<00:00, 67.89it/s]
100%|██████████| 300/300 [00:04<00:00, 65.85it/s]
100%|██████████| 300/300 [00:04<00:00, 65.83it/s]
100%|██████████| 300/300 [00:04<00:00, 65.73it/s]
100%|██████████| 300/300 [00:04<00:00, 66.30it/s]


Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [00:05<00:00, 58.57it/s]
[Parallel(n_jobs=50)]: Done 100 out of 100 | elapsed:   55.2s finished
[Parallel(n_jobs=50)]: Using backend LokyBackend with 50 concurrent workers.
100%|██████████| 300/300 [00:03<00:00, 79.60it/s]
100%|██████████| 300/300 [00:03<00:00, 77.57it/s]
100%|██████████| 300/300 [00:04<00:00, 71.32it/s]
100%|██████████| 300/300 [00:03<00:00, 75.36it/s]
100%|██████████| 300/300 [00:04<00:00, 73.73it/s]
100%|██████████| 300/300 [00:04<00:00, 68.76it/s]
100%|██████████| 300/300 [00:04<00:00, 65.72it/s]
100%|██████████| 300/300 [00:04<00:00, 68.94it/s]
100%|██████████| 300/300 [00:04<00:00, 63.92it/s]
100%|██████████| 300/300 [00:05<00:00, 58.55it/s]
100%|██████████| 300/300 [00:05<00:00, 57.96it/s]
100%|██████████| 300/300 [00:05<00:00, 58.02it/s]
100%|██████████| 300/300 [00:05<00:00, 55.78it/s]
100%|██████████| 300/300 [00:05<00:00, 56.33it/s]
100%|██████████| 300/300 [00:05<00:00, 57.07it/s]
100%|██████████| 300/300 [00:05<00:00, 53.99it/s]
10

Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [00:03<00:00, 80.91it/s]
100%|██████████| 300/300 [00:03<00:00, 79.95it/s]
100%|██████████| 300/300 [00:03<00:00, 79.63it/s]
100%|██████████| 300/300 [00:03<00:00, 78.95it/s]
100%|██████████| 300/300 [00:03<00:00, 78.15it/s]
100%|██████████| 300/300 [00:03<00:00, 76.90it/s]
100%|██████████| 300/300 [00:03<00:00, 75.23it/s]
100%|██████████| 300/300 [00:03<00:00, 78.45it/s]
100%|██████████| 300/300 [00:03<00:00, 77.44it/s]
100%|██████████| 300/300 [00:04<00:00, 71.86it/s]
100%|██████████| 300/300 [00:03<00:00, 77.28it/s]
100%|██████████| 300/300 [00:04<00:00, 71.35it/s]
100%|██████████| 300/300 [00:03<00:00, 76.52it/s]
100%|██████████| 300/300 [00:04<00:00, 70.72it/s]
100%|██████████| 300/300 [00:03<00:00, 77.90it/s]
100%|██████████| 300/300 [00:04<00:00, 70.35it/s]
100%|██████████| 300/300 [00:04<00:00, 74.71it/s]
100%|██████████| 300/300 [00:04<00:00, 69.34it/s]
100%|██████████| 300/300 [00:04<00:00, 66.88it/s]
100%|██████████| 300/300 [00:04<00:00, 66.99it/s]


Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [00:03<00:00, 81.91it/s]
100%|██████████| 300/300 [00:03<00:00, 81.87it/s]
100%|██████████| 300/300 [00:03<00:00, 80.50it/s]
100%|██████████| 300/300 [00:03<00:00, 79.40it/s]
100%|██████████| 300/300 [00:03<00:00, 78.79it/s]
100%|██████████| 300/300 [00:03<00:00, 79.01it/s]
100%|██████████| 300/300 [00:03<00:00, 78.54it/s]
100%|██████████| 300/300 [00:03<00:00, 78.06it/s]
100%|██████████| 300/300 [00:03<00:00, 77.90it/s]
100%|██████████| 300/300 [00:03<00:00, 77.21it/s]
100%|██████████| 300/300 [00:03<00:00, 77.34it/s]
100%|██████████| 300/300 [00:04<00:00, 72.99it/s]
100%|██████████| 300/300 [00:03<00:00, 78.33it/s]
100%|██████████| 300/300 [00:03<00:00, 77.27it/s]
100%|██████████| 300/300 [00:04<00:00, 71.30it/s]
100%|██████████| 300/300 [00:04<00:00, 71.07it/s]
100%|██████████| 300/300 [00:04<00:00, 70.49it/s]
100%|██████████| 300/300 [00:04<00:00, 69.31it/s]
100%|██████████| 300/300 [00:04<00:00, 70.34it/s]
100%|██████████| 300/300 [00:04<00:00, 68.68it/s]


Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [00:03<00:00, 81.66it/s]
100%|██████████| 300/300 [00:03<00:00, 79.88it/s]
100%|██████████| 300/300 [00:03<00:00, 79.74it/s]
100%|██████████| 300/300 [00:03<00:00, 79.51it/s]
100%|██████████| 300/300 [00:03<00:00, 79.48it/s]
100%|██████████| 300/300 [00:03<00:00, 79.44it/s]
100%|██████████| 300/300 [00:03<00:00, 79.26it/s]
100%|██████████| 300/300 [00:03<00:00, 78.46it/s]
100%|██████████| 300/300 [00:03<00:00, 78.02it/s]
100%|██████████| 300/300 [00:03<00:00, 77.93it/s]
100%|██████████| 300/300 [00:03<00:00, 77.81it/s]
100%|██████████| 300/300 [00:04<00:00, 73.50it/s]
100%|██████████| 300/300 [00:03<00:00, 75.43it/s]
100%|██████████| 300/300 [00:04<00:00, 70.19it/s]
100%|██████████| 300/300 [00:04<00:00, 69.18it/s]
100%|██████████| 300/300 [00:04<00:00, 70.36it/s]
100%|██████████| 300/300 [00:04<00:00, 70.43it/s]
100%|██████████| 300/300 [00:04<00:00, 68.04it/s]
 83%|████████▎ | 249/300 [00:04<00:00, 71.86it/s]
100%|██████████| 300/300 [00:04<00:00, 68.80it/s]


Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [00:03<00:00, 80.36it/s]
100%|██████████| 300/300 [00:03<00:00, 79.99it/s]
100%|██████████| 300/300 [00:03<00:00, 78.77it/s]
100%|██████████| 300/300 [00:03<00:00, 78.69it/s]
100%|██████████| 300/300 [00:03<00:00, 78.51it/s]
100%|██████████| 300/300 [00:03<00:00, 78.34it/s]
100%|██████████| 300/300 [00:03<00:00, 78.27it/s]
100%|██████████| 300/300 [00:03<00:00, 78.42it/s]
100%|██████████| 300/300 [00:03<00:00, 78.16it/s]
100%|██████████| 300/300 [00:03<00:00, 77.98it/s]
100%|██████████| 300/300 [00:03<00:00, 78.27it/s]
100%|██████████| 300/300 [00:03<00:00, 77.99it/s]
100%|██████████| 300/300 [00:03<00:00, 76.55it/s]
 72%|███████▏  | 217/300 [00:04<00:01, 72.74it/s]
100%|██████████| 300/300 [00:04<00:00, 69.97it/s]
100%|██████████| 300/300 [00:04<00:00, 69.46it/s]
100%|██████████| 300/300 [00:04<00:00, 69.32it/s]
100%|██████████| 300/300 [00:04<00:00, 69.47it/s]
100%|██████████| 300/300 [00:04<00:00, 69.16it/s]
100%|██████████| 300/300 [00:04<00:00, 68.87it/s]


Training completes.


100%|██████████| 300/300 [00:05<00:00, 58.90it/s]
[Parallel(n_jobs=50)]: Done 100 out of 100 | elapsed:   11.2s finished


In [26]:
# Save each trained KIM to its own configured path, in the order kim1, kim2, kim3.
for mapper, f_save in zip(
    (kim1, kim2, kim3),
    (f_kim_save1, f_kim_save2, f_kim_save3),
):
    mapper.save(f_save)
