This notebook uses KIM to predict ET from selected predictors using the SRDB database.

In [1]:
# Libraries
from pathlib import Path
import pandas as pd
import numpy as np

from kim.map import KIM
from kim.data import Data
from kim.mapping_model import MLP

import jax

%load_ext autoreload
%autoreload 2


In [2]:
# Optional: uncomment the two lines below to select the CPU device and set JAX's platform name to 'cpu'.
# device = jax.devices("cpu")[0]
# jax.config.update('jax_platform_name', 'cpu')

In [3]:
# Display the devices JAX reports for this kernel.
jax.devices()

An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.


[CpuDevice(id=0)]

# Read the data

In [4]:
# File and folder paths
# Every input file lives in one folder; edit DATA_DIR to relocate the dataset.
DATA_DIR = Path("./data")

# Forcing tables (loaded below as x / x_eval).
f_forcing = DATA_DIR / "US-Bi1-forcings.csv"
f_forcing_test = DATA_DIR / "US-Bi1-forcings-test.csv"

# Flux observation tables (loaded below as y / y_eval).
f_obs = DATA_DIR / "US-Bi1-fluxes.csv"
f_obs_test = DATA_DIR / "US-Bi1-fluxes-test.csv"


In [5]:
# Flux columns used as targets; the list order fixes the column order of y / y_eval.
target_columns = ['LE_F_MDS', 'NETRAD', 'H_F_MDS']

# Forcings are parsed with np.loadtxt, so these files must be purely numeric
# (no header row).
x = np.loadtxt(f_forcing, delimiter=',')
x_eval = np.loadtxt(f_forcing_test, delimiter=',')

# Fluxes are read with pandas (first row is the header); keep only the target
# columns and convert them to plain NumPy arrays.
y = pd.read_csv(f_obs)[target_columns].values
y_eval = pd.read_csv(f_obs_test)[target_columns].values

# Fail early if predictors and targets are misaligned, instead of surfacing the
# problem later during sensitivity analysis or training.
assert x.shape[0] == y.shape[0], "training forcings and fluxes differ in row count"
assert x_eval.shape[0] == y_eval.shape[0], "test forcings and fluxes differ in row count"
assert x.shape[1] == x_eval.shape[1], "training and test forcings differ in column count"


In [6]:
# Display the shapes of the training and evaluation arrays as a quick sanity check.
x.shape, y.shape, x_eval.shape, y_eval.shape

((35088, 14), (35088, 3), (26304, 14), (26304, 3))

# Configurations

In [7]:
# Ensemble size; reused below for 'n_model' and the parallel 'n_jobs' setting.
n_models = 100

# Number of training samples passed to the data loader ('num_train_sample').
Ns_train = 30000

# Activation names forwarded to the MLP as fixed hyperparameters.
hidden_activation, final_activation = 'sigmoid', 'leaky_relu'

# Random seeds, one per component so each can be varied independently.
seed = 1024          # ensemble seed ('ens_seed')
seed_model = 100     # model seed ('model_seed')
seed_dl = 10         # data-loader seed ('dl_seed')
seed_shuffle = 1234  # shuffle seed used by the sensitivity analysis
seed_predict = 3636  # not referenced in the cells shown here

# Verbosity flag; not referenced in the cells shown here.
training_verbose = 1


In [8]:
# Data configuration: keyword arguments forwarded to Data(x, y, ...).
# An empty string was previously tried for both scaler types
# (presumably that disables scaling -- TODO confirm in kim.data.Data).
data_params = dict(
    xscaler_type="standard",
    yscaler_type="standard",
)

# Sensitivity analysis configuration: keyword arguments forwarded to
# Data.calculate_sensitivity(...).
sensitivity_params = dict(
    method="pc",
    metric="corr",
    sst=True,
    ntest=100,
    alpha=0.05,
    k=3,
    n_jobs=100,
    seed_shuffle=seed_shuffle,
)

# Mapping parameters shared by every KIM instance created below.
# Key order is kept exactly as in the original literal.
map_configs = dict(
    model_type=MLP,
    n_model=n_models,
    ensemble_type="ens_random",
    # Hyperparameter candidates for the model.
    # Single-configuration alternative previously tried: depth=[6], width_size=[6].
    model_hp_choices=dict(
        depth=[1, 3, 5, 6],
        width_size=[3, 6, 10],
    ),
    # Model hyperparameters held constant across the ensemble.
    model_hp_fixed=dict(
        hidden_activation=hidden_activation,
        final_activation=final_activation,
        model_seed=seed_model,
    ),
    # Optimizer hyperparameter candidates.
    optax_hp_choices=dict(
        learning_rate=[0.01, 0.005, 0.003],
    ),
    # Optimizer hyperparameters held constant.
    optax_hp_fixed=dict(
        nsteps=300,
        optimizer_type="adam",
    ),
    # No data-loader hyperparameters are varied.
    dl_hp_choices=dict(),
    # Data-loader hyperparameters held constant.
    dl_hp_fixed=dict(
        dl_seed=seed_dl,
        num_train_sample=Ns_train,
        batch_size=64,
    ),
    ens_seed=seed,
    training_parallel=True,
    # One worker per ensemble member, using the 'loky' backend.
    parallel_config=dict(
        n_jobs=n_models,
        backend="loky",
        verbose=1,
    ),
    device=None,
)

# Exploratory data analysis

In [9]:
# Wrap the training arrays in a KIM Data object, forwarding the scaler settings.
data = Data(x, y, **data_params)
# Run the sensitivity analysis with the options from sensitivity_params.
# NOTE(review): the saved output repeats warnings at `c /= stddev[...]`; presumably a
# zero-variance series reached a correlation computation -- confirm.
data.calculate_sensitivity(**sensitivity_params)


  pid = os.fork()
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:,

In [10]:
# Display the boolean sensitivity mask (shown as 14 rows x 3 columns; presumably inputs x targets -- confirm).
data.sensitivity_mask

array([[False,  True,  True],
       [False, False, False],
       [ True,  True, False],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [False, False, False],
       [False, False,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [False, False, False],
       [False, False, False],
       [ True,  True, False]])

In [11]:
# Display the boolean conditional-sensitivity mask (same 14 x 3 layout as displayed for sensitivity_mask).
data.cond_sensitivity_mask

array([[False,  True,  True],
       [False, False, False],
       [ True, False, False],
       [ True, False, False],
       [ True,  True,  True],
       [ True,  True, False],
       [False, False, False],
       [False, False, False],
       [False, False,  True],
       [ True, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False],
       [ True, False, False]])

# Train the inverse mapping

In [12]:
# Sanity check: create a small JAX array and display the device(s) it is placed on.
d = jax.numpy.array([0,3,5,7])
d.devices()

{CpuDevice(id=0)}

In [13]:
# Initialize three different KIMs on the same data and mapping configuration:
#   kim1 - 'many2many' mapping with no mask option given
#   kim2 - 'many2one' mapping using the "sensitivity" mask option
#   kim3 - 'many2one' mapping using the "cond_sensitivity" mask option
kim1 = KIM(data, map_configs, map_option='many2many')
kim2 = KIM(data, map_configs, map_option='many2one', mask_option="sensitivity")
kim3 = KIM(data, map_configs, map_option='many2one', mask_option="cond_sensitivity")

# Train the mappings one after another, in the order they were created.
for kim_model in (kim1, kim2, kim3):
    kim_model.train()



 Performing ensemble training in parallel with 100 model configurations...



[Parallel(n_jobs=100)]: Using backend LokyBackend with 100 concurrent workers.
An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.
An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.
An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.
An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.
An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.
An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.
An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.
An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.
An NVIDIA GPU may be present on t

Training completes.

 Performing ensemble training in parallel with 100 model configurations...



[Parallel(n_jobs=100)]: Using backend LokyBackend with 100 concurrent workers.
100%|██████████| 300/300 [07:15<00:00,  1.45s/it]
100%|██████████| 300/300 [07:19<00:00,  1.47s/it]
 70%|███████   | 211/300 [07:20<03:02,  2.05s/it][Parallel(n_jobs=100)]: Done   2 out of 100 | elapsed:  7.4min remaining: 360.6min
100%|██████████| 300/300 [07:22<00:00,  1.48s/it]
100%|██████████| 300/300 [07:22<00:00,  1.48s/it]
100%|██████████| 300/300 [07:23<00:00,  1.48s/it]
100%|██████████| 300/300 [07:23<00:00,  1.48s/it]
100%|██████████| 300/300 [07:24<00:00,  1.48s/it]
100%|██████████| 300/300 [07:24<00:00,  1.48s/it]
100%|██████████| 300/300 [07:25<00:00,  1.48s/it]
100%|██████████| 300/300 [07:24<00:00,  1.48s/it]
100%|██████████| 300/300 [07:26<00:00,  1.49s/it]1.92s/it]
100%|██████████| 300/300 [07:27<00:00,  1.49s/it]
100%|██████████| 300/300 [07:27<00:00,  1.49s/it]
100%|██████████| 300/300 [07:26<00:00,  1.49s/it]
100%|██████████| 300/300 [07:26<00:00,  1.49s/it]
100%|██████████| 300/300 [07:2

Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [06:55<00:00,  1.39s/it]
100%|██████████| 300/300 [07:04<00:00,  1.41s/it]
 87%|████████▋ | 260/300 [07:04<01:25,  2.14s/it][Parallel(n_jobs=100)]: Done   2 out of 100 | elapsed:  7.1min remaining: 347.5min
100%|██████████| 300/300 [07:05<00:00,  1.42s/it]
100%|██████████| 300/300 [07:08<00:00,  1.43s/it]
100%|██████████| 300/300 [07:08<00:00,  1.43s/it]
100%|██████████| 300/300 [07:09<00:00,  1.43s/it]
100%|██████████| 300/300 [07:10<00:00,  1.44s/it]
100%|██████████| 300/300 [07:11<00:00,  1.44s/it]
100%|██████████| 300/300 [07:11<00:00,  1.44s/it]
100%|██████████| 300/300 [07:11<00:00,  1.44s/it]
100%|██████████| 300/300 [07:11<00:00,  1.44s/it]
100%|██████████| 300/300 [07:12<00:00,  1.44s/it]
100%|██████████| 300/300 [07:13<00:00,  1.44s/it]
100%|██████████| 300/300 [07:13<00:00,  1.44s/it]
100%|██████████| 300/300 [07:13<00:00,  1.44s/it]
100%|██████████| 300/300 [07:14<00:00,  1.45s/it]
100%|██████████| 300/300 [07:14<00:00,  1.45s/it]
100%|██████████| 3

Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [05:49<00:00,  1.17s/it]
100%|██████████| 300/300 [05:50<00:00,  1.17s/it]
 74%|███████▍  | 222/300 [05:50<02:06,  1.62s/it][Parallel(n_jobs=100)]: Done   2 out of 100 | elapsed:  5.9min remaining: 286.9min
100%|██████████| 300/300 [05:52<00:00,  1.17s/it]
100%|██████████| 300/300 [05:52<00:00,  1.17s/it]
100%|██████████| 300/300 [05:54<00:00,  1.18s/it]
100%|██████████| 300/300 [05:54<00:00,  1.18s/it]
100%|██████████| 300/300 [05:55<00:00,  1.18s/it]
100%|██████████| 300/300 [05:56<00:00,  1.19s/it]
100%|██████████| 300/300 [05:56<00:00,  1.19s/it]
100%|██████████| 300/300 [05:57<00:00,  1.19s/it]
100%|██████████| 300/300 [05:58<00:00,  1.19s/it]
100%|██████████| 300/300 [05:58<00:00,  1.20s/it]
100%|██████████| 300/300 [05:58<00:00,  1.20s/it]
100%|██████████| 300/300 [05:58<00:00,  1.20s/it]
100%|██████████| 300/300 [06:00<00:00,  1.20s/it]
100%|██████████| 300/300 [05:59<00:00,  1.20s/it]
100%|██████████| 300/300 [05:59<00:00,  1.20s/it]
100%|██████████| 3

Training completes.

 Performing ensemble training in parallel with 100 model configurations...



[Parallel(n_jobs=100)]: Using backend LokyBackend with 100 concurrent workers.
100%|██████████| 300/300 [07:32<00:00,  1.51s/it]
100%|██████████| 300/300 [07:34<00:00,  1.51s/it]
 98%|█████████▊| 294/300 [07:34<00:10,  1.68s/it][Parallel(n_jobs=100)]: Done   2 out of 100 | elapsed:  7.6min remaining: 371.3min
100%|██████████| 300/300 [07:36<00:00,  1.52s/it]
100%|██████████| 300/300 [07:37<00:00,  1.52s/it]
100%|██████████| 300/300 [07:37<00:00,  1.52s/it]
100%|██████████| 300/300 [07:38<00:00,  1.53s/it]
100%|██████████| 300/300 [07:39<00:00,  1.53s/it]
100%|██████████| 300/300 [07:39<00:00,  1.53s/it]
100%|██████████| 300/300 [07:40<00:00,  1.53s/it]
100%|██████████| 300/300 [07:40<00:00,  1.53s/it]
100%|██████████| 300/300 [07:40<00:00,  1.53s/it]
100%|██████████| 300/300 [07:41<00:00,  1.54s/it]
100%|██████████| 300/300 [07:42<00:00,  1.54s/it]
100%|██████████| 300/300 [07:41<00:00,  1.54s/it]
100%|██████████| 300/300 [07:43<00:00,  1.55s/it]
100%|██████████| 300/300 [07:44<00:00, 

Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [07:52<00:00,  1.58s/it]
100%|██████████| 300/300 [07:58<00:00,  1.60s/it]
100%|█████████▉| 299/300 [07:58<00:01,  1.34s/it][Parallel(n_jobs=100)]: Done   2 out of 100 | elapsed:  8.0min remaining: 392.1min
100%|██████████| 300/300 [07:59<00:00,  1.60s/it]
100%|██████████| 300/300 [07:59<00:00,  1.60s/it]
100%|██████████| 300/300 [08:00<00:00,  1.60s/it]
100%|██████████| 300/300 [08:00<00:00,  1.60s/it]
100%|██████████| 300/300 [08:00<00:00,  1.60s/it]
100%|██████████| 300/300 [08:01<00:00,  1.61s/it]
100%|██████████| 300/300 [08:02<00:00,  1.61s/it]
100%|██████████| 300/300 [08:03<00:00,  1.61s/it]
100%|██████████| 300/300 [08:03<00:00,  1.61s/it]
100%|██████████| 300/300 [08:04<00:00,  1.61s/it]
100%|██████████| 300/300 [08:04<00:00,  1.61s/it]
100%|██████████| 300/300 [08:05<00:00,  1.62s/it]
100%|██████████| 300/300 [08:05<00:00,  1.62s/it]
100%|██████████| 300/300 [08:06<00:00,  1.62s/it]
100%|██████████| 300/300 [08:06<00:00,  1.62s/it]
100%|██████████| 3

Training completes.

 Performing ensemble training in parallel with 100 model configurations...



100%|██████████| 300/300 [05:55<00:00,  1.19s/it]
100%|██████████| 300/300 [06:01<00:00,  1.20s/it]
 97%|█████████▋| 291/300 [06:01<00:09,  1.08s/it][Parallel(n_jobs=100)]: Done   2 out of 100 | elapsed:  6.0min remaining: 295.3min
100%|██████████| 300/300 [06:03<00:00,  1.21s/it]
100%|██████████| 300/300 [06:04<00:00,  1.22s/it]
100%|██████████| 300/300 [06:06<00:00,  1.22s/it]
100%|██████████| 300/300 [06:07<00:00,  1.23s/it]
100%|██████████| 300/300 [06:07<00:00,  1.23s/it]
100%|██████████| 300/300 [06:09<00:00,  1.23s/it]
100%|██████████| 300/300 [06:10<00:00,  1.23s/it]
100%|██████████| 300/300 [06:10<00:00,  1.23s/it]
100%|██████████| 300/300 [06:10<00:00,  1.24s/it]
100%|██████████| 300/300 [06:11<00:00,  1.24s/it]
100%|██████████| 300/300 [06:11<00:00,  1.24s/it]
100%|██████████| 300/300 [06:12<00:00,  1.24s/it]
100%|██████████| 300/300 [06:13<00:00,  1.24s/it]
100%|██████████| 300/300 [06:13<00:00,  1.25s/it]
100%|██████████| 300/300 [06:13<00:00,  1.24s/it]
100%|██████████| 3

Training completes.


100%|██████████| 300/300 [07:34<00:00,  1.52s/it]
100%|██████████| 300/300 [07:33<00:00,  1.51s/it]
[Parallel(n_jobs=100)]: Done 100 out of 100 | elapsed:  7.6min finished
