# Permutation Importance on Model M036

In [43]:
import pandas as pd
import catboost
from sklearn.model_selection import GroupKFold
from sklearn.metrics import mean_absolute_error
from tqdm import tqdm
import numpy as np

In [56]:
def permutation_importance(model, X_val, y_val, metric, threshold=0.005,
                           minimize=True, verbose=True, thread_count=20,
                           random_state=None):
    """Score each feature by how much shuffling it degrades the metric.

    Every column of ``X_val`` is shuffled in turn, the model is re-scored on
    the perturbed frame, and the column is put back. A feature whose shuffled
    score is within ``threshold`` of the unshuffled score (or better) is
    reported as "bad", i.e. the model does not appear to rely on it.

    Parameters
    ----------
    model : object
        Fitted estimator. Its ``predict`` must accept the feature frame and a
        ``thread_count`` keyword for the per-column predictions.
    X_val : pandas.DataFrame
        Validation features. Columns are overwritten temporarily during the
        run and always restored, even if prediction or scoring raises.
    y_val : array-like
        Validation targets, passed as the first argument to ``metric``.
    metric : callable
        ``metric(y_true, y_pred) -> float``.
    threshold : float, default 0.005
        Minimum degradation of the metric for a feature to count as useful.
    minimize : bool, default True
        True when a lower metric is better (e.g. MAE), False when a higher
        metric is better.
    verbose : bool, default True
        Print the base score and each per-column score.
    thread_count : int, default 20
        Forwarded to ``model.predict`` for the per-column predictions.
    random_state : int or None, default None
        Seed for the shuffles. ``None`` uses NumPy's global random state.

    Returns
    -------
    results : dict
        ``'base_score'`` plus one entry per column holding its shuffled score.
    bad_features : list
        Column names, in ``X_val`` column order, whose shuffling degraded the
        metric by less than ``threshold``.
    """
    # Both the np.random module and a RandomState expose .permutation().
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    results = {}
    base_score = metric(y_val, model.predict(X_val))
    results['base_score'] = base_score
    if verbose:
        print(f'Base score {results["base_score"]:.5}')

    columns = list(X_val.columns)
    for col in tqdm(columns):
        freezed_col = X_val[col].copy()
        try:
            X_val[col] = rng.permutation(X_val[col].to_numpy())
            preds = model.predict(X_val, thread_count=thread_count)
            results[col] = metric(y_val, preds)
        finally:
            # Restore unconditionally so an error or a manual interrupt never
            # leaves the caller's frame with a shuffled column.
            X_val[col] = freezed_col

        if verbose:
            print(f'column: {col} - {results[col]:.5}')

    # Only feature columns are compared, so 'base_score' can never end up in
    # the list and needs no removal afterwards.
    if minimize:
        # Lower is better: useful features push the score up when shuffled.
        bad_features = [c for c in columns if results[c] < base_score + threshold]
    else:
        # Higher is better: useful features pull the score down when shuffled.
        bad_features = [c for c in columns if results[c] > base_score - threshold]

    return results, bad_features

In [4]:
# Create an unfitted CatBoostRegressor; it serves as the container a saved model is loaded into.
model = catboost.CatBoostRegressor()

In [13]:
# Load the saved M036 1JHC model (file suffix "-1") from disk into `model`.
# The path is relative, so it resolves against the notebook's working directory.
model.load_model('../models/M036/M036-0705_2317-1JHC-1.model')

<catboost.core.CatBoostRegressor at 0x7fcfa9eeb080>

In [14]:
# Read the FE014 1JHC training table; later cells take the model's feature columns
# and the 'scalar_coupling_constant' target from this frame.
df = pd.read_parquet('../data/FE014/FE014-train-1JHC.parquet')

In [22]:
# FE008 is read only for its row skeleton (molecule name and coupling type);
# none of its feature columns are used.
train_df = pd.read_parquet('../data/FE008_train.parquet')
# Independent copy of the two identifier columns, used later to build CV groups.
mol_group = train_df.loc[:, ['molecule_name', 'type']].copy()

In [51]:
# --- 1JHC: permutation importance on the first GroupKFold validation split ---
bond_type = '1JHC'
N_FOLDS = 2
folds = GroupKFold(n_splits=N_FOLDS)

# Target, and the feature matrix restricted to the columns the loaded model expects.
y = df['scalar_coupling_constant']
X = df[model.feature_names_]

# Molecule name of every row of this coupling type; used as the grouping key
# for the split.
# NOTE(review): assumes these rows line up one-to-one, in order, with `df` - confirm.
mol_group_type = mol_group.loc[mol_group['type'] == bond_type, 'molecule_name']

# Only the first split is evaluated, so take it directly from the generator.
fold_n, (train_idx, valid_idx) = next(enumerate(folds.split(X, groups=mol_group_type)))
X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
# Copy the validation slice: permutation_importance writes into the frame it is given.
X_valid = X.iloc[valid_idx].copy()
y_valid = y.iloc[valid_idx]

# Saved model files are numbered from 1, hence fold_n + 1.
model = catboost.CatBoostRegressor()
model.load_model('../models/M036/M036-0705_2317-1JHC-{}.model'.format(fold_n + 1))
results, bad_features = permutation_importance(
    model, X_valid, y_valid, metric=mean_absolute_error)



  0%|          | 0/178 [00:00<?, ?it/s][A[A

Base score 0.61331




  1%|          | 1/178 [00:31<1:34:20, 31.98s/it][A[A

column: atom1_valence - 1.776




  1%|          | 2/178 [01:06<1:36:07, 32.77s/it][A[A

column: atom1_spin_multiplicity - 0.6139




  2%|▏         | 3/178 [01:39<1:35:36, 32.78s/it][A[A

column: distance - 1.1325




  2%|▏         | 4/178 [02:14<1:36:51, 33.40s/it][A[A

column: is_bond - 0.61331




  3%|▎         | 5/178 [02:49<1:37:29, 33.81s/it][A[A

column: distance_closest_to_0 - 2.5886




  3%|▎         | 6/178 [03:22<1:36:48, 33.77s/it][A[A

column: distance_2nd_closest_to_0 - 1.2727




  4%|▍         | 7/178 [03:54<1:34:51, 33.28s/it][A[A

column: distance_3rd_closest_to_0 - 1.0334




  4%|▍         | 8/178 [04:30<1:36:14, 33.97s/it][A[A

column: distance_4th_closest_to_0 - 0.86064




  5%|▌         | 9/178 [05:01<1:33:27, 33.18s/it][A[A

column: distance_5th_closest_to_0 - 0.80507




  6%|▌         | 10/178 [05:34<1:32:48, 33.15s/it][A[A

column: distance_6th_closest_to_0 - 0.79607




  6%|▌         | 11/178 [06:06<1:31:03, 32.72s/it][A[A

column: distance_7th_closest_to_0 - 0.82952




  7%|▋         | 12/178 [06:38<1:29:30, 32.35s/it][A[A

column: distance_8th_closest_to_0 - 0.79031




  7%|▋         | 13/178 [07:09<1:28:32, 32.20s/it][A[A

column: distance_9th_closest_to_0 - 0.72495




  8%|▊         | 14/178 [07:42<1:28:03, 32.21s/it][A[A

column: distance_10th_closest_to_0 - 0.70719




  8%|▊         | 15/178 [08:13<1:26:50, 31.97s/it][A[A

column: distance_closest_to_1 - 1.1037




  9%|▉         | 16/178 [08:44<1:25:43, 31.75s/it][A[A

column: distance_2nd_closest_to_1 - 1.8414




 10%|▉         | 17/178 [09:18<1:26:36, 32.27s/it][A[A

column: distance_3rd_closest_to_1 - 1.6567




 10%|█         | 18/178 [09:49<1:25:30, 32.07s/it][A[A

column: is_bond_3rd_closest_to_1 - 1.2667




 11%|█         | 19/178 [10:20<1:23:40, 31.57s/it][A[A

column: distance_4th_closest_to_1 - 1.0289




 11%|█         | 20/178 [10:50<1:22:25, 31.30s/it][A[A

column: distance_5th_closest_to_1 - 0.99229




 12%|█▏        | 21/178 [11:23<1:22:46, 31.63s/it][A[A

column: distance_6th_closest_to_1 - 0.88798




 12%|█▏        | 22/178 [11:52<1:20:33, 30.98s/it][A[A

column: distance_7th_closest_to_1 - 0.87393




 13%|█▎        | 23/178 [12:23<1:19:50, 30.90s/it][A[A

column: distance_8th_closest_to_1 - 1.0455




 13%|█▎        | 24/178 [12:53<1:18:18, 30.51s/it][A[A

column: distance_9th_closest_to_1 - 0.92209




 14%|█▍        | 25/178 [13:24<1:18:27, 30.77s/it][A[A

column: distance_10th_closest_to_1 - 0.88003




 15%|█▍        | 26/178 [13:58<1:20:25, 31.74s/it][A[A

column: closest_to_0_atomic_mass - 0.61331




 15%|█▌        | 27/178 [14:28<1:18:45, 31.30s/it][A[A

column: closest_to_0_valence - 1.6835




 16%|█▌        | 28/178 [14:59<1:17:46, 31.11s/it][A[A

column: closest_to_0_spin_multiplicity - 0.62148




 16%|█▋        | 29/178 [15:29<1:16:39, 30.87s/it][A[A

column: 2nd_closest_to_0_atomic_mass - 2.1351




 17%|█▋        | 30/178 [15:58<1:14:56, 30.38s/it][A[A

column: 2nd_closest_to_0_valence - 0.6762




 17%|█▋        | 31/178 [16:31<1:16:11, 31.10s/it][A[A

column: 2nd_closest_to_0_spin_multiplicity - 0.61361




 18%|█▊        | 32/178 [17:00<1:13:40, 30.28s/it][A[A

column: 3rd_closest_to_0_atomic_mass - 0.65557




 19%|█▊        | 33/178 [17:32<1:14:27, 30.81s/it][A[A

column: 3rd_closest_to_0_valence - 1.0774




 19%|█▉        | 34/178 [18:02<1:13:32, 30.64s/it][A[A

column: 3rd_closest_to_0_spin_multiplicity - 0.61615




 20%|█▉        | 35/178 [18:33<1:13:15, 30.74s/it][A[A

column: 4th_closest_to_0_atomic_mass - 0.63553




 20%|██        | 36/178 [19:03<1:12:02, 30.44s/it][A[A

column: 4th_closest_to_0_valence - 0.71002




 21%|██        | 37/178 [19:34<1:12:20, 30.78s/it][A[A

column: 4th_closest_to_0_spin_multiplicity - 0.61615




 21%|██▏       | 38/178 [20:06<1:12:15, 30.97s/it][A[A

column: 5th_closest_to_0_atomic_mass - 0.63519




 22%|██▏       | 39/178 [20:37<1:12:12, 31.17s/it][A[A

column: 5th_closest_to_0_valence - 0.63269




 22%|██▏       | 40/178 [21:10<1:13:05, 31.78s/it][A[A

column: 5th_closest_to_0_spin_multiplicity - 0.61618




 23%|██▎       | 41/178 [21:43<1:12:58, 31.96s/it][A[A

column: 6th_closest_to_0_atomic_mass - 0.72394




 24%|██▎       | 42/178 [22:13<1:11:21, 31.48s/it][A[A

column: 6th_closest_to_0_valence - 0.66566




 24%|██▍       | 43/178 [22:43<1:09:27, 30.87s/it][A[A

column: 6th_closest_to_0_spin_multiplicity - 0.61352




 25%|██▍       | 44/178 [23:13<1:08:33, 30.70s/it][A[A

column: 7th_closest_to_0_atomic_mass - 0.61864




 25%|██▌       | 45/178 [23:46<1:09:33, 31.38s/it][A[A

column: 7th_closest_to_0_valence - 0.63375




 26%|██▌       | 46/178 [24:16<1:08:17, 31.04s/it][A[A

column: 7th_closest_to_0_spin_multiplicity - 0.61382




 26%|██▋       | 47/178 [24:46<1:06:56, 30.66s/it][A[A

column: 8th_closest_to_0_atomic_mass - 0.62349




 27%|██▋       | 48/178 [25:18<1:07:31, 31.17s/it][A[A

column: 8th_closest_to_0_valence - 0.62552




 28%|██▊       | 49/178 [25:49<1:06:26, 30.90s/it][A[A

column: 8th_closest_to_0_spin_multiplicity - 0.61389




 28%|██▊       | 50/178 [26:19<1:05:32, 30.72s/it][A[A

column: 9th_closest_to_0_atomic_mass - 0.63851




 29%|██▊       | 51/178 [26:50<1:05:00, 30.71s/it][A[A

column: 9th_closest_to_0_valence - 0.63531




 29%|██▉       | 52/178 [27:20<1:04:34, 30.75s/it][A[A

column: 9th_closest_to_0_spin_multiplicity - 0.61359




 30%|██▉       | 53/178 [27:53<1:05:19, 31.36s/it][A[A

column: 10th_closest_to_0_atomic_mass - 0.61785




 30%|███       | 54/178 [28:25<1:05:15, 31.58s/it][A[A

column: 10th_closest_to_0_valence - 0.6202




 31%|███       | 55/178 [28:55<1:03:45, 31.10s/it][A[A

column: 10th_closest_to_0_spin_multiplicity - 0.61347




 31%|███▏      | 56/178 [29:28<1:04:12, 31.58s/it][A[A

column: closest_to_1_atomic_mass - 0.61331




 32%|███▏      | 57/178 [29:55<1:00:50, 30.17s/it][A[A

column: closest_to_1_valence - 0.61331




 33%|███▎      | 58/178 [30:24<59:42, 29.85s/it]  [A[A

column: closest_to_1_spin_multiplicity - 0.61331




 33%|███▎      | 59/178 [30:54<59:27, 29.98s/it][A[A

column: 2nd_closest_to_1_atomic_mass - 0.61577




 34%|███▎      | 60/178 [31:24<58:52, 29.94s/it][A[A

column: 2nd_closest_to_1_valence - 0.69077




 34%|███▍      | 61/178 [31:56<59:17, 30.41s/it][A[A

column: 2nd_closest_to_1_spin_multiplicity - 0.61465




 35%|███▍      | 62/178 [32:25<58:15, 30.14s/it][A[A

column: 3rd_closest_to_1_atomic_mass - 0.84223




 35%|███▌      | 63/178 [32:55<57:36, 30.06s/it][A[A

column: 3rd_closest_to_1_valence - 0.67584




 36%|███▌      | 64/178 [33:26<57:55, 30.49s/it][A[A

column: 3rd_closest_to_1_spin_multiplicity - 0.61635




 37%|███▋      | 65/178 [33:58<57:45, 30.67s/it][A[A

column: 4th_closest_to_1_atomic_mass - 0.93789




 37%|███▋      | 66/178 [34:30<58:14, 31.20s/it][A[A

column: 4th_closest_to_1_valence - 0.63747




 38%|███▊      | 67/178 [35:02<58:09, 31.43s/it][A[A

column: 4th_closest_to_1_spin_multiplicity - 0.61386




 38%|███▊      | 68/178 [35:34<57:57, 31.61s/it][A[A

column: 5th_closest_to_1_atomic_mass - 0.64639




 39%|███▉      | 69/178 [36:07<58:13, 32.05s/it][A[A

column: 5th_closest_to_1_valence - 0.68312




 39%|███▉      | 70/178 [36:38<57:16, 31.82s/it][A[A

column: 5th_closest_to_1_spin_multiplicity - 0.61421




 40%|███▉      | 71/178 [37:12<57:47, 32.41s/it][A[A

column: 6th_closest_to_1_atomic_mass - 0.64061




 40%|████      | 72/178 [37:45<57:39, 32.63s/it][A[A

column: 6th_closest_to_1_valence - 0.63212




 41%|████      | 73/178 [38:16<56:11, 32.11s/it][A[A

column: 6th_closest_to_1_spin_multiplicity - 0.6137




 42%|████▏     | 74/178 [38:50<56:19, 32.50s/it][A[A

column: 7th_closest_to_1_atomic_mass - 0.63553




 42%|████▏     | 75/178 [39:20<54:59, 32.03s/it][A[A

column: 7th_closest_to_1_valence - 0.63683




 43%|████▎     | 76/178 [39:51<53:49, 31.66s/it][A[A

column: 7th_closest_to_1_spin_multiplicity - 0.61374




 43%|████▎     | 77/178 [40:21<52:21, 31.11s/it][A[A

column: 8th_closest_to_1_atomic_mass - 0.62512




 44%|████▍     | 78/178 [40:53<52:00, 31.21s/it][A[A

column: 8th_closest_to_1_valence - 0.64077




 44%|████▍     | 79/178 [41:25<52:00, 31.52s/it][A[A

column: 8th_closest_to_1_spin_multiplicity - 0.61356




 45%|████▍     | 80/178 [41:54<50:14, 30.76s/it][A[A

column: 9th_closest_to_1_atomic_mass - 0.61882




 46%|████▌     | 81/178 [42:25<50:09, 31.02s/it][A[A

column: 9th_closest_to_1_valence - 0.62176




 46%|████▌     | 82/178 [42:56<49:27, 30.91s/it][A[A

column: 9th_closest_to_1_spin_multiplicity - 0.61354




 47%|████▋     | 83/178 [43:26<48:21, 30.54s/it][A[A

column: 10th_closest_to_1_atomic_mass - 0.61496




 47%|████▋     | 84/178 [43:57<48:17, 30.82s/it][A[A

column: 10th_closest_to_1_valence - 0.62028




 48%|████▊     | 85/178 [44:29<48:07, 31.05s/it][A[A

column: 10th_closest_to_1_spin_multiplicity - 0.61442




 48%|████▊     | 86/178 [45:01<47:58, 31.28s/it][A[A

column: tor_ang_2leftleft_mean - 0.67841




 49%|████▉     | 87/178 [45:33<48:04, 31.70s/it][A[A

column: tor_ang_2leftleft_min - 0.84259




 49%|████▉     | 88/178 [46:05<47:34, 31.72s/it][A[A

column: tor_ang_2leftleft_max - 0.8431




 50%|█████     | 89/178 [46:39<47:52, 32.28s/it][A[A

column: tor_ang_2leftleft_count - 1.6701




 51%|█████     | 90/178 [47:12<47:55, 32.67s/it][A[A

column: mol_wt - 0.63409




 51%|█████     | 91/178 [47:44<46:48, 32.28s/it][A[A

column: num_atoms - 0.61527




 52%|█████▏    | 92/178 [48:16<46:28, 32.43s/it][A[A

column: num_bonds - 0.72047




 52%|█████▏    | 93/178 [48:48<45:42, 32.27s/it][A[A

column: closest_to_0_dist_x_atomic_mass - 4.3528




 53%|█████▎    | 94/178 [49:23<46:00, 32.86s/it][A[A

column: 2nd_closest_to_0_dist_x_atomic_mass - 1.8096




 53%|█████▎    | 95/178 [49:55<45:13, 32.69s/it][A[A

column: 3rd_closest_to_0_dist_x_atomic_mass - 1.1726




 54%|█████▍    | 96/178 [50:29<45:10, 33.05s/it][A[A

column: 4th_closest_to_0_dist_x_atomic_mass - 0.81774




 54%|█████▍    | 97/178 [51:01<44:17, 32.80s/it][A[A

column: 5th_closest_to_0_dist_x_atomic_mass - 0.93882




 55%|█████▌    | 98/178 [51:32<43:14, 32.43s/it][A[A

column: 6th_closest_to_0_dist_x_atomic_mass - 0.76957




 56%|█████▌    | 99/178 [52:06<42:56, 32.61s/it][A[A

column: 7th_closest_to_0_dist_x_atomic_mass - 0.77507




 56%|█████▌    | 100/178 [52:36<41:29, 31.92s/it][A[A

column: 8th_closest_to_0_dist_x_atomic_mass - 0.71192




 57%|█████▋    | 101/178 [53:09<41:20, 32.21s/it][A[A

column: 9th_closest_to_0_dist_x_atomic_mass - 0.66743




 57%|█████▋    | 102/178 [53:40<40:20, 31.86s/it][A[A

column: 10th_closest_to_0_dist_x_atomic_mass - 0.64388




 58%|█████▊    | 103/178 [54:12<40:01, 32.02s/it][A[A

column: closest_to_1_dist_x_atomic_mass - 0.95639




 58%|█████▊    | 104/178 [54:42<38:48, 31.47s/it][A[A

column: 2nd_closest_to_1_dist_x_atomic_mass - 0.77687




 59%|█████▉    | 105/178 [55:15<38:50, 31.93s/it][A[A

column: 3rd_closest_to_1_dist_x_atomic_mass - 0.88634




 60%|█████▉    | 106/178 [55:46<37:43, 31.44s/it][A[A

column: 4th_closest_to_1_dist_x_atomic_mass - 0.95405




 60%|██████    | 107/178 [56:19<37:52, 32.01s/it][A[A

column: 5th_closest_to_1_dist_x_atomic_mass - 0.83194




 61%|██████    | 108/178 [56:49<36:48, 31.55s/it][A[A

column: 6th_closest_to_1_dist_x_atomic_mass - 0.7574




 61%|██████    | 109/178 [57:23<36:54, 32.09s/it][A[A

column: 7th_closest_to_1_dist_x_atomic_mass - 0.73054




 62%|██████▏   | 110/178 [57:53<35:51, 31.65s/it][A[A

column: 8th_closest_to_1_dist_x_atomic_mass - 0.70294




 62%|██████▏   | 111/178 [58:28<36:09, 32.39s/it][A[A

column: 9th_closest_to_1_dist_x_atomic_mass - 0.69386




 63%|██████▎   | 112/178 [58:58<35:05, 31.90s/it][A[A

column: 10th_closest_to_1_dist_x_atomic_mass - 0.66131




 63%|██████▎   | 113/178 [59:31<34:46, 32.10s/it][A[A

column: angle_clos_0_2nd - 0.61424




 64%|██████▍   | 114/178 [1:00:02<33:56, 31.82s/it][A[A

column: angle_clos_1_2nd - 0.61448




 65%|██████▍   | 115/178 [1:00:35<33:41, 32.09s/it][A[A

column: N1 - 1.1306




 65%|██████▌   | 116/178 [1:01:10<34:17, 33.18s/it][A[A

column: N2 - 0.66542




 66%|██████▌   | 117/178 [1:01:44<33:51, 33.30s/it][A[A

column: link0 - 0.6457




 66%|██████▋   | 118/178 [1:02:16<32:55, 32.93s/it][A[A

column: link1 - 0.6308




 67%|██████▋   | 119/178 [1:02:47<31:44, 32.29s/it][A[A

column: linkN - 0.63699




 67%|██████▋   | 120/178 [1:03:21<31:36, 32.69s/it][A[A

column: dist_xyz - 1.4731




 68%|██████▊   | 121/178 [1:03:52<30:35, 32.20s/it][A[A

column: inv_dist0 - 0.6535




 69%|██████▊   | 122/178 [1:04:26<30:38, 32.82s/it][A[A

column: inv_dist1 - 1.5953




 69%|██████▉   | 123/178 [1:04:58<29:58, 32.70s/it][A[A

column: inv_distP - 0.83181




 70%|██████▉   | 124/178 [1:05:29<28:56, 32.16s/it][A[A

column: inv_dist0R - 2.1328




 70%|███████   | 125/178 [1:06:03<28:53, 32.70s/it][A[A

column: inv_dist1R - 1.8092




 71%|███████   | 126/178 [1:06:35<28:09, 32.48s/it][A[A

column: inv_distPR - 1.449




 71%|███████▏  | 127/178 [1:07:08<27:46, 32.68s/it][A[A

column: inv_dist0E - 0.63093




 72%|███████▏  | 128/178 [1:07:39<26:41, 32.04s/it][A[A

column: inv_dist1E - 0.956




 72%|███████▏  | 129/178 [1:08:12<26:33, 32.52s/it][A[A

column: inv_distPE - 0.87678




 73%|███████▎  | 130/178 [1:08:44<25:51, 32.31s/it][A[A

column: linkM0 - 0.68619




 74%|███████▎  | 131/178 [1:09:16<25:14, 32.23s/it][A[A

column: linkM1 - 0.67559




 74%|███████▍  | 132/178 [1:09:49<24:50, 32.41s/it][A[A

column: min_molecule_atom_0_dist_xyz - 0.65722




 75%|███████▍  | 133/178 [1:10:22<24:30, 32.67s/it][A[A

column: mean_molecule_atom_0_dist_xyz - 0.65019




 75%|███████▌  | 134/178 [1:10:54<23:48, 32.47s/it][A[A

column: max_molecule_atom_0_dist_xyz - 1.1011




 76%|███████▌  | 135/178 [1:11:28<23:27, 32.73s/it][A[A

column: sd_molecule_atom_0_dist_xyz - 0.63801




 76%|███████▋  | 136/178 [1:11:59<22:39, 32.37s/it][A[A

column: min_molecule_atom_1_dist_xyz - 0.69981




 77%|███████▋  | 137/178 [1:12:29<21:41, 31.73s/it][A[A

column: mean_molecule_atom_1_dist_xyz - 0.67496




 78%|███████▊  | 138/178 [1:12:59<20:47, 31.20s/it][A[A

column: max_molecule_atom_1_dist_xyz - 0.69864




 78%|███████▊  | 139/178 [1:13:30<20:12, 31.10s/it][A[A

column: sd_molecule_atom_1_dist_xyz - 0.69622




 79%|███████▊  | 140/178 [1:14:02<19:49, 31.31s/it][A[A

column: coulomb_C.x - 0.73241




 79%|███████▉  | 141/178 [1:14:31<18:54, 30.66s/it][A[A

column: coulomb_F.x - 0.6134




 80%|███████▉  | 142/178 [1:15:04<18:42, 31.19s/it][A[A

column: coulomb_H.x - 0.71785




 80%|████████  | 143/178 [1:15:36<18:19, 31.43s/it][A[A

column: coulomb_N.x - 0.75563




 81%|████████  | 144/178 [1:16:07<17:49, 31.46s/it][A[A

column: coulomb_O.x - 0.77561




 81%|████████▏ | 145/178 [1:16:39<17:17, 31.43s/it][A[A

column: yukawa_C.x - 0.78017




 82%|████████▏ | 146/178 [1:17:10<16:49, 31.54s/it][A[A

column: yukawa_F.x - 0.61354




 83%|████████▎ | 147/178 [1:17:44<16:37, 32.19s/it][A[A

column: yukawa_H.x - 1.4876




 83%|████████▎ | 148/178 [1:18:16<16:00, 32.00s/it][A[A

column: yukawa_N.x - 1.0049




 84%|████████▎ | 149/178 [1:18:47<15:26, 31.95s/it][A[A

column: yukawa_O.x - 1.6764




 84%|████████▍ | 150/178 [1:19:20<14:55, 31.99s/it][A[A

column: coulomb_C.y - 0.8469




 85%|████████▍ | 151/178 [1:19:50<14:14, 31.66s/it][A[A

column: coulomb_F.y - 0.61377




 85%|████████▌ | 152/178 [1:20:22<13:39, 31.52s/it][A[A

column: coulomb_H.y - 0.7624




 86%|████████▌ | 153/178 [1:20:53<13:08, 31.55s/it][A[A

column: coulomb_N.y - 0.73516




 87%|████████▋ | 154/178 [1:21:25<12:42, 31.75s/it][A[A

column: coulomb_O.y - 0.8161




 87%|████████▋ | 155/178 [1:21:58<12:14, 31.93s/it][A[A

column: yukawa_C.y - 1.2065




 88%|████████▊ | 156/178 [1:22:29<11:34, 31.56s/it][A[A

column: yukawa_F.y - 0.61515




 88%|████████▊ | 157/178 [1:23:02<11:14, 32.11s/it][A[A

column: yukawa_H.y - 1.5787




 89%|████████▉ | 158/178 [1:23:32<10:28, 31.40s/it][A[A

column: yukawa_N.y - 1.1907




 89%|████████▉ | 159/178 [1:24:05<10:09, 32.08s/it][A[A

column: yukawa_O.y - 1.848




 90%|████████▉ | 160/178 [1:24:36<09:28, 31.57s/it][A[A

column: distC0 - 0.68654




 90%|█████████ | 161/178 [1:25:06<08:51, 31.28s/it][A[A

column: distC1 - 0.67602




 91%|█████████ | 162/178 [1:25:39<08:27, 31.70s/it][A[A

column: adH1 - 0.65134




 92%|█████████▏| 163/178 [1:26:12<07:59, 31.98s/it][A[A

column: adH2 - 0.6464




 92%|█████████▏| 164/178 [1:26:42<07:22, 31.59s/it][A[A

column: adH3 - 0.62371




 93%|█████████▎| 165/178 [1:27:14<06:49, 31.48s/it][A[A

column: adH4 - 0.65888




 93%|█████████▎| 166/178 [1:27:45<06:16, 31.39s/it][A[A

column: adC1 - 1.0725




 94%|█████████▍| 167/178 [1:28:17<05:46, 31.53s/it][A[A

column: adC2 - 1.0949




 94%|█████████▍| 168/178 [1:28:48<05:14, 31.44s/it][A[A

column: adC3 - 0.81226




 95%|█████████▍| 169/178 [1:29:19<04:42, 31.38s/it][A[A

column: adC4 - 0.71646




 96%|█████████▌| 170/178 [1:29:49<04:08, 31.04s/it][A[A

column: adN1 - 0.66122




 96%|█████████▌| 171/178 [1:30:19<03:35, 30.78s/it][A[A

column: adN2 - 0.62998




 97%|█████████▋| 172/178 [1:30:51<03:05, 30.88s/it][A[A

column: adN3 - 0.61459




 97%|█████████▋| 173/178 [1:31:21<02:34, 30.85s/it][A[A

column: adN4 - 0.61336




 98%|█████████▊| 174/178 [1:31:54<02:05, 31.37s/it][A[A

column: NC - 0.61839




 98%|█████████▊| 175/178 [1:32:27<01:35, 31.82s/it][A[A

column: NH - 0.64783




 99%|█████████▉| 176/178 [1:32:57<01:02, 31.35s/it][A[A

column: NN - 0.61412




 99%|█████████▉| 177/178 [1:33:30<00:31, 31.71s/it][A[A

column: NF - 0.61332




100%|██████████| 178/178 [1:34:00<00:00, 31.37s/it][A[A

column: NO - 0.61668


In [52]:
bad_features

['atom1_spin_multiplicity',
 'is_bond',
 'closest_to_0_atomic_mass',
 '2nd_closest_to_0_spin_multiplicity',
 '3rd_closest_to_0_spin_multiplicity',
 '4th_closest_to_0_spin_multiplicity',
 '5th_closest_to_0_spin_multiplicity',
 '6th_closest_to_0_spin_multiplicity',
 '7th_closest_to_0_spin_multiplicity',
 '8th_closest_to_0_spin_multiplicity',
 '9th_closest_to_0_spin_multiplicity',
 '10th_closest_to_0_atomic_mass',
 '10th_closest_to_0_spin_multiplicity',
 'closest_to_1_atomic_mass',
 'closest_to_1_valence',
 'closest_to_1_spin_multiplicity',
 '2nd_closest_to_1_atomic_mass',
 '2nd_closest_to_1_spin_multiplicity',
 '3rd_closest_to_1_spin_multiplicity',
 '4th_closest_to_1_spin_multiplicity',
 '5th_closest_to_1_spin_multiplicity',
 '6th_closest_to_1_spin_multiplicity',
 '7th_closest_to_1_spin_multiplicity',
 '8th_closest_to_1_spin_multiplicity',
 '9th_closest_to_1_spin_multiplicity',
 '10th_closest_to_1_atomic_mass',
 '10th_closest_to_1_spin_multiplicity',
 'num_atoms',
 'angle_clos_0_2nd',
 '

# 3JHC

In [57]:
# --- 3JHC: permutation importance on the first GroupKFold validation split ---
# N_FOLDS is assigned before the splitter is built, so the cell does not rely
# on a value left in the kernel by an earlier cell.
N_FOLDS = 2
bond_type = '3JHC'
folds = GroupKFold(n_splits=N_FOLDS)
df = pd.read_parquet('../data/FE014/FE014-train-{}.parquet'.format(bond_type))
# NOTE(review): `model` here is whatever an earlier cell loaded; this assumes its
# feature list matches the 3JHC model loaded inside the loop - confirm.
X = df[model.feature_names_]
y = df['scalar_coupling_constant']
# Molecule name of every row of this coupling type; used as the grouping key
# for the split.
mol_group_type = mol_group.loc[mol_group['type'] == bond_type]['molecule_name']

for fold_n, (train_idx, valid_idx) in enumerate(folds.split(X, groups=mol_group_type)):
    X_train, X_valid = X.iloc[train_idx], X.iloc[valid_idx]
    y_train, y_valid = y.iloc[train_idx], y.iloc[valid_idx]
    # Copy the validation slice: permutation_importance writes into the frame it is given.
    X_valid = X_valid.copy()
    model = catboost.CatBoostRegressor()
    # Saved model files are numbered from 1, hence fold_n + 1.
    model.load_model('../models/M036/M036-0705_2317-{}-{}.model'.format(bond_type, fold_n + 1))
    results, bad_features = permutation_importance(model, X_valid, y_valid, metric=mean_absolute_error)
    # Only the first fold is evaluated.
    break




  0%|          | 0/178 [00:00<?, ?it/s][A[A[A

Base score 0.37588





  1%|          | 1/178 [00:56<2:47:24, 56.75s/it][A[A[A

column: atom1_valence - 0.40058


KeyboardInterrupt: 