In [1]:
%matplotlib inline
import os
import ipyplot
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from models import isoefp_net
from training import train_model
from evaluation import make_dist
from data_processing import remove_excess_iso, shuffle_samples, \
    make_llp_boolean_labels, make_llp_iso_data_loaders, \
    iso_standard_scaler, select_cone_set, select_single_cone

In [2]:
# Run configuration: every tunable value read by the cells below.

# Reproducibility and optimisation settings (forwarded to train_model later on).
seed, num_epochs, batch_size = 1, 300, 128
learning_rate = 1e-6
# Early-stopping controls, passed as `patience` / `min_change` to train_model.
patience, min_change = 15, 5e-3

# Experiment layout: cone total, cross-validation folds, AUC threshold count.
num_cones, num_folds, num_thresh = 18, 5, 100

# Input locations: <data_dir>/<train_dir or test_dir>/<file name>.
data_dir, train_dir, test_dir = '../data', 'train', 'test'

# Plots and model logs share one run name and differ only in their root folder.
image_folder, log_folder = (
    f'./{root}/iso_const_denom_{num_thresh}_unique_thresh_{num_folds}_fold'
    for root in ('images', 'model_logs')
)

# File-name tags: muon label plus "<negative><positive>" proportion tags.
file_mu_label = 'mu1'
train_pos_prop_label, train_neg_prop_label = '95', '20'
train_prop_label = train_neg_prop_label + train_pos_prop_label
test_pos_prop_label, test_neg_prop_label = '95', '20'
test_prop_label = test_neg_prop_label + test_pos_prop_label

In [3]:
# Load data
# Each split stores its inputs ('isoset') and proportion labels ('labels')
# under the same "<mu label>_<proportion tag>_<kind>.npy" naming scheme.
x, f = (
    np.load(f'{data_dir}/{train_dir}/{file_mu_label}_{train_prop_label}_{kind}.npy')
    for kind in ('isoset', 'labels')
)
x_test, f_test = (
    np.load(f'{data_dir}/{test_dir}/{file_mu_label}_{test_prop_label}_{kind}.npy')
    for kind in ('isoset', 'labels')
)
# NOTE(review): unlike the files above, this path carries no `file_mu_label`
# prefix -- confirm that is intentional.
m_test = np.load(f'{data_dir}/{test_dir}/{test_prop_label}_M_set.npy')

In [4]:
# Rows can be all-zero for this muon yet still be present because the event is
# non-empty for the other muon; keep only rows whose entries sum to a
# positive value.
nonzeros = x.sum(axis=1) > 0
nonzeros_test = x_test.sum(axis=1) > 0

# Apply each mask to every array of its split so the rows stay aligned.
x, f = x[nonzeros], f[nonzeros]
x_test, f_test, m_test = (arr[nonzeros_test] for arr in (x_test, f_test, m_test))

In [5]:
# Choose samples such that an integer number of batches can be made;
# this ensures that each batch has uniform labels.
# NOTE: both calls overwrite x and f, so re-running this cell without
# re-running the load/filter cells operates on already-trimmed data.
x, f = remove_excess_iso(x, f, batch_size, num_folds, num_cones, seed)
# shuffle_samples returns a third value that is discarded here. Binding it to
# `_` also replaces IPython's "last output" variable of the same name.
x, f, _ = shuffle_samples(x, f, batch_size, seed)

Labels: [0.2  0.95], Initial Respective Counts [88647 59035]
Labels: [0.2  0.95], Final Respective Counts [88320 58880]


In [6]:
# Convert the proportion labels of both splits into boolean class labels
# (e.g. 0.2, 0.95 -> 0, 1), training split first and test split second.
y, y_test = (make_llp_boolean_labels(props) for props in (f, f_test))

In [7]:
# Make KFolds
# shuffle=False yields contiguous folds in the rows' current order. The rows
# were already passed through shuffle_samples in an earlier cell; presumably
# the contiguous split keeps that batch grouping intact -- TODO confirm.
kf = KFold(n_splits=num_folds, shuffle=False)

# Training

Train models for: 1 cone (largest) and 18 cones (all cones)

Keep the FPR and TPR denominators constant during the AUC calculation, and use ~100 unique outputs as thresholds.

In [8]:
# Train one model per (cone configuration, cross-validation fold) and record
# the test-set LLP ROC AUC of each, then summarise per cone configuration.
# NOTE(review): `prefixes` is never appended to or read in the visible cells.
prefixes = []
# Per cone configuration: mean and standard deviation of the fold AUCs.
cone_set_aucs = []
cone_set_sigs = []
# Cone configurations compared: 1 cone and 18 cones.
for cone_count in [1,18]:
    fold_aucs = []
    # train_index / valid_index stay defined after the loop (holding the last
    # fold's values) and are read by the debugging cells further down.
    for i, (train_index, valid_index) in enumerate(kf.split(x)):
        # The printed fold number is 1-based; file names below use 0-based `i`.
        print(f'Fold {i+1} / {num_folds}, {cone_count} cones')
        
        # Each split is a dict: 'x' = cone-selected inputs, 'f' = proportion
        # labels, 'y' = boolean labels; the test split additionally carries 'm'.
        train_dict = {'x': select_cone_set(x[train_index],cone_count), 
                      'f': f[train_index], 'y': y[train_index]}
        valid_dict = {'x': select_cone_set(x[valid_index],cone_count), 
                      'f': f[valid_index], 'y': y[valid_index]}
        test_dict = {'x': select_cone_set(x_test,cone_count),
                     'f': f_test, 'y': y_test, 'm': m_test}
        
        # The scaler's return values are unpacked positionally into the loader
        # factory. Presumably it standardises using training statistics --
        # confirm in data_processing.
        data_loaders = make_llp_iso_data_loaders(
            *iso_standard_scaler(train_dict, valid_dict, test_dict), 
            batch_size=batch_size, seed=seed)
        
        # Drop the local references to the split dicts before training starts.
        del train_dict, valid_dict, test_dict
        
        # NOTE(review): the meaning of the second positional argument (0) is
        # not visible from this notebook.
        model = isoefp_net(cone_count,0)
        
        # Shared file-name stem for this run's plots and logs.
        file_prefix = f'iso_const_denom_{num_thresh}_unique_thresh_{cone_count}_cones_fold_{i}'
        image_prefix = f'{image_folder}/{file_prefix}'
        output_prefix = f'{log_folder}/{file_prefix}'
        
        model, log = train_model(model, data_loaders, llp=True, 
                num_epochs=num_epochs, learning_rate=learning_rate, seed=seed,
                reproducible=True, early_stopping=True, min_change=min_change,
                patience=patience, image_prefix=image_prefix, 
                output_prefix=output_prefix, auc_num_thresh=num_thresh)
        
        del data_loaders
        
        # Keep only the test-set AUC from the training log.
        fold_aucs.append(log['test']['llp roc']['auc'])

    # np.std uses its default ddof=0 (population standard deviation).
    print(f'Average AUC: {np.mean(fold_aucs)} +- {np.std(fold_aucs)}')
    cone_set_aucs.append(np.mean(fold_aucs))
    cone_set_sigs.append(np.std(fold_aucs))

Fold 1 / 5, 1 cones
Using GPU
Epoch 1/300, Step 920/920, Loss: -0.029
train_loss: -0.004, val_loss: -0.008, val_auc: 0.771

Epoch 2/300, Step 920/920, Loss: -0.043
train_loss: -0.013, val_loss: -0.020, val_auc: 0.771

Epoch 3/300, Step 920/920, Loss: -0.065
train_loss: -0.028, val_loss: -0.037, val_auc: 0.771

Epoch 4/300, Step 920/920, Loss: -0.091
train_loss: -0.047, val_loss: -0.058, val_auc: 0.771

Epoch 5/300, Step 920/920, Loss: -0.114
train_loss: -0.069, val_loss: -0.080, val_auc: 0.771

Epoch 6/300, Step 920/920, Loss: -0.132
train_loss: -0.089, val_loss: -0.098, val_auc: 0.771

Epoch 7/300, Step 920/920, Loss: -0.144
train_loss: -0.106, val_loss: -0.114, val_auc: 0.771

Epoch 8/300, Step 920/920, Loss: -0.150
train_loss: -0.121, val_loss: -0.128, val_auc: 0.771

Epoch 9/300, Step 920/920, Loss: -0.151
train_loss: -0.133, val_loss: -0.140, val_auc: 0.771

Epoch 10/300, Step 920/920, Loss: -0.147
train_loss: -0.145, val_loss: -0.150, val_auc: 0.771

Epoch 11/300, Step 920/920, L



LLP AUC: 0.8481654490993485


  roc_dict[key] = np.array(roc_dict[key])


Fold 2 / 5, 1 cones
Using GPU
Epoch 1/300, Step 920/920, Loss: -0.005
train_loss: -0.003, val_loss: -0.006, val_auc: 0.769

Epoch 2/300, Step 920/920, Loss: -0.015
train_loss: -0.011, val_loss: -0.016, val_auc: 0.769

Epoch 3/300, Step 920/920, Loss: -0.034
train_loss: -0.024, val_loss: -0.033, val_auc: 0.769

Epoch 4/300, Step 920/920, Loss: -0.059
train_loss: -0.043, val_loss: -0.055, val_auc: 0.769

Epoch 5/300, Step 920/920, Loss: -0.082
train_loss: -0.065, val_loss: -0.078, val_auc: 0.769

Epoch 6/300, Step 920/920, Loss: -0.100
train_loss: -0.087, val_loss: -0.100, val_auc: 0.769

Epoch 7/300, Step 920/920, Loss: -0.111
train_loss: -0.108, val_loss: -0.119, val_auc: 0.769

Epoch 8/300, Step 920/920, Loss: -0.116
train_loss: -0.125, val_loss: -0.135, val_auc: 0.769

Epoch 9/300, Step 920/920, Loss: -0.115
train_loss: -0.141, val_loss: -0.150, val_auc: 0.769

Epoch 10/300, Step 920/920, Loss: -0.108
train_loss: -0.156, val_loss: -0.163, val_auc: 0.769

Epoch 11/300, Step 920/920, L



LLP AUC: 0.8537359251361372


  roc_dict[key] = np.array(roc_dict[key])


Fold 3 / 5, 1 cones
Using GPU
Epoch 1/300, Step 920/920, Loss: -0.018
train_loss: -0.003, val_loss: -0.004, val_auc: 0.773

Epoch 2/300, Step 920/920, Loss: -0.027
train_loss: -0.011, val_loss: -0.013, val_auc: 0.773

Epoch 3/300, Step 920/920, Loss: -0.046
train_loss: -0.023, val_loss: -0.027, val_auc: 0.773

Epoch 4/300, Step 920/920, Loss: -0.070
train_loss: -0.042, val_loss: -0.046, val_auc: 0.773

Epoch 5/300, Step 920/920, Loss: -0.094
train_loss: -0.064, val_loss: -0.067, val_auc: 0.773

Epoch 6/300, Step 920/920, Loss: -0.112
train_loss: -0.086, val_loss: -0.087, val_auc: 0.773

Epoch 7/300, Step 920/920, Loss: -0.123
train_loss: -0.107, val_loss: -0.106, val_auc: 0.773

Epoch 8/300, Step 920/920, Loss: -0.129
train_loss: -0.124, val_loss: -0.122, val_auc: 0.773

Epoch 9/300, Step 920/920, Loss: -0.130
train_loss: -0.139, val_loss: -0.138, val_auc: 0.773

Epoch 10/300, Step 920/920, Loss: -0.126
train_loss: -0.153, val_loss: -0.153, val_auc: 0.773

Epoch 11/300, Step 920/920, L



LLP AUC: 0.8523628358097738


  roc_dict[key] = np.array(roc_dict[key])


Fold 4 / 5, 1 cones
Using GPU
Epoch 1/300, Step 920/920, Loss: -0.006
train_loss: -0.003, val_loss: -0.006, val_auc: 0.772

Epoch 2/300, Step 920/920, Loss: -0.016
train_loss: -0.011, val_loss: -0.016, val_auc: 0.772

Epoch 3/300, Step 920/920, Loss: -0.036
train_loss: -0.024, val_loss: -0.033, val_auc: 0.772

Epoch 4/300, Step 920/920, Loss: -0.061
train_loss: -0.043, val_loss: -0.054, val_auc: 0.772

Epoch 5/300, Step 920/920, Loss: -0.086
train_loss: -0.065, val_loss: -0.077, val_auc: 0.772

Epoch 6/300, Step 920/920, Loss: -0.105
train_loss: -0.087, val_loss: -0.098, val_auc: 0.772

Epoch 7/300, Step 920/920, Loss: -0.118
train_loss: -0.107, val_loss: -0.116, val_auc: 0.772

Epoch 8/300, Step 920/920, Loss: -0.124
train_loss: -0.124, val_loss: -0.132, val_auc: 0.772

Epoch 9/300, Step 920/920, Loss: -0.125
train_loss: -0.139, val_loss: -0.146, val_auc: 0.772

Epoch 10/300, Step 920/920, Loss: -0.120
train_loss: -0.152, val_loss: -0.159, val_auc: 0.772

Epoch 11/300, Step 920/920, L



LLP AUC: 0.8525313114140477


  roc_dict[key] = np.array(roc_dict[key])


Fold 5 / 5, 1 cones
Using GPU
Epoch 1/300, Step 920/920, Loss: 0.0116
train_loss: -0.003, val_loss: -0.007, val_auc: 0.773

Epoch 2/300, Step 920/920, Loss: 0.0011
train_loss: -0.010, val_loss: -0.017, val_auc: 0.773

Epoch 3/300, Step 920/920, Loss: -0.011
train_loss: -0.022, val_loss: -0.032, val_auc: 0.773

Epoch 4/300, Step 920/920, Loss: -0.026
train_loss: -0.040, val_loss: -0.054, val_auc: 0.773

Epoch 5/300, Step 920/920, Loss: -0.044
train_loss: -0.063, val_loss: -0.077, val_auc: 0.773

Epoch 6/300, Step 920/920, Loss: -0.065
train_loss: -0.086, val_loss: -0.100, val_auc: 0.773

Epoch 7/300, Step 920/920, Loss: -0.093
train_loss: -0.108, val_loss: -0.121, val_auc: 0.773

Epoch 8/300, Step 920/920, Loss: -0.126
train_loss: -0.128, val_loss: -0.139, val_auc: 0.773

Epoch 9/300, Step 920/920, Loss: -0.165
train_loss: -0.146, val_loss: -0.155, val_auc: 0.773

Epoch 10/300, Step 920/920, Loss: -0.210
train_loss: -0.162, val_loss: -0.170, val_auc: 0.773

Epoch 11/300, Step 920/920, L



threshold 102/103

  roc_dict[key] = np.array(roc_dict[key])


LLP AUC: 0.8484801257159142
Average AUC: 0.8510551294350442 +- 0.0022828390825759422
Fold 1 / 5, 18 cones
Using GPU
Epoch 1/300, Step 920/920, Loss: 0.0022
train_loss: -0.004, val_loss: -0.007, val_auc: 0.772

Epoch 2/300, Step 920/920, Loss: -0.012
train_loss: -0.012, val_loss: -0.018, val_auc: 0.769

Epoch 3/300, Step 920/920, Loss: -0.036
train_loss: -0.026, val_loss: -0.035, val_auc: 0.769

Epoch 4/300, Step 920/920, Loss: -0.063
train_loss: -0.045, val_loss: -0.055, val_auc: 0.768

Epoch 5/300, Step 920/920, Loss: -0.086
train_loss: -0.065, val_loss: -0.075, val_auc: 0.768

Epoch 6/300, Step 920/920, Loss: -0.101
train_loss: -0.083, val_loss: -0.092, val_auc: 0.768

Epoch 7/300, Step 920/920, Loss: -0.111
train_loss: -0.098, val_loss: -0.105, val_auc: 0.768

Epoch 8/300, Step 920/920, Loss: -0.116
train_loss: -0.110, val_loss: -0.117, val_auc: 0.768

Epoch 9/300, Step 920/920, Loss: -0.118
train_loss: -0.121, val_loss: -0.126, val_auc: 0.768

Epoch 10/300, Step 920/920, Loss: -0.1

  roc_dict[key] = np.array(roc_dict[key])


Fold 2 / 5, 18 cones
Using GPU
Epoch 1/300, Step 920/920, Loss: 0.0058
train_loss: -0.003, val_loss: -0.004, val_auc: 0.765

Epoch 2/300, Step 920/920, Loss: -0.005
train_loss: -0.009, val_loss: -0.013, val_auc: 0.766

Epoch 3/300, Step 920/920, Loss: -0.025
train_loss: -0.021, val_loss: -0.028, val_auc: 0.766

Epoch 4/300, Step 920/920, Loss: -0.049
train_loss: -0.038, val_loss: -0.047, val_auc: 0.766

Epoch 5/300, Step 920/920, Loss: -0.070
train_loss: -0.057, val_loss: -0.067, val_auc: 0.765

Epoch 6/300, Step 920/920, Loss: -0.085
train_loss: -0.076, val_loss: -0.085, val_auc: 0.765

Epoch 7/300, Step 920/920, Loss: -0.093
train_loss: -0.092, val_loss: -0.100, val_auc: 0.765

Epoch 8/300, Step 920/920, Loss: -0.097
train_loss: -0.106, val_loss: -0.113, val_auc: 0.765

Epoch 9/300, Step 920/920, Loss: -0.095
train_loss: -0.119, val_loss: -0.124, val_auc: 0.765

Epoch 10/300, Step 920/920, Loss: -0.088
train_loss: -0.131, val_loss: -0.134, val_auc: 0.765

Epoch 11/300, Step 920/920, 

  roc_dict[key] = np.array(roc_dict[key])


Fold 3 / 5, 18 cones
Using GPU
Epoch 1/300, Step 920/920, Loss: -0.008
train_loss: -0.002, val_loss: -0.005, val_auc: 0.771

Epoch 2/300, Step 920/920, Loss: -0.022
train_loss: -0.010, val_loss: -0.014, val_auc: 0.771

Epoch 3/300, Step 920/920, Loss: -0.047
train_loss: -0.024, val_loss: -0.029, val_auc: 0.771

Epoch 4/300, Step 920/920, Loss: -0.075
train_loss: -0.043, val_loss: -0.047, val_auc: 0.770

Epoch 5/300, Step 920/920, Loss: -0.098
train_loss: -0.064, val_loss: -0.065, val_auc: 0.770

Epoch 6/300, Step 920/920, Loss: -0.115
train_loss: -0.083, val_loss: -0.081, val_auc: 0.770

Epoch 7/300, Step 920/920, Loss: -0.126
train_loss: -0.099, val_loss: -0.093, val_auc: 0.770

Epoch 8/300, Step 920/920, Loss: -0.132
train_loss: -0.111, val_loss: -0.104, val_auc: 0.770

Epoch 9/300, Step 920/920, Loss: -0.135
train_loss: -0.120, val_loss: -0.112, val_auc: 0.770

Epoch 10/300, Step 920/920, Loss: -0.136
train_loss: -0.128, val_loss: -0.120, val_auc: 0.770

Epoch 11/300, Step 920/920, 

  roc_dict[key] = np.array(roc_dict[key])


Fold 4 / 5, 18 cones
Using GPU
Epoch 1/300, Step 920/920, Loss: 0.0099
train_loss: -0.002, val_loss: -0.004, val_auc: 0.767

Epoch 2/300, Step 920/920, Loss: 0.0002
train_loss: -0.007, val_loss: -0.011, val_auc: 0.770

Epoch 3/300, Step 920/920, Loss: -0.017
train_loss: -0.016, val_loss: -0.023, val_auc: 0.770

Epoch 4/300, Step 920/920, Loss: -0.041
train_loss: -0.031, val_loss: -0.041, val_auc: 0.770

Epoch 5/300, Step 920/920, Loss: -0.064
train_loss: -0.050, val_loss: -0.060, val_auc: 0.770

Epoch 6/300, Step 920/920, Loss: -0.082
train_loss: -0.068, val_loss: -0.078, val_auc: 0.770

Epoch 7/300, Step 920/920, Loss: -0.093
train_loss: -0.085, val_loss: -0.093, val_auc: 0.770

Epoch 8/300, Step 920/920, Loss: -0.100
train_loss: -0.099, val_loss: -0.106, val_auc: 0.770

Epoch 9/300, Step 920/920, Loss: -0.102
train_loss: -0.111, val_loss: -0.118, val_auc: 0.769

Epoch 10/300, Step 920/920, Loss: -0.100
train_loss: -0.122, val_loss: -0.128, val_auc: 0.769

Epoch 11/300, Step 920/920, 

  roc_dict[key] = np.array(roc_dict[key])


LLP AUC: 0.7047776510322961
Fold 5 / 5, 18 cones
Using GPU
Epoch 1/300, Step 920/920, Loss: -0.004
train_loss: -0.002, val_loss: -0.005, val_auc: 0.772

Epoch 2/300, Step 920/920, Loss: -0.007
train_loss: -0.009, val_loss: -0.015, val_auc: 0.771

Epoch 3/300, Step 920/920, Loss: -0.011
train_loss: -0.021, val_loss: -0.031, val_auc: 0.771

Epoch 4/300, Step 920/920, Loss: -0.016
train_loss: -0.039, val_loss: -0.051, val_auc: 0.771

Epoch 5/300, Step 920/920, Loss: -0.022
train_loss: -0.058, val_loss: -0.071, val_auc: 0.770

Epoch 6/300, Step 920/920, Loss: -0.029
train_loss: -0.076, val_loss: -0.089, val_auc: 0.770

Epoch 7/300, Step 920/920, Loss: -0.039
train_loss: -0.091, val_loss: -0.103, val_auc: 0.770

Epoch 8/300, Step 920/920, Loss: -0.052
train_loss: -0.104, val_loss: -0.115, val_auc: 0.770

Epoch 9/300, Step 920/920, Loss: -0.067
train_loss: -0.115, val_loss: -0.125, val_auc: 0.770

Epoch 10/300, Step 920/920, Loss: -0.086
train_loss: -0.125, val_loss: -0.134, val_auc: 0.770



  roc_dict[key] = np.array(roc_dict[key])


Average AUC: 0.7317984107478812 +- 0.03371505379452208


# Performance

### AUC Summary

In [9]:
# Print the cross-fold mean and spread of the test AUC per cone configuration.
# The loop variable is deliberately not called `num_cones`: that name holds the
# configured cone total from the parameter cell, and a module-level loop
# variable would overwrite it with whatever value the loop ends on.
for auc, sig, n_cones in zip(cone_set_aucs, cone_set_sigs, [1,18]):
    print(f'{n_cones} cones AUC: {auc} +- {sig}')

1 cones AUC: 0.8510551294350442 +- 0.0022828390825759422
18 cones AUC: 0.7317984107478812 +- 0.03371505379452208


### Valid Outputs - Largest Cone

In the plot legends, "signal" refers to the majority-signal sample (95% signal) and "background" refers to the majority-background sample (20% signal).

In [10]:
# Gather the saved 'valid_output' plots of the 1-cone models.
# os.listdir returns names in arbitrary order, so sort them to display the
# images in the same (alphabetical) order on every run.
# '1_cones' cannot match an '18_cones' file name: there the character right
# before '_cones' is '8'.
valid_output_images = [
    f'{image_folder}/{fname}'
    for fname in sorted(os.listdir(image_folder))
    if 'valid_output' in fname and '1_cones' in fname
]
ipyplot.plot_images(valid_output_images,img_width=300)

### Valid Outputs - All Cones

In [11]:
# Gather the saved 'valid_output' plots of the 18-cone models.
# os.listdir returns names in arbitrary order, so sort them to display the
# images in the same (alphabetical) order on every run.
valid_output_images = [
    f'{image_folder}/{fname}'
    for fname in sorted(os.listdir(image_folder))
    if 'valid_output' in fname and '18_cones' in fname
]
ipyplot.plot_images(valid_output_images,img_width=300)

### Test Outputs - Largest Cone

In [12]:
# Gather the saved 'test_output' plots of the 1-cone models.
# os.listdir returns names in arbitrary order, so sort them to display the
# images in the same (alphabetical) order on every run.
# '1_cones' cannot match an '18_cones' file name: there the character right
# before '_cones' is '8'.
test_output_images = [
    f'{image_folder}/{fname}'
    for fname in sorted(os.listdir(image_folder))
    if 'test_output' in fname and '1_cones' in fname
]
ipyplot.plot_images(test_output_images,img_width=300)

### Test Outputs - All Cones

In [13]:
# Gather the saved 'test_output' plots of the 18-cone models.
# os.listdir returns names in arbitrary order, so sort them to display the
# images in the same (alphabetical) order on every run.
test_output_images = [
    f'{image_folder}/{fname}'
    for fname in sorted(os.listdir(image_folder))
    if 'test_output' in fname and '18_cones' in fname
]
ipyplot.plot_images(test_output_images,img_width=300)

### ROC Curve - Largest Cone

In [14]:
# Gather the saved 'llp_roc' plots of the 1-cone models.
# os.listdir returns names in arbitrary order, so sort them to display the
# images in the same (alphabetical) order on every run.
# '1_cones' cannot match an '18_cones' file name: there the character right
# before '_cones' is '8'.
llp_roc_images = [
    f'{image_folder}/{fname}'
    for fname in sorted(os.listdir(image_folder))
    if 'llp_roc' in fname and '1_cones' in fname
]
ipyplot.plot_images(llp_roc_images,img_width=300)

### ROC Curve - All Cones

In [15]:
# Gather the saved 'llp_roc' plots of the 18-cone models.
# os.listdir returns names in arbitrary order, so sort them to display the
# images in the same (alphabetical) order on every run.
llp_roc_images = [
    f'{image_folder}/{fname}'
    for fname in sorted(os.listdir(image_folder))
    if 'llp_roc' in fname and '18_cones' in fname
]
ipyplot.plot_images(llp_roc_images,img_width=300)

### Loss History - Largest Cone

In [16]:
# Gather the saved 'loss_history' plots of the 1-cone models.
# os.listdir returns names in arbitrary order, so sort them to display the
# images in the same (alphabetical) order on every run.
# '1_cones' cannot match an '18_cones' file name: there the character right
# before '_cones' is '8'.
loss_history_images = [
    f'{image_folder}/{fname}'
    for fname in sorted(os.listdir(image_folder))
    if 'loss_history' in fname and '1_cones' in fname
]
ipyplot.plot_images(loss_history_images,img_width=300)

### Loss History - All Cones

In [17]:
# Gather the saved 'loss_history' plots of the 18-cone models.
# os.listdir returns names in arbitrary order, so sort them to display the
# images in the same (alphabetical) order on every run.
loss_history_images = [
    f'{image_folder}/{fname}'
    for fname in sorted(os.listdir(image_folder))
    if 'loss_history' in fname and '18_cones' in fname
]
ipyplot.plot_images(loss_history_images,img_width=300)

### Signal Proportion Above Threshold - Largest Cone

In [18]:
# Gather the saved 'test_sig_frac' plots of the 1-cone models.
# os.listdir returns names in arbitrary order, so sort them to display the
# images in the same (alphabetical) order on every run.
# '1_cones' cannot match an '18_cones' file name: there the character right
# before '_cones' is '8'.
test_frac_images = [
    f'{image_folder}/{fname}'
    for fname in sorted(os.listdir(image_folder))
    if 'test_sig_frac' in fname and '1_cones' in fname
]
ipyplot.plot_images(test_frac_images,img_width=300)

### Signal Proportion Std. Dev. - Largest Cone

In [19]:
# Gather the saved 'test_frac_sigma' plots of the 1-cone models.
# os.listdir returns names in arbitrary order, so sort them to display the
# images in the same (alphabetical) order on every run.
# '1_cones' cannot match an '18_cones' file name: there the character right
# before '_cones' is '8'.
test_frac_sigma_images = [
    f'{image_folder}/{fname}'
    for fname in sorted(os.listdir(image_folder))
    if 'test_frac_sigma' in fname and '1_cones' in fname
]
ipyplot.plot_images(test_frac_sigma_images,img_width=300)

### Signal Proportion Above Threshold - All Cones

In [20]:
# Gather the saved 'test_sig_frac' plots of the 18-cone models.
# os.listdir returns names in arbitrary order, so sort them to display the
# images in the same (alphabetical) order on every run.
test_frac_images = [
    f'{image_folder}/{fname}'
    for fname in sorted(os.listdir(image_folder))
    if 'test_sig_frac' in fname and '18_cones' in fname
]
ipyplot.plot_images(test_frac_images,img_width=300)

### Signal Proportion Std. Dev. - All Cones

In [21]:
# Gather the saved 'test_frac_sigma' plots of the 18-cone models.
# os.listdir returns names in arbitrary order, so sort them to display the
# images in the same (alphabetical) order on every run.
test_frac_sigma_images = [
    f'{image_folder}/{fname}'
    for fname in sorted(os.listdir(image_folder))
    if 'test_frac_sigma' in fname and '18_cones' in fname
]
ipyplot.plot_images(test_frac_sigma_images,img_width=300)

# Debugging

### Check data shapes

In [22]:
# Shape sanity check: the full array, then its 18- and 1-cone selections,
# then the same two selections for the train rows and for the validation rows.
# train_index / valid_index hold the last fold produced by the training loop.
print(x.shape)
for debug_cones in (18, 1):
    print(select_cone_set(x, debug_cones).shape)
for debug_rows in (train_index, valid_index):
    for debug_cones in (18, 1):
        print(select_cone_set(x[debug_rows], debug_cones).shape)

(147200, 18)
(147200, 18)
(147200, 1)
(117760, 18)
(117760, 1)
(29440, 18)
(29440, 1)


### Check preprocessing - 18 Cones

In [23]:
# Check that iso_standard_scaler agrees with a hand-written standardisation of
# the training inputs for the 18-cone selection.
# train_index / valid_index hold the last fold produced by the training loop,
# so this cell must run after the training cell.
debug_train = {'x': select_cone_set(x[train_index],18),
               'f': f[train_index], 'y': y[train_index]}
debug_valid = {'x': select_cone_set(x[valid_index],18),
               'f': f[valid_index], 'y': y[valid_index]}
debug_test = {'x': select_cone_set(x_test,18),
              'f': f_test, 'y': y_test, 'm': m_test}

print('Before preprocessing:')
print(debug_train['x'])
print('')
# Reference result: per-column (value - mean) / std over the training rows.
# It is computed before the scaler call, so it always uses the raw inputs.
manual_x = (debug_train['x'] - np.mean(debug_train['x'], axis=0)) / np.std(debug_train['x'], axis=0)
print('After manual preprocessing:')
print(manual_x)
print('')
debug_train, debug_valid, debug_test = iso_standard_scaler(debug_train, debug_valid, debug_test)
print('After scikit preprocessing:')
print(debug_train['x'])
print('')
print('All elements match between methods?')
# Compare with a floating-point tolerance: two equivalent implementations can
# differ in the last bits, which would make an exact `==` check report False.
print(np.allclose(manual_x, debug_train['x']))

Before preprocessing:
[[1.03881079e+00 1.03881079e+00 1.03881079e+00 ... 1.62143552e-01
  1.52247402e-01 0.00000000e+00]
 [1.29244496e+00 1.26920478e+00 1.22570187e+00 ... 3.75824974e-01
  3.05820014e-01 4.00359768e-02]
 [4.02241225e+00 4.00705416e+00 3.93862231e+00 ... 2.54706716e+00
  2.35164959e+00 1.32495951e+00]
 ...
 [7.58893694e-02 6.97016814e-02 6.55898405e-02 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [1.84335222e-01 1.64815126e-01 1.55270645e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [7.31885704e-02 5.62381267e-02 5.62381267e-02 ... 3.66512005e-03
  0.00000000e+00 0.00000000e+00]]

After manual preprocessing:
[[ 0.82884319  0.87958961  0.92987845 ...  0.13266261  0.29042151
  -0.16675095]
 [ 1.20330626  1.22524017  1.21471529 ...  0.71883432  0.83612329
   0.0893759 ]
 [ 5.23380401  5.33272223  5.34942138 ...  6.67499431  8.10573219
   8.30956782]
 ...
 [-0.59280484 -0.57432467 -0.55338753 ... -0.31213021 -0.25057129
  -0.16675095]
 [-0.43269642 -0.431

In [24]:
# Check that iso_standard_scaler agrees with a hand-written standardisation of
# the training inputs for the 1-cone selection.
# train_index / valid_index hold the last fold produced by the training loop,
# so this cell must run after the training cell.
debug_train = {'x': select_cone_set(x[train_index],1),
               'f': f[train_index], 'y': y[train_index]}
debug_valid = {'x': select_cone_set(x[valid_index],1),
               'f': f[valid_index], 'y': y[valid_index]}
debug_test = {'x': select_cone_set(x_test,1),
              'f': f_test, 'y': y_test, 'm': m_test}

print('Before preprocessing:')
print(debug_train['x'])
print('')
# Reference result: per-column (value - mean) / std over the training rows.
# It is computed before the scaler call, so it always uses the raw inputs.
manual_x = (debug_train['x'] - np.mean(debug_train['x'], axis=0)) / np.std(debug_train['x'], axis=0)
print('After manual preprocessing:')
print(manual_x)
print('')
debug_train, debug_valid, debug_test = iso_standard_scaler(debug_train, debug_valid, debug_test)
print('After scikit preprocessing:')
print(debug_train['x'])
print('')
print('All elements match between methods?')
# Compare with a floating-point tolerance: two equivalent implementations can
# differ in the last bits, which would make an exact `==` check report False.
print(np.allclose(manual_x, debug_train['x']))

Before preprocessing:
[[1.03881079]
 [1.29244496]
 [4.02241225]
 ...
 [0.07588937]
 [0.18433522]
 [0.07318857]]

After manual preprocessing:
[[ 0.82884319]
 [ 1.20330626]
 [ 5.23380401]
 ...
 [-0.59280484]
 [-0.43269642]
 [-0.59679228]]

After scikit preprocessing:
[[ 0.82884319]
 [ 1.20330626]
 [ 5.23380401]
 ...
 [-0.59280484]
 [-0.43269642]
 [-0.59679228]]

All elements match between methods?
True
