# Reassign DecisionTree leaves by $\Delta$IA

## Load Balrog-Bagpipes catalog

In [1]:
import pickle

In [2]:
# Location of the pickled Balrog-Bagpipes catalog.
file = '/global/cfs/cdirs/des/elisa/IA_decisiontree/decisiontree_data/balrog-bagpipes.pkl'

# Open the pickle inside a context manager so the file handle is closed as
# soon as the catalog has been read, instead of lingering until garbage
# collection.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
# encoding='latin1' is the setting the pickle docs prescribe for Python 2
# pickles holding NumPy data; presumably that is why it is needed here.
with open(file, 'rb') as catalog_file:
    data = pickle.load(catalog_file, encoding='latin1')
print(f'Length of Balrog-Bagpipes catalog: {len(data)}')

Length of Balrog-Bagpipes catalog: 2417437


In [3]:
# Discard every row in which 'stell_best' or 'ssfr_best' is NaN.
# `data` is rebound to the filtered table, so later cells see the cleaned catalog.
data = data.dropna(axis=0, how='any', subset=['stell_best', 'ssfr_best'])
n_valid_rows = len(data)
print(f'Length of Balrog-Bagpipes catalog (no nan): {n_valid_rows}')

Length of Balrog-Bagpipes catalog (no nan): 2417396


In [4]:
# Report how many catalog rows carry each value (0-3) of the 'bin' column.
for bin_idx in range(4):
    # Summing the boolean mask counts the matching rows directly.
    n_in_bin = (data["bin"] == bin_idx).sum()
    print(f'Length of Balrog-Bagpipes Bin{bin_idx}: {n_in_bin}')

Length of Balrog-Bagpipes Bin0: 613544
Length of Balrog-Bagpipes Bin1: 608122
Length of Balrog-Bagpipes Bin2: 605458
Length of Balrog-Bagpipes Bin3: 590272


## Load DecisionTrees, train/test samples, and predictions

In [15]:
import joblib
import pandas as pd

In [12]:
# Directory containing the stored DecisionTrees (one joblib file per bin).
decisiontree_dir = '/global/cfs/cdirs/des/elisa/IA_decisiontree/decisiontree_data/'
# File-name pattern; the `{}` placeholder is filled with the bin index.
decisiontree_file_template = f'{decisiontree_dir}decisiontree_bin{{}}.joblib'

# classifiers[bin index] -> object loaded from that bin's joblib file.
classifiers = {}

for bin_idx in range(4):
    print(f'Loading DecisionTree Bin{bin_idx}')
    tree_path = decisiontree_file_template.format(bin_idx)
    classifiers[bin_idx] = joblib.load(tree_path)

Loading DecisionTree Bin0
Loading DecisionTree Bin1
Loading DecisionTree Bin2
Loading DecisionTree Bin3


In [16]:
# Location of the pickled DecisionTree predictions table.
file = '/global/cfs/cdirs/des/elisa/IA_decisiontree/decisiontree_data/decisiontree_predictions.pkl'

# Read the pickle inside a context manager so the file handle is closed as
# soon as the table has been loaded.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open(file, 'rb') as predictions_file:
    predictions = pickle.load(predictions_file, encoding='latin1')
print(f'Length of predictions catalog: {len(predictions)}')

# Add predictions to data: an inner join on 'bal_id' keeps only the objects
# present in both tables.
# Note (author): the merge was checked and matches rows correctly.
# NOTE(review): this cell rebinds `data`. Running it a second time merges the
# predictions again, and pandas will then suffix the duplicated columns
# (e.g. `_x` / `_y`), so re-run from the catalog-loading cells instead.
data = pd.merge(data, predictions, on='bal_id', how='inner')

Length of predictions catalog: 2417396


In [17]:
# For every bin, print how many selected objects are predicted blue
# (label_pred == 0) or red (label_pred == 1), plus the leaf count of that
# bin's DecisionTree.
for bin_idx in range(4):
    in_this_bin = data['bin'] == bin_idx
    # Rows with label_pred == -1 are excluded; presumably these are objects
    # outside the test sample -- TODO confirm.
    has_label = data['label_pred'] != -1
    X_test = data[in_this_bin & has_label]

    n_blue = len(X_test[X_test['label_pred'] == 0])
    n_red = len(X_test[X_test['label_pred'] == 1])
    n_leaves = classifiers[bin_idx].get_n_leaves()

    print(f'Bin{bin_idx}')
    print(f"- Number of blue galaxies: {n_blue}")
    print(f"- Number of red galaxies:  {n_red}")
    print(f'Number of leaves: {n_leaves}')
    print()

Bin0
- Number of blue galaxies: 67312
- Number of red galaxies:  55397
Number of leaves: 23189

Bin1
- Number of blue galaxies: 65563
- Number of red galaxies:  56062
Number of leaves: 22090

Bin2
- Number of blue galaxies: 65197
- Number of red galaxies:  55895
Number of leaves: 23989

Bin3
- Number of blue galaxies: 68222
- Number of red galaxies:  49833
Number of leaves: 24669



## Produce table with `leaf_id`, `label`, and properties to compute SNR