# Explainable Boosting Machine

## Imports

In [1]:
from interpret import set_visualize_provider
from interpret.provider import InlineProvider
# Register an InlineProvider instance as interpret's visualization provider
# (presumably so that show() renders inside the notebook output -- confirm
# against the interpret documentation).
set_visualize_provider(InlineProvider())

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split

from interpret import show
from interpret.glassbox import (
    ExplainableBoostingClassifier,
    ExplainableBoostingRegressor,
)

In [3]:
import sys
import h5py
import numpy as np

## Paths

In [4]:
# ROOT is the interpreter prefix with its last four characters dropped
# (presumably this strips a 4-character environment folder name so that ROOT
# ends at the project directory, separator included -- TODO confirm).
ROOT = sys.prefix[:-4]

# Data folder, written with Windows-style separators and a trailing separator
# so that file names can be appended directly.
DATA_DIR = f'{ROOT}data\\Uchuu-UM\\'

# One HDF5 file per snapshot, ordered by increasing redshift tag in the name.
FILES = [
    'Sample.01.Redshift.0.000.h5',
    'Sample.01.Redshift.1.032.h5',
    'Sample.01.Redshift.2.029.h5',
    'Sample.01.Redshift.3.129.h5',
    'Sample.01.Redshift.4.269.h5',
    'Sample.01.Redshift.5.155.h5',
]

# Full path of every file, in the same order as FILES.
PATHS = [f'{DATA_DIR}{file_name}' for file_name in FILES]

## Data processing

In [5]:
# Position in PATHS (0-5) of the file to load -- an index, not a redshift
# value. 5 selects 'Sample.01.Redshift.5.155.h5'.
REDSHIFT = 5

In [6]:
# Open the HDF5 file selected by REDSHIFT in read-only mode.
path = PATHS[REDSHIFT]
h = h5py.File(path, mode='r')

In [7]:
# Build the working table from three datasets of the open HDF5 file.
#
# np.real() converts each dataset to an in-memory array (keeping the real
# part), so nothing in df depends on the file once the DataFrame exists; the
# ``with`` block therefore closes the handle instead of leaking it.
# Indexing with h[name] raises KeyError for a missing dataset, whereas
# h.get(name) would return None instead of failing.
#
# NOTE: h is closed when this cell finishes; re-run the cell that opens it
# before running this one again.
with h:
    df = pd.DataFrame({
        name: np.real(h[name])
        for name in ('Mvir', 'StellarMass', 'StarFormationRate')
    })

In [16]:
# Preview a random 0.05% of the rows. No random_state is given, so the rows
# shown change on every run.
df.sample(frac=0.0005)

Unnamed: 0,Mvir,StellarMass,StarFormationRate
2556632,2.812000e+10,1.968547e+08,0.279518
6514554,2.077000e+11,1.231111e+09,0.185750
2938393,3.924000e+10,1.067251e+08,0.432267
4276252,1.236000e+11,4.408923e+08,0.760493
2088477,2.289000e+10,1.553236e+08,1.740504
...,...,...,...
5572123,7.096000e+10,2.434861e+08,152.222885
510362,9.810000e+09,1.176770e+08,3.108943
3984572,7.914000e+10,2.405337e+08,0.554991
4281740,5.919000e+10,1.342575e+08,2.369842


In [None]:
seed = 1

# Keep a random 0.05% of the rows. The sample is seeded so the whole cell
# (sampling, split and model) is reproducible.
# NOTE: this rebinds df, so re-running the cell samples the sample again;
# rebuild df first if the full table is needed.
df = df.sample(frac=0.0005, random_state=seed)

train_cols = ['Mvir', 'StellarMass']  # TODO: remove StellarMass
label = 'StarFormationRate'
X = df[train_cols]
y = df[label]

# Hold out 20% of the sampled rows for the local explanations below.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=seed
)

# StarFormationRate is a continuous quantity, so this is a regression task.
# A classifier is the wrong estimator here: it expects discrete class labels.
ebm = ExplainableBoostingRegressor(random_state=seed)
ebm.fit(X_train, y_train)

ebm_global = ebm.explain_global()
show(ebm_global)

# Explain the first five held-out rows; .iloc makes the slice explicitly
# positional (the index holds the original, shuffled row labels).
ebm_local = ebm.explain_local(X_test.iloc[:5], y_test.iloc[:5])
show(ebm_local)