In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import uproot

In [3]:
from lightgbm import LGBMClassifier
import shap

In [4]:
# Open the ROOT file and pick out the object stored under the key 'tree'.
file = uproot.open('../Data/mc15_13TeV.361203.Pythia8_A2_MSTW2008LO_ND_minbias.30062015_v0_per_0.root')
T = file['tree']

# Upper bound on the number of tree entries read into the DataFrame.
amount = 1_000_000

df = T.pandas.df(entrystop=amount)

# Features are the first 138 columns (by position); the label is the `trueKs` column.
# NOTE(review): confirm `trueKs` (or any other truth-level column) is not among
# the first 138 columns, otherwise the label leaks into the features.
params = df.iloc[:, :138]
label = df['trueKs']

In [6]:
# Gradient-boosted classifier with library-default hyperparameters, fitted on
# every loaded row (no hold-out split is made before this point).
# NOTE: `model` is rebound to a Keras network further down the notebook, so the
# cells that use this classifier must run before that one.
model = LGBMClassifier()

model.fit(X=params, y=label)

LGBMClassifier()

In [7]:
from sklearn.metrics import roc_auc_score

# Scores are predicted for the same rows the classifier was fitted on, so the
# value shown below is a training-set AUC, not a held-out estimate.
probs = model.predict_proba(params)
# Column 1 of `predict_proba` is used as the score for the ROC curve.
roc_auc_score(y_true=label, y_score=probs[:, 1])

0.9984599415123231

In [8]:
# Background data handed to the tree explainer: a 100-row sample of the features.
# `model` must still be the LightGBM classifier when this cell runs.
background = shap.sample(params, 100)
explain = shap.TreeExplainer(model, background)

In [9]:
# SHAP values are computed for a 100-row sample of the feature frame, so their
# columns follow the column order of `params`.
explained_rows = shap.sample(params, 100)
shap_values = explain.shap_values(explained_rows)

In [10]:
# Number of top-ranked features kept for the neural network (also its input width).
number_of_features = 60

# Older shap releases return one array per class for binary classifiers; in that
# case use the last entry (class index 1). Otherwise the array is used as is.
shap_matrix = shap_values[-1] if isinstance(shap_values, list) else shap_values

# Global importance is the mean *absolute* SHAP value per feature. Taking the
# absolute value of the signed mean would let positive and negative
# contributions cancel and under-rank features that matter in both directions.
importance = np.abs(np.asarray(shap_matrix)).mean(axis=0)

# Indices of the most important features, most important first.
top_idx = np.argsort(importance)[::-1][:number_of_features]

# SHAP columns correspond to `params` (the frame that was explained), so the
# names are looked up there rather than in `df`.
param_names = params.columns[top_idx]

# Select from `params`, which this cell never overwrites, so re-running the cell
# gives the same result instead of indexing into an already-reduced `df`.
df = params[param_names]

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, normalize

# Split first so the held-out rows never influence the scaling statistics.
# Same test fraction and seed as before, hence the same row partition.
X_train, X_test, y_train, y_test = train_test_split(df, label, test_size=0.015, random_state=42)

# `sklearn.preprocessing.normalize` rescales every *row* to unit L2 norm by
# default, which mixes the features of a sample together instead of putting the
# features on a common scale. Standardise each feature column instead, with the
# mean and variance learned from the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [13]:
from tensorflow.keras.optimizers import Adam

# Optimiser for the dense network.
opt = Adam(learning_rate=0.005)

# `cells` lists layer widths; only the first `s` of them become hidden layers,
# so with s = 3 the trailing width is not used.
s = 3
cells = [48, 48, 24, 8]
print(cells)

# NOTE: this rebinds `model`, which earlier cells use for the LightGBM classifier.
model = Sequential()

# The first hidden layer also declares the input width.
model.add(Dense(cells[0], activation='relu', input_dim=number_of_features))
# Remaining hidden layers, up to `s` layers in total.
for width in cells[1:s]:
    model.add(Dense(width, activation='relu'))
# Single sigmoid unit for the binary label.
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Training uses no validation data, so the only progress signal is the training loss/accuracy.
model.fit(X_train, y_train, batch_size=128, epochs=50, verbose=1)

# Loss and accuracy on the held-out split.
model.evaluate(X_test, y_test)



[48, 48, 24, 8]
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50

KeyboardInterrupt: 

In [15]:
# Evaluate the network on the held-out split with whatever weights it currently
# holds (the training cell above ended in a KeyboardInterrupt).
model.evaluate(x=X_test, y=y_test)

