# Bibliotecas

In [None]:
import sys

sys.path.append('/eos/user/t/thenriqu/Dark_Matter/LGBM_hhdm_analysis/')


import json
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import hepherolib.analysis as ana

from lgbm.controllers_lgbm import LGBLearner, LGBModel

# Run hepherolib's start-up hook, then switch matplotlib to its "default" style.
# NOTE(review): presumably ana.start() touches matplotlib settings, which would
# be why the style reset comes after it — confirm before reordering these calls.
ana.start()
plt.style.use("default")

# Configuração

In [None]:
# Year selection, in the three spellings used by this notebook.
period = '18'
year_style = 2018
dataset_year = "2018"

# Root of the input samples; the dataset name is the directory that holds
# the trailing `datasets` folder.
basedir = '/eos/user/t/thenriqu/Dark_Matter/Amostras/hhdmAnalysis_deepJet_Regions/datasets'
dataset_name = Path(basedir).parent.name

# Input CSVs and trained-model outputs share the same <dataset>/<year> layout.
data_path = f"./data/{dataset_name}/{dataset_year}"
models_path = f"./models/{dataset_name}/{dataset_year}"

# Create the output folder up front (parents included, no error if present).
Path(models_path).mkdir(parents=True, exist_ok=True)

# Lendo os dados

In [None]:
# Prefix shared by the train/test CSVs and by the saved model artefacts.
model_name = "LGB_multi_signal"

# Read the train and test tables produced for this model.
# NOTE(review): the head() preview of X_train lists an "Unnamed: 0" column;
# if that is a saved row index rather than the display index, it is being
# kept as a feature — confirm against the CSV writer.
X_train = pd.read_csv(f"{data_path}/{model_name}-train-data.csv")
X_test = pd.read_csv(f"{data_path}/{model_name}-test-data.csv")

# Detach the weight/label columns (in this order) as NumPy arrays, so that
# X_train / X_test keep only the remaining columns.
W_train, Y_train, evtWeight_train = (
    X_train.pop(column).values for column in ("modelWeight", "Label", "evtWeight")
)
W_test, Y_test, evtWeight_test = (
    X_test.pop(column).values for column in ("modelWeight", "Label", "evtWeight")
)

In [18]:
# Preview the first rows of the training frame (weight and label columns already popped).
X_train.head()

Unnamed: 0,LeadingLep_pt,LepLep_deltaM,LepLep_deltaR,LepLep_pt,MET_LepLep_Mt,MET_LepLep_deltaPhi,MET_pt,MT2LL,Nbjets,TrailingLep_pt
0,69.88334,0.582565,3.027906,40.11211,254.93567,2.920919,410.03757,26.96941,1,30.125252
1,114.942566,18.834435,1.757877,129.6942,122.29612,1.108255,104.121475,6.398953,2,33.967537
2,82.65237,5.285355,1.379278,118.73953,174.27248,2.980687,64.36005,85.83524,1,68.04502
3,245.78777,6.339325,0.763112,287.61877,274.4351,2.785732,67.58102,105.5349,1,51.919327
4,191.7956,3.473717,0.850083,240.17885,97.727554,0.878454,54.975773,22.92876,1,55.08442


In [None]:
# Show the array taken from the "modelWeight" column of the training CSV.
W_train

In [None]:
# Show the array taken from the "Label" column of the training CSV.
Y_train

# Procura por Hiperparâmetros

#### Usado pelo Macedo - LGBMClassifier

n_iter = 150
cv = 2
param_grid = {
    'learning_rate': [0.008, 0.0004],
    'n_estimators': [900, 1000],
    'num_leaves': [24,128,62],
    'min_child_samples':[100,400,800],
    'subsample':[1,0.9, 0.8],
    'colsample_bytree':[0.9,0.8,0.2],
    'objective': ['multiclass'],
    'metric': ['multi_logloss'],
    'num_class':[3],
    'boosting_type': ['gbdt'],
    'feature_pre_filter': ['False']
}




#### Usado pelo Gabriel: XGBoostClassifier


params = {
    'n_estimators': [100, 500, 1000, 2000, 3000, 5000, 7000, 9000],
    'min_child_weight': range(1,10),
    'learning_rate': [1e-3, 1e-2, 1e-1, 0.3, 0.5, 0.7, 0.9, 1.],
    'subsample': np.arange(0.50, 1.01, 0.05),
    'colsample_bytree': np.arange(0.50, 1.01, 0.05),
    'max_depth': range(3,11),
    'eval_metric': ['logloss', 'error']
}






#### Usado pelo Thiago (eu): VotingClassifier


log_clf = LogisticRegression(solver = 'newton-cg', random_state = 42)
rnd_clf = XGBClassifier( objective = 'multiclass', n_estimators = 4000, learning_rate = 0.001, random_state = 42)
lgbm_clf = LGBMClassifier(objective = 'multiclass',  num_class = 3, n_estimators = 4000, metric = 'multi_logloss', learning_rate = 0.001, boosting_type = 'gbdt', random_state = 42)

voting_clf_soft = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('lgbm', lgbm_clf)],
    voting='soft')



In [None]:

# Candidate values handed to the hyperparameter search, one entry per parameter.
# NOTE(review): 'eval_metric' with 'logloss'/'error' and 'min_child_weight'
# look like XGBoost-style settings — confirm how LGBLearner forwards them.
params = dict(
    n_estimators=[900, 1000, 5000, 7000, 9000],
    min_child_weight=range(1, 10),
    min_child_samples=[100, 400, 800],
    learning_rate=[0.001, 0.004, 0.008],
    subsample=[1, 0.9, 0.8],
    colsample_bytree=[0.9, 0.8, 0.2],
    # Finer alternative grids, kept for reference:
    #   colsample_bytree=np.arange(0.50, 1.01, 0.05)
    #   subsample=np.arange(0.50, 1.01, 0.05)
    max_depth=range(3, 11),
    eval_metric=['logloss', 'error'],
)

# Build the learner from the training features, labels and weights, then run
# the search; `res` is read later when the chosen parameters are saved.
learner = LGBLearner(X_train, Y_train, W_train, njobs=12)
res = learner.find_hyperparams(
    hyperparams_grid=params,
    n_splits=5,
    n_iter=30,
)


# Salvando os Parâmetros

In [None]:
def _json_default(value):
    """Fallback for json.dump: turn NumPy scalars/arrays into built-in types.

    json cannot serialise values such as numpy.int64 or numpy.bool_, which can
    appear when a search grid is built from NumPy arrays. Anything that is not
    a NumPy value is rejected with the same TypeError json would raise.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# Persist the selected hyperparameters next to the models. The encoding is
# explicit because ensure_ascii=False may write non-ASCII characters, and the
# platform default encoding is not guaranteed to be UTF-8.
with open(f'{models_path}/{model_name}-bst-params.json', 'w', encoding='utf-8') as f:
    json.dump(
        res['hyperparameters'],
        f,
        ensure_ascii=False,
        indent=4,
        default=_json_default,
    )

# Notificação quando o notebook terminar de executar

In [None]:
from IPython.display import clear_output, display, HTML, Javascript

# JavaScript run in the browser once this cell executes: it speaks a message
# and then opens an alert dialog with the same message.
completion_script = """
  var msg = new SpeechSynthesisUtterance();
  msg.text = "Process completed!";
  window.speechSynthesis.speak(msg);
  alert("Process completed!")
"""

display(Javascript(completion_script))