In [250]:
import pandas as pd 
import catboost as cb 
from sklearn.model_selection import cross_val_score
import numpy as np
from sklearn.model_selection import GridSearchCV
from lightgbm import LGBMClassifier
from xgboost.sklearn import XGBClassifier
import xgboost as xgb
from sklearn.base import ClassifierMixin
from sklearn.preprocessing import LabelEncoder
from bayes_opt import BayesianOptimization
from bayes_opt import SequentialDomainReductionTransformer
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
from bayes_opt.util import load_logs

In [221]:
import warnings
warnings.filterwarnings('ignore')

In [207]:
train_values = pd.read_csv('train_values.csv', index_col='building_id')
train_values1 = pd.read_csv('train_values_short1.csv', index_col='building_id')
train_values_baseline = pd.get_dummies(train_values)
ambos_geos = train_values1.merge(train_values[["geo_level_1_id", "geo_level_2_id", "geo_level_3_id"]],
                                            left_index=True, right_index=True)
train_values_catboost = pd.read_csv("train_values_catboost.csv", index_col='building_id')
train_values_lgbm = pd.read_csv("train_values_lightGBM.csv", index_col='building_id')

train_labels = pd.read_csv('train_labels.csv', index_col='building_id')
le = LabelEncoder()
train_labels_encoded = le.fit_transform(y=train_labels.values.ravel())

In [168]:
categoricas = []
for col in train_values_catboost.columns:
    if ((col != 'min_geo_id') & (col != 'max_geo_id')):
        categoricas.append(col)
        categoricas.append(train_values_catboost.columns.get_loc(col))
        train_values_catboost[col] = train_values_catboost[col].astype("category")

In [215]:
class EncontradorPesos(ClassifierMixin):
    def __init__(self, peso_xgb_baseline, peso_xgb_short1, peso_catboost, peso_lgbm, categoricas_catboost):
        
        self.peso_xgb_baseline = peso_xgb_baseline
        self.xgb_baseline = xgb_baseline = XGBClassifier(random_state=2021, max_depth= 10, n_estimators= 273,
                                                         verbosity=0, use_label_encoder=False)
        
        self.peso_xgb_short1 = peso_xgb_short1
        self.xgb_short1=XGBClassifier( objective = "multi:softmax", use_label_encoder=False, seed=30,
                         max_depth=9,
                         min_child_weight=6,
                         gamma=0.2554138025988315,
                        colsample_bytree = 0.7773754946860542,
                        subsample = 0.9030471581301206,
                         learning_rate =0.1, 
                         n_estimators=273,
                        verbosity=0
                         )
        
        self.peso_catboost = peso_catboost
        self.catboost = cb.CatBoostClassifier(loss_function='MultiClass', eval_metric='TotalF1:average=Micro', 
                                              iterations=100, verbose=False)
        self.categoricas_catboost = categoricas_catboost
        
        self.lgbm = LGBMClassifier(random_state=2021, num_iterations= 273,
                                   num_leaves= 70, objective= 'regression', colsample_bytree=0.77)
        self.peso_lgbm = peso_lgbm
    
                 
    def fit(self, X, y):
        self.xgb_short1.fit(X[0], y)
        self.xgb_baseline.fit(X[1], y)
        self.catboost.fit(X[2], y, self.categoricas_catboost)
        self.lgbm.fit(X[3],y)
        
    def predict(self, X):
        probas_xgb_short1 =  self.xgb_short1.predict_proba(X[0]) * self.peso_xgb_short1
        probas_xgb_baseline =  self.xgb_baseline.predict_proba(X[1]) * self.peso_xgb_baseline
        probas_catboost =  self.catboost.predict_proba(X[2]) * self.peso_catboost
        probas_lgbm =  self.lgbm.predict_proba(X[3]) * self.peso_lgbm

        
        preds = []
        for i in range(len(X[0])):
            proba_grado1 = probas_xgb_short1[i][0] + probas_xgb_baseline[i][0] \
                           + probas_catboost[i][0] + probas_lgbm[i][0]
            proba_grado2 = probas_xgb_short1[i][1] + probas_xgb_baseline[i][1] \
                           + probas_catboost[i][1] + probas_lgbm[i][1]
            proba_grado3 = probas_xgb_short1[i][2] + probas_xgb_baseline[i][2] \
                           + probas_catboost[i][2] + probas_lgbm[i][2]
            
            
            max_proba = max([proba_grado1, proba_grado2, proba_grado3])
            preds.append([proba_grado1, proba_grado2, proba_grado3].index(max_proba))
        
 
        return preds

In [232]:
train_values = [ambos_geos.iloc[:100], train_values_baseline[:100], train_values_catboost[:100], 
           train_values_lgbm[:100]]
train_labels = train_labels_encoded[:100]

In [229]:
test_values = [ambos_geos.iloc[:10], train_values_baseline[:10], train_values_catboost[:10],
                       train_values_lgbm[:10]]
test_labels = train_labels_encoded[:10]

In [248]:

def funcion_a_optimizar(peso_baseline, peso_short1, peso_lgbm):

            model = EncontradorPesos(peso_xgb_baseline=peso_baseline,
                                     peso_xgb_short1=peso_short1, peso_catboost=1, peso_lgbm = peso_lgbm,
                        categoricas_catboost=categoricas)

            model.fit(train_values, train_labels)
            preds = model.predict(test_values)
            score = f1_score(preds, test_labels, average='micro')
            print("Score: " + str(score))
            print("\t Peso baseline: " + str(peso_baseline))
            print("\t Peso short1: " + str(peso_short1))
            print("\t Peso lgbm: " + str(peso_lgbm))
            print()
            
            return(score)

In [253]:
limites = {"peso_baseline":(4,6), "peso_short1":(3,5), "peso_lgbm":(0.8,2)}

In [254]:
optimizer = BayesianOptimization(
    f=funcion_a_optimizar,
    random_state=10,
    pbounds=limites,
)


logger = JSONLogger(path="./logs_ultima_carta.json")
optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

In [255]:
optimizer.maximize(
    init_points=2,
    n_iter=2,
)

Score: 1.0
	 Peso baseline: 5.542641286533492
	 Peso short1: 4.267296469852551
	 Peso lgbm: 0.8249023392312819

Score: 1.0
	 Peso baseline: 5.497607765077223
	 Peso short1: 3.4495932910616953
	 Peso lgbm: 1.3982084147631086

Score: 1.0
	 Peso baseline: 4.019905151444054
	 Peso short1: 4.906116435412473
	 Peso lgbm: 1.9999662999261694

Score: 1.0
	 Peso baseline: 4.05168532283733
	 Peso short1: 3.023672684186033
	 Peso lgbm: 1.9905196468737527



In [257]:
optimizer.max

{'target': 1.0,
 'params': {'peso_baseline': 5.542641286533492,
  'peso_lgbm': 0.8249023392312819,
  'peso_short1': 4.267296469852551}}