In [None]:
! pip3 install bayesian_optimization
! pip3 install cython

In [None]:
! sudo apt-get update
! sudo apt-get -y install build-essential

In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from bayes_opt import BayesianOptimization
from bayes_opt import SequentialDomainReductionTransformer
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
from bayes_opt.util import load_logs
import cython
import time
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%load_ext Cython

In [2]:
train_values_short = pd.read_csv('train_values_short.csv', index_col='building_id')
train_labels = pd.read_csv('train_labels.csv', index_col='building_id')

In [3]:
%%cython
def hassanat1(double[:] x, double[:] y, double[:] factores):
    cdef int n = x.shape[0]
    cdef double res = 0
    cdef double d, minimo, maximo

    for i in range(n):
        minimo = min(x[i],y[i])
        maximo = max(x[i],y[i])
        if (minimo >=0):
            d =  1 - ( (1+minimo)/(1+maximo) )
        else:
            d =  1 - ( (1+minimo+abs(minimo))/(1+maximo+abs(minimo)) )
        res += d * factores[i]
    
    return res

In [None]:
def target(resultado):
    return resultado['target']

def mostrar(resultados, keys, min=0):
    
    print( "i", end = '\t\t') 
    print("target", end = '\t\t') 
    for key in keys:
        print(  key, end = '\t') 
    print()
    
    a_borrar = []
    for resultado in resultados:
        if(resultado['target']<min):
            a_borrar.append(resultado)
    resultados_importantes = [a for a in resultados if a not in a_borrar]
    
    resultados_importantes.sort(reverse=True, key=target)
    
    for i, res in enumerate(resultados_importantes):
        cells = []

        cells.append(i)
        cells.append(res["target"])

        for key in keys:
            cells.append(res["params"][key])
        
        for j in cells:
            print("| " + str(j), end = '\t') 
            
        print()

In [4]:
x_train, x_test, y_train, y_test = train_test_split(train_values_short,
                                train_labels, test_size=0.001, random_state=1)

In [9]:
def funcion_a_optimizar5(factor_age, factor_altura, factor_plan):
    factor_count_floors = 1.9195786742228238
    factor_area = 0.3139710742344524
    factor_foundation_type = 2.199668387926665
    factor_legal_ownership_status = 0.12558762490941516
    factor_secondary_use = 0.7726106929827516
    factor_geo_id = 7.508261370013642
    factor_superestructuras = 14.483312165426854
    
    k = 22
    factores = np.array([factor_count_floors, factor_age, factor_area, factor_altura, factor_foundation_type,
                         1, 1, 1, 1, factor_plan, factor_legal_ownership_status, factor_secondary_use,
                         factor_geo_id,  factor_geo_id, 534565675686786]).astype('double')
    knn = KNeighborsClassifier(weights="distance",n_neighbors=k,
                  metric=hassanat1, metric_params={'factores':factores})
    knn.fit(x_train, y_train.values.ravel())
    prediction = knn.predict(x_test)
    return f1_score(y_test, prediction, average='micro')

In [13]:
funcion_a_optimizar5(1,1,1)

0.7049808429118773

In [None]:
funcion_a_optimizar5(1,1,1)

In [7]:
def funcion_a_optimizar6(factor_age, factor_altura, factor_plan):
    factor_count_floors = 1.9195786742228238
    factor_area = 0.3139710742344524
    factor_foundation_type = 2.199668387926665
    factor_legal_ownership_status = 0.12558762490941516
    factor_secondary_use = 0.7726106929827516
    factor_geo_id = 7.508261370013642
    factor_superestructuras = 14.483312165426854
    
    k = 22
    factores = np.array([factor_count_floors, factor_age, factor_area, factor_altura, factor_foundation_type,
                        1, 1, 1, factor_plan, factor_legal_ownership_status, factor_secondary_use,
                         factor_geo_id,  factor_geo_id, 45456575675675675]).astype('double')
    knn = KNeighborsClassifier(weights="distance",n_neighbors=k,
                  metric=hassanat1, metric_params={'factores':factores})
    knn.fit(x_train, y_train.values.ravel())
    prediction = knn.predict(x_test)
    return f1_score(y_test, prediction, average='micro')

In [8]:
funcion_a_optimizar6(1,1,1)

0.7049808429118773

In [6]:
funcion_a_optimizar6(1,1,1)

0.7126436781609196

In [4]:
train_values_short.columns

Index(['count_floors_pre_eq', 'age', 'area_percentage', 'height_percentage',
       'foundation_type', 'roof_type', 'ground_floor_type', 'other_floor_type',
       'plan_configuration', 'legal_ownership_status', 'secondary_use',
       'min_geo_id', 'max_geo_id', 'superestructure'],
      dtype='object')

In [None]:
limites = {"factor_age":(0,1), "factor_altura":(0.5,3), "factor_plan":(1,3.5)}

optimizer = BayesianOptimization(
    f=funcion_a_optimizar5,
    pbounds=limites,
    random_state=,
    bounds_transformer=SequentialDomainReductionTransformer(),
    verbose=5,
)
logger = JSONLogger(path="./logs_nuevos.json")
optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

In [None]:
optimizer.maximize(
    init_points=0,
    n_iter=1,
)

In [None]:
optimizer.probe(
    params={"factor_count_floors":1.9195786742228238, "factor_age":1, "factor_foundation_type":2.199668387926665},
    lazy=False,
)

In [None]:
mostrar(optimizer.res, ["factor_age", "factor_altura", "factor_plan"])