In [9]:
import random
import warnings
import keras as k
import numpy as np
import pandas as pd
import seaborn as sns
from skopt import gp_minimize
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_objective
from skopt.utils import use_named_args
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly
import plotly.graph_objs as go

import config as c
from pipeline.Layer import Layer
from utils.cuda import turn_off_gpu
from models.svm.svm import SVM
from models.keras_dense_classifier.keras_dense_classifier import KerasDenseClassifier as KDC
from visualization.utils import plot_correlation_matrix, plot_scatterplot_matrix

turn_off_gpu()
init_notebook_mode(connected=True)
warnings.filterwarnings('ignore')
plt.rcParams ['figure.figsize'] = (13,8)
sns.set()
%config InlineBackend.figure_format = 'retina'

In [10]:
from utils.preprocess import preprocess 

In [11]:
df = pd.read_csv('train.csv', sep=',')
pd.set_option('display.max_columns', 500)
df['addr_region_fact_encoding2'] = (df['addr_region_fact_encoding2']*11).round(0).astype(int)
df['addr_region_fact_encoding1'] = (df['addr_region_fact_encoding1']*83).round(0).astype(int)
df['addr_region_reg_encoding1'] = (df['addr_region_reg_encoding1']*83).round(0).astype(int)
df['addr_region_reg_encoding2'] = (df['addr_region_reg_encoding2']*11).round(0).astype(int)
df['app_addr_region_reg_encoding2'] = (df['app_addr_region_reg_encoding2']*11).round(0).astype(int)
df['app_addr_region_reg_encoding1'] = (df['app_addr_region_reg_encoding1']*83).round(0).astype(int)
df['app_addr_region_fact_encoding1'] = (df['app_addr_region_fact_encoding1']*83).round(0).astype(int)
df['app_addr_region_fact_encoding2'] = (df['app_addr_region_fact_encoding2']*11).round(0).astype(int)
df['app_addr_region_sale_encoding1'] = (df['app_addr_region_sale_encoding1']*39).round(0).astype(int)
df['app_addr_region_sale_encoding2'] = (df['app_addr_region_sale_encoding2']*7).round(0).astype(int)

df = preprocess(df)
df.head()

Unnamed: 0,card_id,target,addr_region_reg,addr_region_fact,sas_limit_after_003_amt,sas_limit_last_amt,clnt_income_month_avg_net_amt,clnt_expense_month_avg_amt,clnt_experience_cur_mnth,clnt_experience_cur_year,clnt_experience_total_mnth,app_addr_region_reg,app_addr_region_fact,app_addr_region_sale,clnt_birth_year,addr_region_fact_encoding1,addr_region_fact_encoding2,addr_region_reg_encoding1,addr_region_reg_encoding2,app_addr_region_reg_encoding1,app_addr_region_reg_encoding2,app_addr_region_fact_encoding1,app_addr_region_fact_encoding2,app_addr_region_sale_encoding1,app_addr_region_sale_encoding2,loans_main_borrower,loans_active,last_loan_date,first_loan_date,max_overdue_status,ttl_officials,ttl_legals,ttl_bankruptcies,inquiry_recent_period,inquiry_3_month,inquiry_6_month,inquiry_9_month,inquiry_12_month,ttl_inquiries,ttl_auto_loan,ttl_mortgage,ttl_credit_card,ttl_consumer,worst_status_ever,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,fl_coborrower,fl_active_coborrower,pay_load,inquiry_1_week,inquiry_1_month,feature_10,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,makro_region,fo,region,feature_30,delivery_type_cat_1,delivery_type_cat_2,delivery_type_cat_3,channel_name_cat_0,channel_name_cat_1,channel_name_cat_2,channel_name_cat_3,channel_name_cat_4,channel_name_cat_5,channel_name_cat_6,channel_name_2_cat_0,channel_name_2_cat_1,channel_name_2_cat_2,channel_name_2_cat_3,channel_name_2_cat_4,channel_name_2_cat_5,channel_name_modified_2018_cat_0,channel_name_modified_2018_cat_1,channel_name_modified_2018_cat_2,channel_name_modified_2018_cat_3,channel_name_modified_2018_cat_4,clnt_education_name_cat_0,clnt_education_name_cat_1,clnt_education_name_cat_2,clnt_education_name_cat_3,clnt_education_name_cat_4,clnt_education_name_cat_5,clnt_education_name_cat_6,clnt_marital_status_name_cat_0,clnt_marital_status_name_cat_1,clnt_marital_status_name_cat_2,clnt_marital_status_name_cat_3,clnt_marital_status_name_cat_4,clnt_employment_type_name_cat_0,clnt_employment_type_name_cat_1,clnt_employment_type_name_cat_2,clnt_employment_type_name_cat_3,clnt_employment_type_name_cat_4,clnt_speciality_sphere_name_cat_0,clnt_speciality_sphere_name_cat_1,clnt_speciality_sphere_name_cat_10,clnt_speciality_sphere_name_cat_11,clnt_speciality_sphere_name_cat_12,clnt_speciality_sphere_name_cat_13,clnt_speciality_sphere_name_cat_14,clnt_speciality_sphere_name_cat_15,clnt_speciality_sphere_name_cat_16,clnt_speciality_sphere_name_cat_17,clnt_speciality_sphere_name_cat_18,clnt_speciality_sphere_name_cat_19,clnt_speciality_sphere_name_cat_2,clnt_speciality_sphere_name_cat_20,clnt_speciality_sphere_name_cat_21,clnt_speciality_sphere_name_cat_22,clnt_speciality_sphere_name_cat_23,clnt_speciality_sphere_name_cat_24,clnt_speciality_sphere_name_cat_25,clnt_speciality_sphere_name_cat_26,clnt_speciality_sphere_name_cat_27,clnt_speciality_sphere_name_cat_28,clnt_speciality_sphere_name_cat_3,clnt_speciality_sphere_name_cat_4,clnt_speciality_sphere_name_cat_5,clnt_speciality_sphere_name_cat_6,clnt_speciality_sphere_name_cat_7,clnt_speciality_sphere_name_cat_8,clnt_speciality_sphere_name_cat_9,clnt_sex_name_cat_0,clnt_sex_name_cat_1,prt_name_cat_0,prt_name_cat_1,prt_name_cat_2,prt_name_cat_3,prt_name_cat_4,prt_name_cat_5,prt_name_cat_6,prt_name_cat_7,prt_name_cat_8,prt_name_cat_9,feature_0_cat_0,feature_0_cat_1,feature_0_cat_10,feature_0_cat_12,feature_0_cat_13,feature_0_cat_14,feature_0_cat_15,feature_0_cat_16,feature_0_cat_3,feature_0_cat_6,feature_0_cat_7,feature_0_cat_8,feature_0_cat_9,clnt_experience_cur_mnth_na,clnt_experience_cur_year_na,last_loan_date_na,first_loan_date_na,ttl_officials_na,ttl_legals_na,ttl_bankruptcies_na,inquiry_recent_period_na,inquiry_3_month_na,inquiry_6_month_na,inquiry_9_month_na,inquiry_12_month_na,inquiry_1_week_na,inquiry_1_month_na,feature_10_na,feature_12_na,feature_13_na,feature_14_na,feature_15_na,feature_16_na,feature_17_na,feature_18_na,feature_19_na,feature_20_na,feature_21_na,feature_22_na,feature_23_na,feature_24_na,feature_25_na,feature_26_na,feature_27_na,feature_28_na,feature_29_na
0,cid_10620,1,107,107,1,1,3,0,0.0,0.0,0.0,107,107,45,46,560000,78750,560000,78750,560000,78750,560000,78750,260000,46000,0.88,0.78,193.0,2851.0,2,0.0,0.0,0.0,0.0,7.0,10.0,12.0,19.0,87,0,0,1,1,3,3,10,4,0,0,5,1,0,0,0,0,0.82,1.0,3.0,0.57,0.64,0.7,0.0,0.0,0.0,0.0,0.57,0.88,0.0,0.0,0.77,0.49,0.53,0.91,0.99,0.0,510.023518,0.0,0,0,0,3,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1
1,cid_105724,0,9,9,3,3,5,1,2.0,0.0,3.0,9,9,1,47,620000,82500,620000,82500,620000,82500,620000,82500,310000,54000,0.77,0.7,73.0,4288.0,1,0.0,0.0,0.0,58.0,19.0,28.0,31.0,43.0,94,1,0,1,1,2,2,5,0,0,0,0,0,0,0,0,0,0.93,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,7,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
2,cid_101410,1,109,109,1,1,2,0,0.0,0.0,0.0,109,109,109,44,650000,88750,650000,88750,650000,88750,650000,88750,210000,36000,0.51,0.0,242.0,1852.0,1,0.0,0.0,0.0,30.0,6.0,6.0,11.0,20.0,31,0,0,1,1,2,1,2,0,0,0,0,0,0,0,0,0,0.0,0.0,2.0,0.53,0.5,0.6,0.0,0.0,0.34,0.56,0.53,0.87,0.6,0.85,0.75,0.77,0.7,0.89,0.99,0.97,262.65425,359.0,2,4,71,4,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,cid_38961,0,66,66,3,3,2,1,0.0,0.0,0.0,66,66,66,27,560000,73750,560000,73750,560000,73750,560000,73750,120000,12000,0.28,0.57,868.0,868.0,1,0.0,0.0,0.0,366.0,0.0,0.0,0.0,0.0,8,0,1,0,0,2,0,4,0,0,0,3,0,0,0,1,1,0.92,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,7,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
4,cid_57462,0,16,16,0,0,3,0,0.0,0.0,0.0,16,16,16,42,600000,80000,600000,80000,600000,80000,600000,80000,160000,22000,0.28,0.0,1525.0,1525.0,1,0.0,0.0,0.0,26.0,4.0,7.0,8.0,15.0,33,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,7,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


In [12]:
corr = df.corr().target
columns = list(corr[abs(corr)>0.2].index)
df = df[columns]

In [13]:
X = df[df.columns[1:]].to_numpy()
y = df['target'].to_numpy()

In [14]:
X.shape

(90000, 3)

In [28]:
parameters = KDC.default_model_constructor_parameters

In [16]:
kds = KDC()
print(kds.fit_ensemble(2, 1, X, y, KDC.default_model_constructor_parameters))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100


Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100


Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
0.62941113114357


In [23]:
predict = (kds.predict(X).reshape(-1)>0.5)*1.0

In [30]:
print(sum(predict*y), sum(predict*(1-y)), sum((1-predict)*(1-y)), sum((1-predict)*y))

20186.0 9529.0 36451.0 23834.0


9529.0

In [20]:
kds.predict(X)

array([[0.46183205],
       [0.5880158 ],
       [0.4173655 ],
       ...,
       [0.59948635],
       [0.5766834 ],
       [0.15270266]], dtype=float32)

In [24]:
# Описание гиперпараметров модели
dimensions = [Categorical(categories=['sigmoid', 'softmax', 'relu', 'softsign', 'tanh'], name='activation'),
              Categorical(categories=['sigmoid', 'softmax', 'relu', 'softsign', 'tanh'], name='output_node_activation'),
              Real(low=1e-6, high=1e2, prior='log-uniform', name='learning_rate'),
              Integer(low=1, high=5, name='num_dense_layers'),
              Integer(low=3, high=30, name='dense_shape'),
              Integer(low=2, high=10, name='early_patience')
              ]


# Глобальные переменные
best_score = 0.0
fit_iteration = 0


@use_named_args(dimensions=dimensions)
def skopt_fit(**model_constructor_parameters):
    """
    Создает, обучает и тестирует модель с задаными гиперпараметрами
    :param model_constructor_parameters: гиперпараметры
    :return: Скор
    """
    print(model_constructor_parameters)
    global X, y, best_score, fit_iteration
    c.SEED = random.randint(0, 3000)

    # Создание, обучение и тестирование модели
    model = KDC()
    score = model.fit_ensemble(10, 1, X, y, model_constructor_parameters)

    print("Score: {0:.2%}".format(score))
    print("Best score: {0:.2%}".format(best_score))
    print("Fitness iteration:", fit_iteration)
    print('Seed', c.SEED)
    print('--||--' * 10, '\n')
    fit_iteration += 1

    # Сохранение лучшей модели
    if score > best_score:
        model.save_ensemble('best_model')
        best_score = score

    # Очистка памяти
    del model
    k.backend.clear_session()

    # Возврат скора, так-как задача минимизации, то чем лучше модель - тем меньше результат
    return -score


# Подбор гиперпараметров, описание параметров смотри:
# https://scikit-optimize.github.io/stable/modules/generated/skopt.plots.plot_objective.html
search_result = gp_minimize(func=skopt_fit,
                            dimensions=dimensions,
                            acq_func='EI',
                            n_calls=30,
                            n_jobs=10,
                            x0=list(KDC.default_model_constructor_parameters.values()))

print('Best Accuracy: %.3f' % (-search_result.fun))
print('Best Parameters: %s' % search_result.x)

# Отрисовка графиков
_ = plot_objective(result=search_result, n_points=30)
_ = plot_objective(result=search_result, sample_source='result', n_points=30)
plt.show()

{'activation': 'sigmoid', 'output_node_activation': 'sigmoid', 'learning_rate': 0.001, 'num_dense_layers': 3, 'dense_shape': 30, 'early_patience': 5}
Score: 61.25%
Best score: 0.00%
Fitness iteration: 0
Seed 1985
--||----||----||----||----||----||----||----||----||----||-- 

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: models\keras_dense_classifier\saved_models\best_model\model_0\assets
INFO:tensorflow:Assets written to: models\keras_dense_classifier\saved_models\best_model\model_1\assets
INFO:tensorflow:Assets written to: models\keras_dense_classifier\saved_models\best_model\model_2\assets
INFO:tensorflow:Assets written to: models\keras_dense_classifier\saved_models\best_model\model_3\assets
INFO:tensorflow:Assets written to: models\keras_dense_classifier\saved_mod

NameError: name 'k' is not defined