## Постановка задачи
Загрузите данные, приведите их к числовым, заполните пропуски, нормализуйте данные и оптимизируйте память.

Сформируйте параллельный ансамбль (стекинг) из CatBoost, градиентного бустинга, XGBoost и LightGBM. Используйте лучшие гиперпараметры, подобранные ранее, или найдите их через перекрестную проверку. Итоговое решение рассчитайте на основании самого точного предсказания класса у определенной модели ансамбля: выберите для каждого класса модель, которая предсказывает его лучше всего.

Проведите расчеты и выгрузите результат в виде submission.csv

Данные:
* https://video.ittensive.com/machine-learning/prudential/train.csv.gz
* https://video.ittensive.com/machine-learning/prudential/test.csv.gz
* https://video.ittensive.com/machine-learning/prudential/sample_submission.csv.gz

Соревнование: https://www.kaggle.com/c/prudential-life-insurance-assessment/

© ITtensive, 2020

### Подключение библиотек

In [3]:
pip install catboost

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting catboost
  Downloading catboost-1.0.6-cp37-none-manylinux1_x86_64.whl (76.6 MB)
[K     |████████████████████████████████| 76.6 MB 1.2 MB/s 
Installing collected packages: catboost
Successfully installed catboost-1.0.6


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, confusion_matrix
from catboost import Pool, CatBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
import lightgbm as lgb
from sklearn import preprocessing

In [27]:
pd.set_option('max_rows', 10)
pd.set_option('max_columns', 250)
pd.set_option('display.max_colwidth', 25)
pd.set_option('display.width', 1000)

### Загрузка данных(тренировочная выборка, данные для обучения)

In [5]:
data = pd.read_csv("https://video.ittensive.com/machine-learning/prudential/train.csv.gz")
print (data.info())
data.head(2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59381 entries, 0 to 59380
Columns: 128 entries, Id to Response
dtypes: float64(18), int64(109), object(1)
memory usage: 58.0+ MB
None


Unnamed: 0,Id,Product_Info_1,Product_Info_2,Product_Info_3,Product_Info_4,Product_Info_5,Product_Info_6,Product_Info_7,Ins_Age,Ht,...,Medical_Keyword_40,Medical_Keyword_41,Medical_Keyword_42,Medical_Keyword_43,Medical_Keyword_44,Medical_Keyword_45,Medical_Keyword_46,Medical_Keyword_47,Medical_Keyword_48,Response
0,2,1,D3,10,0.076923,2,1,1,0.641791,0.581818,...,0,0,0,0,0,0,0,0,0,8
1,5,1,A1,26,0.076923,2,3,1,0.059701,0.6,...,0,0,0,0,0,0,0,0,0,4


### Предобработка данных

In [6]:
def data_preprocess (df):
    df["Product_Info_2_1"] = df["Product_Info_2"].str.slice(0, 1)
    df["Product_Info_2_2"] = pd.to_numeric(df["Product_Info_2"].str.slice(1, 2))
    df.drop(labels=["Product_Info_2"], axis=1, inplace=True)
    for l in df["Product_Info_2_1"].unique():
        df["Product_Info_2_1" + l] = df["Product_Info_2_1"].isin([l]).astype("int8")
    df.drop(labels=["Product_Info_2_1"], axis=1, inplace=True)
    df.fillna(value=-1, inplace=True)
    data["Response"] = data["Response"] - 1
    return df

In [7]:
data = data_preprocess(data)

### Набор столбцов для расчета

In [8]:
columns_groups = ["Insurance_History", "InsurеdInfo", "Medical_Keyword",
                  "Family_Hist", "Medical_History", "Product_Info"]
columns = ["Wt", "Ht", "Ins_Age", "BMI"]
for cg in columns_groups:
    columns.extend(data.columns[data.columns.str.startswith(cg)])
print (columns)

['Wt', 'Ht', 'Ins_Age', 'BMI', 'Insurance_History_1', 'Insurance_History_2', 'Insurance_History_3', 'Insurance_History_4', 'Insurance_History_5', 'Insurance_History_7', 'Insurance_History_8', 'Insurance_History_9', 'Medical_Keyword_1', 'Medical_Keyword_2', 'Medical_Keyword_3', 'Medical_Keyword_4', 'Medical_Keyword_5', 'Medical_Keyword_6', 'Medical_Keyword_7', 'Medical_Keyword_8', 'Medical_Keyword_9', 'Medical_Keyword_10', 'Medical_Keyword_11', 'Medical_Keyword_12', 'Medical_Keyword_13', 'Medical_Keyword_14', 'Medical_Keyword_15', 'Medical_Keyword_16', 'Medical_Keyword_17', 'Medical_Keyword_18', 'Medical_Keyword_19', 'Medical_Keyword_20', 'Medical_Keyword_21', 'Medical_Keyword_22', 'Medical_Keyword_23', 'Medical_Keyword_24', 'Medical_Keyword_25', 'Medical_Keyword_26', 'Medical_Keyword_27', 'Medical_Keyword_28', 'Medical_Keyword_29', 'Medical_Keyword_30', 'Medical_Keyword_31', 'Medical_Keyword_32', 'Medical_Keyword_33', 'Medical_Keyword_34', 'Medical_Keyword_35', 'Medical_Keyword_36', 'M

### Нормализация данных

In [9]:
scaler = preprocessing.StandardScaler()
data_transformed = pd.DataFrame(scaler.fit_transform(pd.DataFrame(data,
                                                     columns=columns)))
columns_transformed = data_transformed.columns
data_transformed["Response"] = data["Response"]

### Оптимизация памяти

In [10]:
def reduce_mem_usage (df):
    start_mem = df.memory_usage().sum() / 1024**2    
    for col in df.columns:
        col_type = df[col].dtypes
        if str(col_type)[:5] == "float":
            c_min = df[col].min()
            c_max = df[col].max()
            if c_min > np.finfo("f2").min and c_max < np.finfo("f2").max:
                df[col] = df[col].astype(np.float16)
            elif c_min > np.finfo("f4").min and c_max < np.finfo("f4").max:
                df[col] = df[col].astype(np.float32)
            else:
                df[col] = df[col].astype(np.float64)
        elif str(col_type)[:3] == "int":
            c_min = df[col].min()
            c_max = df[col].max()
            if c_min > np.iinfo("i1").min and c_max < np.iinfo("i1").max:
                df[col] = df[col].astype(np.int8)
            elif c_min > np.iinfo("i2").min and c_max < np.iinfo("i2").max:
                df[col] = df[col].astype(np.int16)
            elif c_min > np.iinfo("i4").min and c_max < np.iinfo("i4").max:
                df[col] = df[col].astype(np.int32)
            elif c_min > np.iinfo("i8").min and c_max < np.iinfo("i8").max:
                df[col] = df[col].astype(np.int64)
        else:
            df[col] = df[col].astype("category")
    end_mem = df.memory_usage().sum() / 1024**2
    print('Потребление памяти меньше на', round(start_mem - end_mem, 2), 'Мб (минус', round(100 * (start_mem - end_mem) / start_mem, 1), '%)')
    return df

In [11]:
data_transformed = reduce_mem_usage(data_transformed)
print (data_transformed.info())

Потребление памяти меньше на 40.49 Мб (минус 75.1 %)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59381 entries, 0 to 59380
Columns: 119 entries, 0 to Response
dtypes: float16(118), int8(1)
memory usage: 13.4 MB
None


### Построение базовых моделей

In [12]:
x = pd.DataFrame(data_transformed, columns=columns_transformed)

XGBoost

In [14]:
model_xgb = XGBClassifier(max_depth=15, max_features=27,
                      n_estimators=75, min_samples_leaf=20)  # используем гиперпараметры решающего дерева для ансамбля extreme gradient boosting
model_xgb.fit(x, data['Response'])     # подгонка выбранных параметров под загруженный датасет данных

XGBClassifier(max_depth=15, max_features=27, min_samples_leaf=20,
              n_estimators=75, objective='multi:softprob')

CatBoost

In [15]:
model_cb = CatBoostClassifier(iterations=10000, learning_rate=0.57,
            random_seed=17, depth=6, l2_leaf_reg=2,
            loss_function='MultiClass', bootstrap_type="MVS")   # используем гиперпараметры решающего дерева для ансамбля catecorial boosting
model_cb.fit(Pool(data=x, label=data["Response"]))    # подгонка выбранных параметров под загруженный датасет данных

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
5000:	learn: 0.0781839	total: 11m 43s	remaining: 11m 42s
5001:	learn: 0.0781349	total: 11m 43s	remaining: 11m 42s
5002:	learn: 0.0780948	total: 11m 43s	remaining: 11m 42s
5003:	learn: 0.0780226	total: 11m 43s	remaining: 11m 42s
5004:	learn: 0.0780059	total: 11m 43s	remaining: 11m 42s
5005:	learn: 0.0779897	total: 11m 43s	remaining: 11m 42s
5006:	learn: 0.0779641	total: 11m 43s	remaining: 11m 42s
5007:	learn: 0.0779603	total: 11m 44s	remaining: 11m 41s
5008:	learn: 0.0779602	total: 11m 44s	remaining: 11m 41s
5009:	learn: 0.0779238	total: 11m 44s	remaining: 11m 41s
5010:	learn: 0.0779237	total: 11m 44s	remaining: 11m 41s
5011:	learn: 0.0779090	total: 11m 44s	remaining: 11m 41s
5012:	learn: 0.0778857	total: 11m 44s	remaining: 11m 41s
5013:	learn: 0.0778651	total: 11m 44s	remaining: 11m 40s
5014:	learn: 0.0778160	total: 11m 44s	remaining: 11m 40s
5015:	learn: 0.0777677	total: 11m 45s	remaining: 11m 40s
5016:	

<catboost.core.CatBoostClassifier at 0x7f477c5b9850>

Градиентный бустинг

In [16]:
print (x.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59381 entries, 0 to 59380
Columns: 118 entries, 0 to 117
dtypes: float16(118)
memory usage: 13.4 MB
None


In [17]:
model_gbc = GradientBoostingClassifier(random_state=17, max_depth=12,
                max_features=26, min_samples_leaf=20, n_estimators=75)
model_gbc.fit(x, data['Response'])

GradientBoostingClassifier(max_depth=12, max_features=26, min_samples_leaf=20,
                           n_estimators=75, random_state=17)

LightGBM

In [18]:
model_lgb = lgb.LGBMRegressor(random_state=17, max_depth=18,
    min_child_samples=17, num_leaves=35, n_estimators=10000)
model_lgb.fit(x, data['Response'])

LGBMRegressor(max_depth=18, min_child_samples=17, n_estimators=10000,
              num_leaves=35, random_state=17)

### Загрузка данных для расчета

In [19]:
data_test = pd.read_csv("https://video.ittensive.com/machine-learning/prudential/test.csv.gz")
data_test = data_preprocess(data_test)
data_test = reduce_mem_usage(data_test)
data_test_transformed = pd.DataFrame(scaler.transform(pd.DataFrame(data_test,
                                                     columns=columns)))
print (data_test_transformed.info())

Потребление памяти меньше на 16.34 Мб (минус 84.9 %)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19765 entries, 0 to 19764
Columns: 118 entries, 0 to 117
dtypes: float32(118)
memory usage: 8.9 MB
None


### Расчет предказаний

In [21]:
data_test["target_xgb"] = model_xgb.predict(data_test_transformed)  # предсказания по "подогнанным" данным разных моделей

In [22]:
data_test["target_cb"] = model_cb.predict(Pool(data=data_test_transformed))

In [23]:
data_test["target_gbc"] = model_gbc.predict(data_test_transformed)

In [24]:
data_test["target_lgb"] = np.round(model_lgb.predict(data_test_transformed)).astype("int8")

Классы смещены на 1: начинаются от 0 и заканчиваются 7. Судя по рассчитанным матрицам ошибок, для 0, 1, 3, 4 и 6 классов точнее работает градиентный бустинг, для 2 - XGBoost, для 5 - LightGBM, для 7 - логистическая регрессия.

Точные параметры классов можно перерассчитать, например, через перекрестную проверку всех данных.

In [25]:
def vote_class (x):
    if x.target_xgb == 2:
        class_ = x.target_xgb
#    elif x.target_lgb == 7:
#        class_ = x.target_lgb
#    elif x.target_cb == 0:
#        class_ = x.target_cb
    else:
        class_ = x.target_gbc
    x["Response"] = class_ + 1
    return x

In [28]:
data_test = data_test.apply(vote_class, axis=1)
data_test.head()

Unnamed: 0,Id,Product_Info_1,Product_Info_3,Product_Info_4,Product_Info_5,Product_Info_6,Product_Info_7,Ins_Age,Ht,Wt,BMI,Employment_Info_1,Employment_Info_2,Employment_Info_3,Employment_Info_4,Employment_Info_5,Employment_Info_6,InsuredInfo_1,InsuredInfo_2,InsuredInfo_3,InsuredInfo_4,InsuredInfo_5,InsuredInfo_6,InsuredInfo_7,Insurance_History_1,Insurance_History_2,Insurance_History_3,Insurance_History_4,Insurance_History_5,Insurance_History_7,Insurance_History_8,Insurance_History_9,Family_Hist_1,Family_Hist_2,Family_Hist_3,Family_Hist_4,Family_Hist_5,Medical_History_1,Medical_History_2,Medical_History_3,Medical_History_4,Medical_History_5,Medical_History_6,Medical_History_7,Medical_History_8,Medical_History_9,Medical_History_10,Medical_History_11,Medical_History_12,Medical_History_13,Medical_History_14,Medical_History_15,Medical_History_16,Medical_History_17,Medical_History_18,Medical_History_19,Medical_History_20,Medical_History_21,Medical_History_22,Medical_History_23,Medical_History_24,Medical_History_25,Medical_History_26,Medical_History_27,Medical_History_28,Medical_History_29,Medical_History_30,Medical_History_31,Medical_History_32,Medical_History_33,Medical_History_34,Medical_History_35,Medical_History_36,Medical_History_37,Medical_History_38,Medical_History_39,Medical_History_40,Medical_History_41,Medical_Keyword_1,Medical_Keyword_2,Medical_Keyword_3,Medical_Keyword_4,Medical_Keyword_5,Medical_Keyword_6,Medical_Keyword_7,Medical_Keyword_8,Medical_Keyword_9,Medical_Keyword_10,Medical_Keyword_11,Medical_Keyword_12,Medical_Keyword_13,Medical_Keyword_14,Medical_Keyword_15,Medical_Keyword_16,Medical_Keyword_17,Medical_Keyword_18,Medical_Keyword_19,Medical_Keyword_20,Medical_Keyword_21,Medical_Keyword_22,Medical_Keyword_23,Medical_Keyword_24,Medical_Keyword_25,Medical_Keyword_26,Medical_Keyword_27,Medical_Keyword_28,Medical_Keyword_29,Medical_Keyword_30,Medical_Keyword_31,Medical_Keyword_32,Medical_Keyword_33,Medical_Keyword_34,Medical_Keyword_35,Medical_Keyword_36,Medical_Keyword_37,Medical_Keyword_38,Medical_Keyword_39,Medical_Keyword_40,Medical_Keyword_41,Medical_Keyword_42,Medical_Keyword_43,Medical_Keyword_44,Medical_Keyword_45,Medical_Keyword_46,Medical_Keyword_47,Medical_Keyword_48,Product_Info_2_2,Product_Info_2_1D,Product_Info_2_1A,Product_Info_2_1E,Product_Info_2_1B,Product_Info_2_1C,target_xgb,target_cb,target_gbc,target_lgb,Response
0,1.0,1.0,26.0,0.487061,2.0,3.0,1.0,0.611816,0.781738,0.338867,0.472168,0.150024,3.0,1.0,0.0,2.0,0.5,2.0,2.0,11.0,3.0,1.0,1.0,1.0,2.0,1.0,1.0,3.0,-1.0,3.0,2.0,3.0,3.0,-1.0,0.627441,0.760742,-1.0,2.0,16.0,2.0,2.0,1.0,3.0,1.0,2.0,2.0,-1.0,3.0,2.0,1.0,3.0,-1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,-1.0,2.0,2.0,1.0,1.0,3.0,2.0,3.0,-1.0,3.0,3.0,1.0,3.0,2.0,1.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,4.0,7.0
1,3.0,1.0,26.0,0.076904,2.0,3.0,1.0,0.626953,0.727051,0.311768,0.484863,0.0,1.0,3.0,0.070007,2.0,0.199951,1.0,2.0,8.0,3.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,0.001667,1.0,1.0,2.0,2.0,-1.0,0.529297,0.746582,-1.0,5.0,261.0,3.0,1.0,1.0,3.0,2.0,2.0,1.0,-1.0,3.0,2.0,3.0,3.0,110.0,3.0,3.0,1.0,1.0,2.0,1.0,2.0,3.0,-1.0,2.0,2.0,3.0,1.0,3.0,2.0,3.0,-1.0,3.0,3.0,1.0,3.0,2.0,1.0,3.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,7.0,7.0,7.0,5.0,8.0
2,4.0,1.0,26.0,0.144653,2.0,3.0,1.0,0.582031,0.708984,0.320068,0.519043,0.142944,9.0,1.0,0.0,2.0,0.449951,1.0,2.0,3.0,3.0,1.0,1.0,1.0,2.0,1.0,1.0,3.0,-1.0,3.0,2.0,3.0,3.0,0.666504,-1.0,0.662109,-1.0,3.0,132.0,2.0,1.0,1.0,3.0,2.0,2.0,2.0,-1.0,3.0,2.0,3.0,3.0,240.0,1.0,3.0,1.0,1.0,2.0,1.0,2.0,3.0,-1.0,2.0,2.0,3.0,1.0,1.0,2.0,3.0,-1.0,1.0,3.0,1.0,3.0,2.0,1.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,5.0,5.0,5.0,4.0,6.0
3,9.0,1.0,26.0,0.151733,2.0,1.0,1.0,0.522461,0.654785,0.267822,0.487061,0.209961,9.0,1.0,0.0,2.0,1.0,2.0,2.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,0.000667,2.0,1.0,2.0,2.0,-1.0,0.686035,0.67627,-1.0,-1.0,162.0,3.0,2.0,1.0,1.0,2.0,3.0,2.0,-1.0,3.0,2.0,3.0,3.0,-1.0,1.0,3.0,1.0,1.0,2.0,2.0,2.0,3.0,-1.0,1.0,3.0,3.0,2.0,3.0,2.0,3.0,-1.0,3.0,1.0,1.0,2.0,2.0,1.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,6.0,0.0,7.0,6.0,8.0
4,12.0,1.0,26.0,0.076904,2.0,3.0,1.0,0.298584,0.672852,0.246826,0.428711,0.085022,9.0,1.0,0.0,2.0,0.199951,1.0,2.0,8.0,3.0,1.0,2.0,1.0,2.0,1.0,1.0,3.0,-1.0,3.0,2.0,3.0,2.0,0.449219,-1.0,0.380371,-1.0,18.0,181.0,3.0,1.0,1.0,3.0,2.0,2.0,2.0,-1.0,3.0,2.0,3.0,3.0,188.0,1.0,3.0,1.0,1.0,2.0,1.0,2.0,1.0,-1.0,1.0,3.0,3.0,1.0,1.0,2.0,3.0,-1.0,3.0,3.0,1.0,2.0,2.0,1.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,7.0,7.0,7.0,6.0,8.0


### Формирование и выгрузка результатов
Загрузим примерный файл, заменим в нем результаты и сохраним.

Число строк в файле будет равно размену набора данных + 1 заголовочная строка.

In [29]:
submission = pd.read_csv("https://video.ittensive.com/machine-learning/prudential/sample_submission.csv.gz")
submission["Response"] = data_test["Response"].astype("int8")
submission.to_csv("submission.csv", index=False)
print (len(submission["Response"]) + 1)

19766


### Само-проверка модели
Рассчитаем точность классификации на обучающих данных

In [30]:
data_copy = data_transformed.copy()
x_copy = pd.DataFrame(data_copy, columns=columns_transformed)
copy_dataset = Pool(data=x_copy, label=data_copy["Response"])
data_copy["target_xgb"] = model_xgb.predict(x_copy)
data_copy["target_cb"] = model_cb.predict(copy_dataset)
data_copy["target_gbc"] = model_gbc.predict(x_copy)
data_copy["target_lgb"] = np.round(model_lgb.predict(x_copy)).astype("int8")

In [32]:
class_target = ["target_gbc"]*8
def vote_class_enumerate (x):    # функция пересчета классов голосования
    for _,target in enumerate(class_target):
        if x[target] == _:
            x["Response"] = x[target]
            break
    return x

In [33]:
kappa_min = 0
for target_model in ["xgb", "cb", "gbc", "lgb"]:
    print ("Проверяем модель:", target_model)
    target_model = "target_" + target_model
    for class_ in range(0,8):
        target_model_prev = class_target[class_]
        class_target[class_] = target_model
        data_copy = data_copy.apply(vote_class_enumerate, axis=1)
        kappa = cohen_kappa_score(data_copy["Response"], 
                data["Response"], weights='quadratic')
        if kappa > kappa_min:
            kappa_min = kappa
        else:
            class_target[class_] = target_model_prev
    print ("Максимальная оценка:", kappa_min)
print (class_target)

Проверяем модель: xgb
Максимальная оценка: 0.7851668495048733
Проверяем модель: cb
Максимальная оценка: 0.9235406319076538
Проверяем модель: gbc
Максимальная оценка: 0.9235406319076538
Проверяем модель: lgb
Максимальная оценка: 0.9242086183066969
['target_cb', 'target_cb', 'target_cb', 'target_cb', 'target_cb', 'target_cb', 'target_lgb', 'target_lgb']


In [None]:
data_copy = data_copy.apply(vote_class_enumerate, axis=1)

['target_xgb', 'target_xgb', 'target_gbc', 'target_gbc', 'target_xgb', 'target_xgb', 'target_xgb', 'target_xgb']


In [None]:
print ("Результат:",
       round(cohen_kappa_score(data_copy["Response"],
                    data["Response"], weights='quadratic'), 3))
print (confusion_matrix(data_copy["Response"], data["Response"]))

Результат: 0.93
[[ 5568     0     0     0     0     0     0     0]
 [    0  5939     0     1     0     0     0     0]
 [    1     0  1011     0     0     0     0     0]
 [   10    17     2  1425     0     2     0     1]
 [   23     8     0     0  5271     3     1     3]
 [  120   108     0     0    34 10262    34    31]
 [  119    78     0     1    32   129  7032    16]
 [  366   402     0     1    95   837   960 19438]]
