# スタッキング
・GBDT 2~3個：決定木の深さが「浅い」「普通」「深い」モデル

・Random Forest 1~2個：決定木の深さが「浅い」「深い」モデル

・Neural Net 1~2個：層の数が「少ない」「多い」モデル

・Linear 1個

In [1]:
# import os
# os.chdir('../../')

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from scr.models.gbdt import Model1_CatBoost_1, Model1_CatBoost_2, Model1_CatBoost_3, Model1_XGBoost_1, Model1_XGBoost_2, Model1_XGBoost_3, Model1_LightGBM_1, Model1_LightGBM_2, Model1_LightGBM_3
from scr.models.random_forest import Model1_RandomForest_1, Model1_RandomForest_2, Model1_RandomForest_3
from scr.models.nn import Model1_NN_1, Model1_NN_2, Model1_NN_all_1, Model1_NN_all_2, Model1_TabNet_1, Model1_TabNet_2
from scr.models.linear import Model1_Logistic_1, Model1_Logistic_2, Model1_Logistic_3, Model1_Logistic_4, Model2_Logistic

In [3]:
# Load the train and test feature tables (judging by the file names, the
# scaled variants prepared for non-GBDT models). Train is read first, then test.
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/feature_engineered/null_cat/train_scaled_for_not_gbdt.csv',
        'data/feature_engineered/null_cat/test_scaled_for_not_gbdt.csv',
    )
)

In [4]:
# Column names used as model inputs. The same list is used to select columns
# from both the training frame and the test frame, so the order here is the
# column order every model sees.
feature = [
    'Age', 
    'DurationOfPitch', 
    'NumberOfPersonVisiting',
    'NumberOfFollowups', 
    'NumberOfTrips', 
    'MonthlyIncome', 
    # 'ProdTaken' is read separately as the target (y), so it stays excluded.
    #'ProdTaken',
    'Motivation', 
    'EconomicPower', 
    'TripEasier', 
    'SalesPerformance',
    'LivingCost', 
    'EconomicStability', 
    'NumberOfTrips_log', 
    'TravelCost',
    'EconomicSegment', 
    'PackageMatch', 
    'Monetary', 
    # The ContractRate_* columns are currently excluded from the feature set.
    #'ContractRate_FM',
    #'ContractRate_G1',
    #'ContractRate_G2', 
    #'ContractRate_G3',
    #'ContractRate_G4',
    #'ContractRate_G5', 
    #'ContractRate_G6',
    'TypeofContact_No',
    'TypeofContact_Self Enquiry',
    'CityTier_2',
    'CityTier_3',
    'Occupation_Salaried',
    'Occupation_Small Business',
    'Gender_male', 
    'ProductPitched_Deluxe', 
    'ProductPitched_King',
    'ProductPitched_Standard',
    'ProductPitched_Super Deluxe',
    'PreferredPropertyStar_4',
    'PreferredPropertyStar_5', 
    'Passport_1',
    'PitchSatisfactionScore_2', 
    'PitchSatisfactionScore_3',
    'PitchSatisfactionScore_4',
    'PitchSatisfactionScore_5',
    'Designation_Executive',
    'Designation_Manager',
    'Designation_Senior Manager', 
    'Designation_VP',
    'Marry_Married',
    'Marry_Single', 
    'Car_No Car', 
    'Child_1_child', 
    'Child_2_child',
    'Child_3_child',
    'AgeGroup_20s',
    'AgeGroup_30s',
    'AgeGroup_40s',
    'AgeGroup_50s', 
    'AgeGroup_60s', 
    'TypeofContactNULL_1',
    'Child01_1',
    'IsFamily_1',
    'FreaqencySeg_1',
    'FreaqencySeg_2',
    'MonetarySeg_2',
    'MonetarySeg_3', 
    'MonetarySeg_4', 
    'AgeNull', 
    'DurationOfPitchNull',
    'NumberOfTripsNull', 
    'MonthlyIncomeNull'
]

# Target column and model inputs taken from the training frame.
y = df_train['ProdTaken']
X = df_train[feature]

# Keep only the selected feature columns of the test frame, in list order.
df_test = df_test[feature]

# Number of input columns; passed to the NN / TabNet constructors.
num_features = df_test.shape[1]

In [5]:
# Stacking
def predict_cv(model, X, y, df_test, *, n_splits=5, random_state=42):
    """Return out-of-fold train predictions and fold-averaged test predictions.

    ``X`` / ``y`` are split with a shuffled ``StratifiedKFold``. On every fold
    ``model`` is fit on the training part (the validation part is handed to
    ``fit`` as well) and then used to predict both the validation rows and
    the whole of ``df_test``.

    Args:
        model: Object exposing ``fit(tr_x, tr_y, va_x, va_y)`` and
            ``predict(x)``.
        X: Training features; rows are selected positionally via ``.iloc``.
        y: Training target; rows are selected positionally via ``.iloc``.
        df_test: Test features, passed unchanged to ``model.predict``.
        n_splits: Number of folds. Defaults to 5.
        random_state: Seed for the fold shuffling. Defaults to 42.

    Returns:
        A tuple ``(pred_train, preds_test)``. ``pred_train`` holds the
        validation predictions rearranged into the original row order of
        ``X``; ``preds_test`` is the element-wise mean of the per-fold test
        predictions.
    """
    # NOTE(review): the very same ``model`` object is re-fit on every fold.
    # This presumably relies on ``fit`` rebuilding the estimator from
    # scratch -- confirm for each wrapper, otherwise state carries over
    # between folds and the out-of-fold scores become optimistic.
    skf = StratifiedKFold(
        n_splits=n_splits, shuffle=True, random_state=random_state
    )

    fold_preds = []
    fold_preds_test = []
    fold_va_idx = []
    for tr_idx, va_idx in skf.split(X, y):
        tr_x, va_x = X.iloc[tr_idx], X.iloc[va_idx]
        tr_y, va_y = y.iloc[tr_idx], y.iloc[va_idx]
        model.fit(tr_x, tr_y, va_x, va_y)
        fold_preds.append(model.predict(va_x))
        fold_preds_test.append(model.predict(df_test))
        fold_va_idx.append(va_idx)

    # Concatenate the validation predictions fold by fold, then undo the
    # fold ordering so that row k of the result belongs to row k of X.
    order = np.argsort(np.concatenate(fold_va_idx))
    pred_train = np.concatenate(fold_preds, axis=0)[order]

    # Average the test predictions over the folds.
    preds_test = np.mean(fold_preds_test, axis=0)

    return pred_train, preds_test

In [6]:
# First-layer models, part 1: the GBDT wrappers (CatBoost / XGBoost / LightGBM)
# followed by the random-forest wrappers. Each class is instantiated with its
# defaults, in the order listed.
models_gbdt = [
    model_cls()
    for model_cls in (
        Model1_CatBoost_1,
        Model1_CatBoost_2,
        Model1_CatBoost_3,
        Model1_XGBoost_1,
        Model1_XGBoost_2,
        Model1_XGBoost_3,
        Model1_LightGBM_1,
        Model1_LightGBM_2,
        Model1_LightGBM_3,
        Model1_RandomForest_1,
        Model1_RandomForest_2,
        Model1_RandomForest_3,
    )
]

# First-layer models, part 2: NN wrappers, TabNet wrappers and the logistic
# wrappers, instantiated in that order. The NN and TabNet classes receive the
# number of input columns (under different keyword names).
models_nn = [
    *(
        nn_cls(input_shape=num_features)
        for nn_cls in (
            Model1_NN_1,
            Model1_NN_2,
            Model1_NN_all_1,
            Model1_NN_all_2,
        )
    ),
    *(
        tabnet_cls(input_dim=num_features)
        for tabnet_cls in (Model1_TabNet_1, Model1_TabNet_2)
    ),
    *(
        logistic_cls()
        for logistic_cls in (
            Model1_Logistic_1,
            Model1_Logistic_2,
            Model1_Logistic_3,
            Model1_Logistic_4,
        )
    ),
]



In [7]:
# Run cross-validated prediction for every first-layer model and collect the
# results in model order: all of models_gbdt first, then all of models_nn.
pred_train_list = []
pred_test_list = []
for model in models_gbdt + models_nn:
    pred_train, pred_test = predict_cv(model, X, y, df_test)
    pred_train_list.append(pred_train)
    pred_test_list.append(pred_test)

0:	test: 0.6015970	best: 0.6015970 (0)	total: 119ms	remaining: 13m 52s
200:	test: 0.8337458	best: 0.8339465 (199)	total: 520ms	remaining: 17.6s
400:	test: 0.8385452	best: 0.8389799 (391)	total: 921ms	remaining: 15.2s
600:	test: 0.8384114	best: 0.8389799 (391)	total: 1.35s	remaining: 14.4s
800:	test: 0.8386957	best: 0.8394649 (705)	total: 1.74s	remaining: 13.4s
1000:	test: 0.8394649	best: 0.8400669 (960)	total: 2.12s	remaining: 12.7s
1200:	test: 0.8397492	best: 0.8400669 (960)	total: 2.52s	remaining: 12.2s
1400:	test: 0.8398997	best: 0.8401338 (1337)	total: 2.91s	remaining: 11.6s
1600:	test: 0.8400000	best: 0.8401338 (1337)	total: 3.32s	remaining: 11.2s
1800:	test: 0.8398662	best: 0.8401338 (1337)	total: 3.7s	remaining: 10.7s
2000:	test: 0.8397492	best: 0.8401338 (1337)	total: 4.09s	remaining: 10.2s
2200:	test: 0.8395652	best: 0.8401338 (1337)	total: 4.47s	remaining: 9.74s
2400:	test: 0.8395819	best: 0.8401338 (1337)	total: 4.85s	remaining: 9.29s
2600:	test: 0.8396823	best: 0.8401338 (1



epoch 0  | loss: 0.56205 | val_0_auc: 0.65719 |  0:00:00s
epoch 1  | loss: 0.45426 | val_0_auc: 0.64431 |  0:00:00s
epoch 2  | loss: 0.41859 | val_0_auc: 0.63873 |  0:00:00s
epoch 3  | loss: 0.40374 | val_0_auc: 0.66761 |  0:00:01s
epoch 4  | loss: 0.37225 | val_0_auc: 0.68226 |  0:00:01s
epoch 5  | loss: 0.37307 | val_0_auc: 0.70545 |  0:00:01s
epoch 6  | loss: 0.35586 | val_0_auc: 0.72543 |  0:00:02s
epoch 7  | loss: 0.35553 | val_0_auc: 0.73731 |  0:00:02s
epoch 8  | loss: 0.35048 | val_0_auc: 0.75159 |  0:00:02s
epoch 9  | loss: 0.34123 | val_0_auc: 0.76462 |  0:00:03s
epoch 10 | loss: 0.33934 | val_0_auc: 0.77069 |  0:00:03s
epoch 11 | loss: 0.32287 | val_0_auc: 0.77333 |  0:00:03s
epoch 12 | loss: 0.32137 | val_0_auc: 0.78003 |  0:00:04s
epoch 13 | loss: 0.31513 | val_0_auc: 0.78038 |  0:00:04s
epoch 14 | loss: 0.31322 | val_0_auc: 0.78339 |  0:00:04s
epoch 15 | loss: 0.30917 | val_0_auc: 0.77778 |  0:00:05s
epoch 16 | loss: 0.30288 | val_0_auc: 0.78246 |  0:00:05s
epoch 17 | los



epoch 0  | loss: 0.56226 | val_0_auc: 0.56463 |  0:00:00s
epoch 1  | loss: 0.4353  | val_0_auc: 0.64147 |  0:00:00s
epoch 2  | loss: 0.41554 | val_0_auc: 0.67001 |  0:00:01s
epoch 3  | loss: 0.39524 | val_0_auc: 0.66974 |  0:00:01s
epoch 4  | loss: 0.38961 | val_0_auc: 0.66245 |  0:00:01s
epoch 5  | loss: 0.38099 | val_0_auc: 0.67611 |  0:00:02s
epoch 6  | loss: 0.35837 | val_0_auc: 0.69173 |  0:00:02s
epoch 7  | loss: 0.34924 | val_0_auc: 0.70431 |  0:00:02s
epoch 8  | loss: 0.34054 | val_0_auc: 0.71189 |  0:00:02s
epoch 9  | loss: 0.33656 | val_0_auc: 0.72137 |  0:00:03s
epoch 10 | loss: 0.33193 | val_0_auc: 0.72518 |  0:00:03s
epoch 11 | loss: 0.31906 | val_0_auc: 0.73452 |  0:00:03s
epoch 12 | loss: 0.31566 | val_0_auc: 0.74105 |  0:00:04s
epoch 13 | loss: 0.31354 | val_0_auc: 0.74484 |  0:00:04s
epoch 14 | loss: 0.30864 | val_0_auc: 0.75078 |  0:00:05s
epoch 15 | loss: 0.30654 | val_0_auc: 0.75759 |  0:00:05s
epoch 16 | loss: 0.30645 | val_0_auc: 0.7566  |  0:00:05s
epoch 17 | los



epoch 0  | loss: 0.55109 | val_0_auc: 0.59525 |  0:00:00s
epoch 1  | loss: 0.42902 | val_0_auc: 0.59618 |  0:00:00s
epoch 2  | loss: 0.41742 | val_0_auc: 0.62962 |  0:00:00s
epoch 3  | loss: 0.38838 | val_0_auc: 0.62093 |  0:00:01s
epoch 4  | loss: 0.38544 | val_0_auc: 0.62469 |  0:00:01s
epoch 5  | loss: 0.37391 | val_0_auc: 0.66021 |  0:00:01s
epoch 6  | loss: 0.36429 | val_0_auc: 0.68943 |  0:00:02s
epoch 7  | loss: 0.34859 | val_0_auc: 0.72284 |  0:00:02s
epoch 8  | loss: 0.33854 | val_0_auc: 0.73871 |  0:00:02s
epoch 9  | loss: 0.33511 | val_0_auc: 0.7423  |  0:00:03s
epoch 10 | loss: 0.3272  | val_0_auc: 0.74564 |  0:00:03s
epoch 11 | loss: 0.32338 | val_0_auc: 0.74636 |  0:00:03s
epoch 12 | loss: 0.32565 | val_0_auc: 0.74837 |  0:00:04s
epoch 13 | loss: 0.31551 | val_0_auc: 0.74753 |  0:00:04s
epoch 14 | loss: 0.30586 | val_0_auc: 0.74105 |  0:00:04s
epoch 15 | loss: 0.30608 | val_0_auc: 0.72803 |  0:00:05s
epoch 16 | loss: 0.31122 | val_0_auc: 0.7199  |  0:00:05s
epoch 17 | los



epoch 0  | loss: 0.56673 | val_0_auc: 0.57898 |  0:00:00s
epoch 1  | loss: 0.44356 | val_0_auc: 0.61    |  0:00:00s
epoch 2  | loss: 0.40651 | val_0_auc: 0.64839 |  0:00:00s
epoch 3  | loss: 0.38024 | val_0_auc: 0.66319 |  0:00:01s
epoch 4  | loss: 0.36884 | val_0_auc: 0.68018 |  0:00:01s
epoch 5  | loss: 0.36256 | val_0_auc: 0.69589 |  0:00:01s
epoch 6  | loss: 0.34404 | val_0_auc: 0.70915 |  0:00:02s
epoch 7  | loss: 0.34774 | val_0_auc: 0.72913 |  0:00:02s
epoch 8  | loss: 0.33588 | val_0_auc: 0.74585 |  0:00:02s
epoch 9  | loss: 0.33478 | val_0_auc: 0.75366 |  0:00:03s
epoch 10 | loss: 0.32416 | val_0_auc: 0.7534  |  0:00:03s
epoch 11 | loss: 0.31876 | val_0_auc: 0.75107 |  0:00:03s
epoch 12 | loss: 0.31658 | val_0_auc: 0.75675 |  0:00:04s
epoch 13 | loss: 0.31186 | val_0_auc: 0.76092 |  0:00:04s
epoch 14 | loss: 0.31043 | val_0_auc: 0.75871 |  0:00:04s
epoch 15 | loss: 0.30434 | val_0_auc: 0.75896 |  0:00:05s
epoch 16 | loss: 0.30064 | val_0_auc: 0.76075 |  0:00:05s
epoch 17 | los



epoch 0  | loss: 0.67869 | val_0_auc: 0.55187 |  0:00:00s
epoch 1  | loss: 0.52234 | val_0_auc: 0.63533 |  0:00:00s
epoch 2  | loss: 0.47909 | val_0_auc: 0.64299 |  0:00:01s
epoch 3  | loss: 0.44883 | val_0_auc: 0.6814  |  0:00:01s
epoch 4  | loss: 0.42301 | val_0_auc: 0.66259 |  0:00:02s
epoch 5  | loss: 0.40611 | val_0_auc: 0.67498 |  0:00:02s
epoch 6  | loss: 0.38399 | val_0_auc: 0.72968 |  0:00:03s
epoch 7  | loss: 0.36976 | val_0_auc: 0.74846 |  0:00:03s
epoch 8  | loss: 0.376   | val_0_auc: 0.78209 |  0:00:04s
epoch 9  | loss: 0.3639  | val_0_auc: 0.74585 |  0:00:04s
epoch 10 | loss: 0.37006 | val_0_auc: 0.75848 |  0:00:05s
epoch 11 | loss: 0.36244 | val_0_auc: 0.7614  |  0:00:05s
epoch 12 | loss: 0.35129 | val_0_auc: 0.75022 |  0:00:05s
epoch 13 | loss: 0.34752 | val_0_auc: 0.75242 |  0:00:06s
epoch 14 | loss: 0.35139 | val_0_auc: 0.76107 |  0:00:06s
epoch 15 | loss: 0.33904 | val_0_auc: 0.75564 |  0:00:07s
epoch 16 | loss: 0.33822 | val_0_auc: 0.75671 |  0:00:07s
epoch 17 | los



epoch 0  | loss: 0.68927 | val_0_auc: 0.50264 |  0:00:00s
epoch 1  | loss: 0.58754 | val_0_auc: 0.61654 |  0:00:00s
epoch 2  | loss: 0.49899 | val_0_auc: 0.5604  |  0:00:01s
epoch 3  | loss: 0.45376 | val_0_auc: 0.62627 |  0:00:01s
epoch 4  | loss: 0.44905 | val_0_auc: 0.64007 |  0:00:01s
epoch 5  | loss: 0.4112  | val_0_auc: 0.65151 |  0:00:02s
epoch 6  | loss: 0.40323 | val_0_auc: 0.65925 |  0:00:02s
epoch 7  | loss: 0.37659 | val_0_auc: 0.67661 |  0:00:03s
epoch 8  | loss: 0.37987 | val_0_auc: 0.66836 |  0:00:03s
epoch 9  | loss: 0.35412 | val_0_auc: 0.67696 |  0:00:04s
epoch 10 | loss: 0.35534 | val_0_auc: 0.69769 |  0:00:04s
epoch 11 | loss: 0.35476 | val_0_auc: 0.71222 |  0:00:04s
epoch 12 | loss: 0.34869 | val_0_auc: 0.70841 |  0:00:05s
epoch 13 | loss: 0.3478  | val_0_auc: 0.73304 |  0:00:05s
epoch 14 | loss: 0.34846 | val_0_auc: 0.74532 |  0:00:06s
epoch 15 | loss: 0.34439 | val_0_auc: 0.75502 |  0:00:06s
epoch 16 | loss: 0.34085 | val_0_auc: 0.77497 |  0:00:06s
epoch 17 | los



epoch 0  | loss: 0.68459 | val_0_auc: 0.54004 |  0:00:00s
epoch 1  | loss: 0.48486 | val_0_auc: 0.61847 |  0:00:00s
epoch 2  | loss: 0.46045 | val_0_auc: 0.62741 |  0:00:01s
epoch 3  | loss: 0.42051 | val_0_auc: 0.62388 |  0:00:01s
epoch 4  | loss: 0.40902 | val_0_auc: 0.67678 |  0:00:02s
epoch 5  | loss: 0.39272 | val_0_auc: 0.70854 |  0:00:02s
epoch 6  | loss: 0.37217 | val_0_auc: 0.70154 |  0:00:02s
epoch 7  | loss: 0.37544 | val_0_auc: 0.7483  |  0:00:03s
epoch 8  | loss: 0.36329 | val_0_auc: 0.72999 |  0:00:03s
epoch 9  | loss: 0.359   | val_0_auc: 0.7351  |  0:00:03s
epoch 10 | loss: 0.34667 | val_0_auc: 0.77333 |  0:00:04s
epoch 11 | loss: 0.34577 | val_0_auc: 0.76565 |  0:00:04s
epoch 12 | loss: 0.34265 | val_0_auc: 0.76157 |  0:00:05s
epoch 13 | loss: 0.33501 | val_0_auc: 0.7554  |  0:00:05s
epoch 14 | loss: 0.34045 | val_0_auc: 0.76513 |  0:00:06s
epoch 15 | loss: 0.33826 | val_0_auc: 0.76091 |  0:00:06s
epoch 16 | loss: 0.33945 | val_0_auc: 0.75267 |  0:00:06s
epoch 17 | los



epoch 0  | loss: 0.69196 | val_0_auc: 0.55249 |  0:00:00s
epoch 1  | loss: 0.50726 | val_0_auc: 0.65572 |  0:00:00s
epoch 2  | loss: 0.45311 | val_0_auc: 0.61496 |  0:00:01s
epoch 3  | loss: 0.40109 | val_0_auc: 0.62491 |  0:00:01s
epoch 4  | loss: 0.40608 | val_0_auc: 0.62414 |  0:00:01s
epoch 5  | loss: 0.38976 | val_0_auc: 0.62996 |  0:00:02s
epoch 6  | loss: 0.38972 | val_0_auc: 0.67877 |  0:00:02s
epoch 7  | loss: 0.37786 | val_0_auc: 0.67977 |  0:00:03s
epoch 8  | loss: 0.36663 | val_0_auc: 0.66382 |  0:00:03s
epoch 9  | loss: 0.35076 | val_0_auc: 0.68092 |  0:00:03s
epoch 10 | loss: 0.35374 | val_0_auc: 0.70584 |  0:00:04s
epoch 11 | loss: 0.35161 | val_0_auc: 0.70006 |  0:00:04s
epoch 12 | loss: 0.34906 | val_0_auc: 0.70899 |  0:00:05s
epoch 13 | loss: 0.34305 | val_0_auc: 0.70734 |  0:00:05s
epoch 14 | loss: 0.34219 | val_0_auc: 0.71051 |  0:00:05s
epoch 15 | loss: 0.3411  | val_0_auc: 0.70154 |  0:00:06s
epoch 16 | loss: 0.34045 | val_0_auc: 0.70321 |  0:00:06s
epoch 17 | los



epoch 0  | loss: 0.70149 | val_0_auc: 0.56468 |  0:00:00s
epoch 1  | loss: 0.55882 | val_0_auc: 0.54645 |  0:00:00s
epoch 2  | loss: 0.47927 | val_0_auc: 0.61005 |  0:00:01s
epoch 3  | loss: 0.451   | val_0_auc: 0.64322 |  0:00:01s
epoch 4  | loss: 0.42919 | val_0_auc: 0.66895 |  0:00:01s
epoch 5  | loss: 0.39425 | val_0_auc: 0.68949 |  0:00:02s
epoch 6  | loss: 0.37181 | val_0_auc: 0.71653 |  0:00:02s
epoch 7  | loss: 0.36766 | val_0_auc: 0.72212 |  0:00:03s
epoch 8  | loss: 0.36846 | val_0_auc: 0.70699 |  0:00:03s
epoch 9  | loss: 0.35497 | val_0_auc: 0.69893 |  0:00:04s
epoch 10 | loss: 0.35542 | val_0_auc: 0.70221 |  0:00:04s
epoch 11 | loss: 0.35442 | val_0_auc: 0.72847 |  0:00:04s
epoch 12 | loss: 0.34517 | val_0_auc: 0.73839 |  0:00:05s
epoch 13 | loss: 0.34389 | val_0_auc: 0.74526 |  0:00:05s
epoch 14 | loss: 0.33757 | val_0_auc: 0.72094 |  0:00:05s
epoch 15 | loss: 0.33557 | val_0_auc: 0.72123 |  0:00:06s
epoch 16 | loss: 0.32172 | val_0_auc: 0.72753 |  0:00:06s
epoch 17 | los



In [8]:
# 1層目のモデル

# model_1a = Model1_CatBoost_1()
# pred_train_1a, pred_test_1a = predict_cv(model_1a, X, y, df_test)

# model_1b = Model1_CatBoost_2()
# pred_train_1b, pred_test_1b = predict_cv(model_1b, X, y, df_test)

# model_1c = Model1_CatBoost_3()
# pred_train_1c, pred_test_1c = predict_cv(model_1c, X, y, df_test)

# model_1d = Model1_RandomForest_1()
# pred_train_1d, pred_test_1d = predict_cv(model_1d, X, y, df_test)

# model_1e = Model1_RandomForest_2()
# pred_train_1e, pred_test_1e = predict_cv(model_1e, X, y, df_test)

# model_1f = Model1_NN_1()
# pred_train_1f, pred_test_1f = predict_cv(model_1f, X, y, df_test)

# model_1g = Model1_NN_2()
# pred_train_1g, pred_test_1g = predict_cv(model_1g, X, y, df_test)

# model_1h = Model1_Logistic_1()
# pred_train_1h, pred_test_1h = predict_cv(model_1h, X, y, df_test)

In [9]:
# Evaluate the first-layer models: AUC of each model's cross-validated
# train predictions against the target, reported with a 1-based model number.
auc_scores = [roc_auc_score(y, pred_train) for pred_train in pred_train_list]
for i, auc_score in enumerate(auc_scores):
    print(f'AUC for model {i+1}: {auc_score}')

AUC for model 1: 0.8304163214581608
AUC for model 2: 0.8293430368306093
AUC for model 3: 0.8297586320059394
AUC for model 4: 0.7743338372480875
AUC for model 5: 0.7843262785267757
AUC for model 6: 0.7900733276665339
AUC for model 7: 0.8134831717578197
AUC for model 8: 0.8041423675744306
AUC for model 9: 0.8030448735191901
AUC for model 10: 0.812139548521073
AUC for model 11: 0.8198650458903153
AUC for model 12: 0.8213593055660164
AUC for model 13: 0.8454275115936258
AUC for model 14: 0.8458108275320371
AUC for model 15: 0.9103242449348496
AUC for model 16: 0.927817573892553
AUC for model 17: 0.7842455804344783
AUC for model 18: 0.7682915675873422
AUC for model 19: 0.8300316605515446
AUC for model 20: 0.8255885580864869
AUC for model 21: 0.7994699480304285
AUC for model 22: 0.739332384682426


In [10]:
# Build one column name per first-layer model: pred_1, pred_2, ...
column_names = [f'pred_{i+1}' for i in range(len(pred_train_list))]

# Turn the first-layer predictions into feature frames for the second layer;
# column k holds the predictions of the k-th model.
train_x_2 = pd.DataFrame(
    dict(zip(column_names, pred_train_list)),
    columns=column_names,
)

test_x_2 = pd.DataFrame(
    dict(zip(column_names, pred_test_list)),
    columns=column_names,
)

In [11]:
# # 予測値を特徴量としてデータフレームを作成
# train_x_2 = pd.DataFrame({
#     'pred_1a': pred_train_1a,
#     'pred_1b': pred_train_1b,
#     'pred_1c': pred_train_1c,
#     'pred_1d': pred_train_1d,
#     'pred_1e': pred_train_1e,
#     'pred_1f': pred_train_1f,
#     'pred_1g': pred_train_1g,
#     'pred_1h': pred_train_1h
#     })

# test_x_2 = pd.DataFrame({
#     'pred_1a': pred_test_1a,
#     'pred_1b': pred_test_1b,
#     'pred_1c': pred_test_1c,
#     'pred_1d': pred_test_1d,
#     'pred_1e': pred_test_1e,
#     'pred_1f': pred_test_1f,
#     'pred_1g': pred_test_1g,
#     'pred_1h': pred_test_1h,
#     })

In [12]:
# Second-layer model (Model2_Logistic), fit on the first-layer predictions.
# pred_train_2: cross-validated predictions of the second layer on the train rows.
# pred_test_2: second-layer predictions for the test rows.
model_2 = Model2_Logistic()
pred_train_2, pred_test_2 = predict_cv(
    model=model_2,
    X=train_x_2,
    y=y,
    df_test=test_x_2,
)
print(f'AUC: {roc_auc_score(y, pred_train_2)}')

AUC: 0.9132112191867784


In [13]:
# Pair the ids from the raw test file with the second-layer test predictions.
index = pd.read_csv('data/test.csv')['id'].values
df_submit = pd.DataFrame({"id": index})
df_submit["prediction"] = pred_test_2

In [14]:
# Destination of the submission CSV (relative to the working directory).
path = 'submission/submit_25_second_stacking.csv'

In [15]:
# Create the output folder if needed, so a long training run is not lost to a
# missing directory at the very last step.
from pathlib import Path
Path(path).parent.mkdir(parents=True, exist_ok=True)

# Write the submission with neither index nor header row. `header=False` is
# the documented way to drop the header; the former `header=None` relied on
# None being treated as falsy.
df_submit.to_csv(path, index=False, header=False)