In [3]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, AdaBoostClassifier, StackingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler

In [5]:
# Training-set location: a bare file name, no directory components
path_train = 'train.csv'
# Test-set location: a bare file name, no directory components
path_test = 'test.csv'

# Load both sets; exact duplicate rows are removed from the training data only.
# NOTE(review): the displayed frame shows an 'Unnamed: 0' column. If that is a
# real column in the CSV (a saved row index), it is used as a feature and makes
# every row unique, so drop_duplicates() removes nothing — confirm against the file.
df_train = pd.read_csv(path_train).drop_duplicates()
df_test = pd.read_csv(path_test)
df_train

Unnamed: 0,C1,N2,N3,C4_enc,C5_enc,C6_enc,N7,C8,C9,N10,C11,C12_enc,N13,N14,Target
0,1.0,28.00,2.000,2.0,4.0,8.0,4.165,1.0,1.0,2.0,1.0,2.0,181.0,1.0,1.0
1,1.0,36.25,5.000,2.0,8.0,5.0,2.500,1.0,1.0,6.0,0.0,2.0,0.0,368.0,1.0
2,0.0,29.50,2.000,1.0,10.0,8.0,2.000,0.0,0.0,0.0,0.0,2.0,256.0,18.0,0.0
3,1.0,22.67,0.165,2.0,8.0,3.0,2.250,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
4,0.0,25.75,0.500,2.0,8.0,8.0,0.875,1.0,0.0,0.0,1.0,2.0,491.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
581,1.0,28.75,1.165,2.0,4.0,4.0,0.500,1.0,0.0,0.0,0.0,1.0,280.0,1.0,0.0
582,1.0,44.83,7.000,1.0,8.0,4.0,1.625,0.0,0.0,0.0,0.0,2.0,160.0,3.0,0.0
583,1.0,21.33,7.500,2.0,6.0,4.0,1.415,1.0,1.0,1.0,0.0,2.0,80.0,9801.0,1.0
584,0.0,53.33,0.165,2.0,1.0,1.0,0.000,0.0,0.0,0.0,1.0,1.0,62.0,28.0,0.0


In [6]:
# Display the loaded test set for a quick visual check
df_test

Unnamed: 0,C1,N2,N3,C4_enc,C5_enc,C6_enc,N7,C8,C9,N10,C11,C12_enc,N13,N14
0,1.0,22.00,0.790,2.0,9.0,4.0,0.290,0.0,1.0,1.0,0.0,2.0,420.0,284.0
1,1.0,31.57,4.000,2.0,14.0,4.0,5.000,1.0,1.0,3.0,1.0,2.0,290.0,2280.0
2,1.0,35.58,0.750,2.0,4.0,4.0,1.500,0.0,0.0,0.0,1.0,2.0,231.0,1.0
3,1.0,41.17,1.335,2.0,2.0,4.0,0.165,0.0,0.0,0.0,0.0,2.0,168.0,1.0
4,1.0,21.17,0.875,1.0,8.0,8.0,0.250,0.0,0.0,0.0,0.0,2.0,280.0,205.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,1.0,17.08,0.085,1.0,8.0,4.0,0.040,0.0,0.0,0.0,0.0,2.0,140.0,723.0
100,0.0,33.67,0.375,2.0,13.0,4.0,0.375,0.0,0.0,0.0,0.0,2.0,300.0,45.0
101,1.0,18.08,5.500,2.0,4.0,4.0,0.500,1.0,0.0,0.0,0.0,2.0,80.0,1.0
102,0.0,31.57,1.500,2.0,1.0,1.0,0.000,0.0,1.0,2.0,1.0,2.0,200.0,106.0


In [7]:
# Model training and validation block(s)

In [8]:
# Gradient-boosting base learner (wrapped in a single-step pipeline).
# subsample=0.9 fits every boosting stage on a random 90% of the rows, so the
# seed is pinned to make the fitted model reproducible across kernel restarts.
grad_boost = make_pipeline(GradientBoostingClassifier(
    criterion='friedman_mse',
    learning_rate=0.11,
    max_features=None,
    min_samples_leaf=1,
    min_samples_split=4,
    n_estimators=100,
    subsample=0.9,
    random_state=42,
))

In [9]:
# Random-forest base learner (wrapped in a single-step pipeline).
# Trees are grown on bootstrap samples (the estimator's default), so the seed
# is pinned to make the fitted forest reproducible across kernel restarts.
rf = make_pipeline(
    RandomForestClassifier(
        criterion='gini',
        max_depth=4,
        max_features=None,
        min_samples_leaf=2,
        min_samples_split=2,
        n_estimators=1000,
        random_state=42,
    )
)

In [10]:
# AdaBoost base learner built on depth-1 decision trees (stumps).
base_estimator = DecisionTreeClassifier(max_depth=1)
# The weak learner is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the first positional slot is the same in all releases.
# random_state is pinned so repeated runs produce the same ensemble.
ada_boost = make_pipeline(AdaBoostClassifier(
        base_estimator,
        n_estimators=200,
        learning_rate=0.1,
        random_state=42,
    )
)

In [11]:
# Meta-learner: standardise the inputs, then fit an L1-penalised logistic
# regression. The liblinear solver shuffles the data internally, so the seed
# is pinned to keep the fitted coefficients reproducible.
log_reg = make_pipeline(
    StandardScaler(),
    LogisticRegression(C=1, penalty='l1', solver='liblinear', random_state=42),
)

In [12]:
# Separate the label column from the predictor columns
y_train = df_train['Target']
X_train = df_train.drop(columns='Target')

In [13]:
# Aliases for the three base learners used by the stacking ensemble
clf_v1, clf_v2, clf_v3 = grad_boost, ada_boost, rf

# Stacked model: the base learners' predict_proba outputs are the inputs of
# the logistic-regression final estimator.
model_itog = StackingClassifier(
    estimators=[('grad', clf_v1), ('ada', clf_v2), ('forest', clf_v3)],
    final_estimator=log_reg,
    stack_method='predict_proba',
)

# Fit on the full training set; as the last expression, the fitted model is displayed
model_itog.fit(X_train, y_train)



In [14]:
# Prediction block using the test set.
# The training feature columns are selected explicitly so the model receives
# the same columns, in the same order, as the data it was fitted on, even if
# test.csv orders them differently or carries extra columns.
y_predict = model_itog.predict(df_test[X_train.columns])

In [15]:
# y_predict is the vector of predicted values obtained from the test set
y_predict

array([0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0.,
       1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 0., 0.,
       1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.,
       1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0.,
       1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 1., 0., 1., 1.,
       0., 0., 0., 1., 1., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0.,
       0., 0.])