In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [26]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import StackingRegressor
from sklearn.ensemble import StackingClassifier

from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor

from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve, precision_recall_curve

# 1. Регрессия

In [4]:
from sklearn.datasets import load_boston

data = load_boston()
X = data.data
y = data.target
z = data.feature_names
df = pd.DataFrame(np.c_[X,y], columns=list(z)+['target'])
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [24]:
RFR = RandomForestRegressor(random_state=42)
GBR = GradientBoostingRegressor(random_state=42)

estimators = [('gb',  GradientBoostingRegressor(random_state=42)),
              ('rvr', RandomForestRegressor(random_state=42)),
             ]

SR = StackingRegressor(estimators=estimators, final_estimator=LinearRegression())

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=42)
RFR.fit(X_train, y_train)
GBR.fit(X_train, y_train)
SR.fit(X_train, y_train)

y_pred_rfr = RFR.predict(X_test)
y_pred_gbr = GBR.predict(X_test)
y_pred_sr = SR.predict(X_test)


print(f'r2 for Random Forest is {r2_score(y_test, y_pred_rfr)}')
print(f'r2 for GradientBoosting is {r2_score(y_test, y_pred_gbr)}')
print(f'r2 for StackingRegression is {r2_score(y_test, y_pred_sr)}')


r2 for Random Forest is 0.8708997131330258
r2 for GradientBoosting is 0.8907929176394638
r2 for StackingRegression is 0.8880905875510527


# 2. Классификация

In [25]:
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer(as_frame=True)
X = data['data']
y = data['target']

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

RFC = RandomForestClassifier(random_state=42)
RFC.fit(X_train, y_train)
GBC = GradientBoostingClassifier(random_state=42)
GBC.fit(X_train, y_train)
y_pred1 = RFC.predict(X_test)
y_pred2 = GBC.predict(X_test)


estimators = [('gb',  GradientBoostingClassifier(random_state=42)),
              ('rvr', RandomForestClassifier(random_state=42)),
             ]

SC = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())

y_pred3 = SC.fit(X_train, y_train).predict(X_test)

print(f' RandomForest scores {accuracy_score(y_test, y_pred1), roc_auc_score(y_test, RFC.predict(X_test))}')
print(f' Gboos scores {accuracy_score(y_test, y_pred2), roc_auc_score(y_test, GBC.predict(X_test))}')
print(f' Stacking scores {accuracy_score(y_test, y_pred3), roc_auc_score(y_test, SC.predict(X_test))}')

 RandomForest scores (0.9649122807017544, 0.9580740255486406)
 Gboos scores (0.956140350877193, 0.9510317720275139)
 Stacking scores (0.956140350877193, 0.9510317720275139)
