In [None]:
from mylibraries import *
# Read the feature matrices (the file names suggest they were scaled beforehand)
# and remember the column names of the training features.
X_train = pd.read_csv('X_train_scaled.csv')
column_names = X_train.columns
X_test = pd.read_csv('X_test_scaled.csv')

# Read each target file and flatten it straight into a 1-D NumPy array,
# which is the label shape scikit-learn estimators expect.
y_train = pd.read_csv('y_train.csv').values.ravel()
y_test = pd.read_csv('y_test.csv').values.ravel()


In [None]:
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, roc_auc_score, precision_recall_curve, average_precision_score
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve

# Single seed shared by every stochastic base learner so that a fresh
# "Restart Kernel -> Run All" reproduces the same fitted models and metrics.
RANDOM_STATE = 42

# Base (level-0) models of the stack.
random_forest = RandomForestClassifier(n_estimators=100, max_depth=7, random_state=RANDOM_STATE)
gbc = GradientBoostingClassifier(random_state=RANDOM_STATE)
xgb = XGBClassifier(random_state=RANDOM_STATE)


# (name, estimator) pairs handed to the stacking ensemble.
estimators = [
    ('rf', random_forest),
    ('gbc', gbc),
    ('xgb', xgb),
]

# Stacking ensemble: a Gaussian Naive Bayes final estimator combines the
# outputs of the base models; cv=5 is the cross-validation setting used to
# generate the base-model predictions the final estimator is trained on.
stacking_model = StackingClassifier(estimators=estimators, final_estimator=GaussianNB(), cv=5)

# Fit the whole stack on the training data.
stacking_model.fit(X_train, y_train)

# Hard class predictions for the held-out test set.
y_pred_stacking = stacking_model.predict(X_test)

# Compute both evaluation artefacts from the hard test-set predictions first.
classification_rep = classification_report(y_true=y_test, y_pred=y_pred_stacking)
confusion_mat = confusion_matrix(y_true=y_test, y_pred=y_pred_stacking)

# Classification report: heading on one line, report below it.
print("Klasifikační report modelu Stacking Classifier:", classification_rep, sep="\n")

# Confusion matrix: heading on one line, matrix below it.
print("Matice záměn modelu Stacking Classifier:", confusion_mat, sep="\n")

# ROC and precision-recall curves are built by sweeping a decision threshold
# over continuous scores. Feeding them the hard 0/1 predictions collapses each
# curve to a single operating point and distorts AUC / average precision, so
# use the predicted probability of the positive class instead.
# Column 1 corresponds to stacking_model.classes_[1]; this is the positive
# class for the usual binary 0/1 (or -1/1) label encoding -- confirm if the
# labels are encoded differently.
y_score_stacking = stacking_model.predict_proba(X_test)[:, 1]

# ROC curve
fpr_stacking, tpr_stacking, _ = roc_curve(y_test, y_score_stacking)
roc_auc_stacking = roc_auc_score(y_test, y_score_stacking)
plt.plot(fpr_stacking, tpr_stacking, label='ROC krivka (AUC = {:.2f})'.format(roc_auc_stacking))
# Diagonal reference line: performance of a random classifier.
plt.plot([0, 1], [0, 1], linestyle='--', color='gray')
plt.xlabel('Falošne pozitívna miera')
plt.ylabel('Pravdivo pozitívna miera')
plt.title('ROC Krivka pre Stacking Classifier')
plt.legend()
plt.show()

# Precision-recall curve
precision_stacking, recall_stacking, _ = precision_recall_curve(y_test, y_score_stacking)
average_precision_stacking = average_precision_score(y_test, y_score_stacking)
plt.plot(recall_stacking, precision_stacking, label='Precision-Recall krivka (priemer = {:.2f})'.format(average_precision_stacking))
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Krivka pre Stacking Classifier')
plt.legend()
plt.show()

# Learning curve: refit the stacking model on ten increasing fractions
# (10 % .. 100 %) of the training data with 5-fold cross-validation,
# using all available CPU cores.
train_sizes, train_scores, test_scores = learning_curve(
    stacking_model,
    X_train,
    y_train,
    cv=5,
    n_jobs=-1,
    train_sizes=np.linspace(0.1, 1.0, 10),
)

# Average the per-fold scores for every training-set size.
train_scores_mean = train_scores.mean(axis=1)
test_scores_mean = test_scores.mean(axis=1)

# Plot training vs. validation score against the number of training samples.
plt.figure()
plt.plot(train_sizes, train_scores_mean, marker='o', linestyle='-', color='r', label='Trénovacia presnosť')
plt.plot(train_sizes, test_scores_mean, marker='o', linestyle='-', color='g', label='Validačná presnosť')

plt.title('Learning Curve pre Stacking Classifier')
plt.xlabel('Počet trénovacích príkladov')
plt.ylabel('Presnosť')
plt.legend(loc="best")
plt.show()