In [None]:
from mylibraries import *
# Read the feature matrices from the '*_scaled.csv' files.
X_train = pd.read_csv('X_train_scaled.csv')
X_test = pd.read_csv('X_test_scaled.csv')

# Read the label files and flatten them straight to 1-D arrays, which is the
# form in which they are passed to fit() and to the metric functions below.
y_train = pd.read_csv('y_train.csv').values.ravel()
y_test = pd.read_csv('y_test.csv').values.ravel()

# Remember the column names of the training feature matrix.
column_names = X_train.columns

In [None]:
from sklearn.ensemble import RandomForestClassifier
# accuracy_score must be in scope before its first use below; importing it in
# this step keeps the step runnable on a freshly restarted kernel.
from sklearn.metrics import accuracy_score

# Baseline random forest. random_state is fixed (same seed as the Bagging
# model later in the notebook) so that Restart & Run All reproduces the same
# forest, and therefore the same metrics and feature-importance ranking.
rfc = RandomForestClassifier(
    max_depth=20,
    min_samples_leaf=10,
    min_samples_split=5,
    n_estimators=100,
    random_state=42,
)

# Fit the model on the training set.
rfc.fit(X_train, y_train)

# Predict class labels for the test set.
y_pred = rfc.predict(X_test)

# Evaluate: fraction of test samples whose predicted label matches y_test.
accuracy = accuracy_score(y_test, y_pred)
print(f"Testovacia presnosť: {accuracy:.4f}")

from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    roc_auc_score,
    roc_curve,
)

# Classification report for the test-set predictions.
print("Klasifikačná správa:")
print(classification_report(y_test, y_pred))

# Confusion matrix of true vs. predicted test labels.
conf_matrix = confusion_matrix(y_test, y_pred)
print("Matica zámen:")
print(conf_matrix)

# ROC curve, built from the predicted probability in column 1 of
# predict_proba (the second class in rfc.classes_).
y_pred_proba = rfc.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)

plt.plot(fpr, tpr, label='ROC krivka')
# Dashed gray diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.title('ROC Krivka')
plt.xlabel('Falošne pozitívna miera')
plt.ylabel('Pravdivo pozitívna miera')
plt.legend()
plt.show()

from sklearn.metrics import average_precision_score, precision_recall_curve

# Precision-recall curve and its average-precision summary, both computed
# from the same class-1 probabilities that were used for the ROC curve.
average_precision = average_precision_score(y_test, y_pred_proba)
precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)

# Draw the curve as a step function on a new figure and shade the area
# underneath it; the legend entry carries the average precision.
plt.figure()
plt.step(
    recall,
    precision,
    where='post',
    color='b',
    alpha=0.2,
    label=f'Priemerná presnosť = {average_precision:.2f}',
)
plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')

plt.title('Precision-Recall Krivka')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.legend(loc="lower left")
plt.show()


from sklearn.model_selection import learning_curve

# Learning curve: score the rfc configuration with 5-fold cross-validation on
# ten training-set fractions from 10% to 100%, using all available cores.
train_sizes, train_scores, test_scores = learning_curve(
    rfc,
    X_train,
    y_train,
    train_sizes=np.linspace(0.1, 1.0, 10),
    cv=5,
    n_jobs=-1,
)

# Average the per-fold scores (axis 1) for every training-set size.
train_scores_mean = train_scores.mean(axis=1)
test_scores_mean = test_scores.mean(axis=1)

# Training curve in red, cross-validation curve in green.
plt.figure()
plt.plot(train_sizes, train_scores_mean, 'o-', color='r', label='Trénovacia presnosť')
plt.plot(train_sizes, test_scores_mean, 'o-', color='g', label='Validačná presnosť')

plt.title('Learning Curve')
plt.xlabel('Počet trénovacích príkladov')
plt.ylabel('Presnosť')
plt.legend(loc="best")
plt.show()

# Feature importances of the fitted forest, as a Series indexed by the
# training-set column names.
feature_names = X_train.columns
importances = rfc.feature_importances_
feature_importances = pd.Series(importances, index=feature_names)

# Horizontal bar chart of the ten largest importances.
feature_importances.nlargest(10).plot.barh()
plt.title('Dôležitosť premenných')
plt.show()

In [None]:
# Select the most important features (top 10 by importance) and restrict both
# the training and the test matrix to those columns.
top_features = feature_importances.nlargest(10).index
X_train_top = X_train[top_features]
X_test_top = X_test[top_features]

# Second random forest, trained on the top features only. random_state is
# fixed so that the comparison with the full-feature model is reproducible
# across Restart & Run All.
# NOTE(review): min_samples_leaf / min_samples_split here (1 / 2) differ from
# the first model (10 / 5), so any change in the metrics mixes the effect of
# feature selection with that of the hyperparameters — confirm this is intended.
rfc_top = RandomForestClassifier(
    max_depth=20,
    min_samples_leaf=1,
    min_samples_split=2,
    n_estimators=100,
    random_state=42,
)
rfc_top.fit(X_train_top, y_train)

# Predict class labels for the (reduced) test set.
y_pred_top = rfc_top.predict(X_test_top)

# Evaluate: test accuracy of the reduced-feature model.
accuracy_top = accuracy_score(y_test, y_pred_top)
print(f"Testovacia presnosť s top premennými: {accuracy_top:.4f}")

# Classification report for the reduced-feature model.
print("Klasifikačná správa s top premennými:")
print(classification_report(y_test, y_pred_top))

# Confusion matrix of true vs. predicted test labels.
conf_matrix_top = confusion_matrix(y_test, y_pred_top)
print("Matica zámen s top premennými:")
print(conf_matrix_top)

# ROC curve, built from the predicted probability in column 1 of
# predict_proba (the second class in rfc_top.classes_).
y_pred_proba_top = rfc_top.predict_proba(X_test_top)[:, 1]
fpr_top, tpr_top, _ = roc_curve(y_test, y_pred_proba_top)

plt.plot(fpr_top, tpr_top, label='ROC krivka s top premennými')
# Dashed gray diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.title('ROC Krivka s top premennými')
plt.xlabel('Falošne pozitívna miera')
plt.ylabel('Pravdivo pozitívna miera')
plt.legend()
plt.show()


In [None]:
from sklearn.ensemble import BaggingClassifier

# Bagging ensemble of 10 members, each of which is itself a 100-tree random
# forest; the ensemble is seeded with random_state=42.
bagging = BaggingClassifier(
    estimator=RandomForestClassifier(
        max_depth=20,
        min_samples_leaf=1,
        min_samples_split=2,
        n_estimators=100,
    ),
    n_estimators=10,
    random_state=42,
)
bagging.fit(X_train, y_train)
y_pred_bagging = bagging.predict(X_test)

# Evaluate: test accuracy of the bagging ensemble.
accuracy_bagging = accuracy_score(y_test, y_pred_bagging)
print(f"Testovacia presnosť Bagging: {accuracy_bagging:.4f}")

# Classification report for the bagging ensemble.
print("Klasifikačná správa Bagging:")
print(classification_report(y_test, y_pred_bagging))

# Confusion matrix of true vs. predicted test labels.
conf_matrix_bagging = confusion_matrix(y_test, y_pred_bagging)
print("Matica zámen Bagging:")
print(conf_matrix_bagging)

# ROC curve, built from the predicted probability in column 1 of
# predict_proba (the second class in bagging.classes_).
y_pred_proba_bagging = bagging.predict_proba(X_test)[:, 1]
fpr_bagging, tpr_bagging, _ = roc_curve(y_test, y_pred_proba_bagging)

plt.plot(fpr_bagging, tpr_bagging, label='ROC krivka Bagging')
# Dashed gray diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.title('ROC Krivka Bagging')
plt.xlabel('Falošne pozitívna miera')
plt.ylabel('Pravdivo pozitívna miera')
plt.legend()
plt.show()