In [None]:
from mylibraries import *
# Load the pre-scaled feature matrices of the train/test split.
X_train = pd.read_csv('X_train_scaled.csv')
X_test = pd.read_csv('X_test_scaled.csv')

# Load the targets and flatten each of them into a 1-D array.
y_train = pd.read_csv('y_train.csv').to_numpy().ravel()
y_test = pd.read_csv('y_test.csv').to_numpy().ravel()

# Keep the column (feature) names of X_train for later reference.
column_names = X_train.columns

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_recall_curve, average_precision_score, roc_curve, roc_auc_score
from sklearn.model_selection import learning_curve

# Baseline KNN classifier with default hyperparameters, fitted on the
# training data (fit() returns the estimator itself, so the two steps chain).
knn = KNeighborsClassifier().fit(X_train, y_train)

# Test-set outputs: the second column of predict_proba (used as the class-1
# score by the curve plots below) and the hard label predictions.
y_prob_knn = knn.predict_proba(X_test)[:, 1]
y_pred_knn = knn.predict(X_test)

# Overall accuracy on the test set
accuracy_knn = accuracy_score(y_test, y_pred_knn)
print(f"Testovacia presnosť modelu KNN: {accuracy_knn:.4f}")

# Classification report for the test-set predictions
print("Klasifikačná správa modelu KNN:")
print(classification_report(y_test, y_pred_knn))

# Confusion matrix for the test-set predictions
conf_matrix_knn = confusion_matrix(y_test, y_pred_knn)
print("Matica zámen modelu KNN:", conf_matrix_knn, sep="\n")

# ROC curve of the baseline KNN, computed from the test-set scores
fpr_knn, tpr_knn, _ = roc_curve(y_test, y_prob_knn)
roc_label_knn = 'ROC krivka (AUC = {:.2f})'.format(roc_auc_score(y_test, y_prob_knn))
plt.plot(fpr_knn, tpr_knn, label=roc_label_knn)
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.title('ROC Krivka pre KNN')
plt.xlabel('Falošne pozitívna miera')
plt.ylabel('Pravdivo pozitívna miera')
plt.legend()
plt.show()

# Precision-recall curve of the baseline KNN, with the average precision
# score shown in the legend
precision_knn, recall_knn, _ = precision_recall_curve(y_test, y_prob_knn)
average_precision_knn = average_precision_score(y_test, y_prob_knn)
plt.plot(
    recall_knn,
    precision_knn,
    label='Precision-Recall krivka (priemer = {:.2f})'.format(average_precision_knn),
)
plt.title('Precision-Recall Krivka pre KNN')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend()
plt.show()

# Learning curve: 5-fold cross-validated scores of the KNN estimator at ten
# training-set fractions evenly spaced between 10 % and 100 %.
train_sizes, train_scores, test_scores = learning_curve(
    knn,
    X_train,
    y_train,
    train_sizes=np.linspace(0.1, 1.0, 10),
    cv=5,
    n_jobs=-1,
)

# Average the per-fold scores (axis 1) for every training-set size.
train_scores_mean = train_scores.mean(axis=1)
test_scores_mean = test_scores.mean(axis=1)

plt.figure()
plt.plot(train_sizes, train_scores_mean, 'o-', label='Trénovacia presnosť', color='r')
plt.plot(train_sizes, test_scores_mean, 'o-', label='Validačná presnosť', color='g')
plt.title('Learning Curve')
plt.xlabel('Počet trénovacích príkladov')
plt.ylabel('Presnosť')
plt.legend(loc="best")
plt.show()

# Permutation feature importance of the KNN estimator, evaluated on the test
# set with 30 shuffles per feature and a fixed seed for repeatability.
from sklearn.inspection import permutation_importance
perm_importance = permutation_importance(
    knn,
    X_test,
    y_test,
    n_repeats=30,
    random_state=42,
)



In [None]:
# Number of top-ranked features to keep.
N_TOP_FEATURES = 10

# Rank the features by their mean permutation importance (highest first) and
# keep the positions of the top N_TOP_FEATURES. The ranking comes from the
# `perm_importance` result computed in the previous cell; before this fix the
# cell read `sorted_idx_top_n`, `X`, `X_train_scaled` and `X_test_scaled`,
# none of which the notebook defines, so it failed on a fresh kernel.
sorted_idx_top_n = np.argsort(perm_importance.importances_mean)[::-1][:N_TOP_FEATURES]
important_features = X_train.columns[sorted_idx_top_n]

# Restrict both splits to the selected columns. X_train / X_test are
# DataFrames, so positional column selection has to go through .iloc.
X_train_important = X_train.iloc[:, sorted_idx_top_n]
X_test_important = X_test.iloc[:, sorted_idx_top_n]

# KNN with Manhattan distance, 9 neighbours and distance-weighted voting
knn_important = KNeighborsClassifier(metric='manhattan', n_neighbors=9, weights='distance')

# Fit on the selected features only
knn_important.fit(X_train_important, y_train)

# Predict the test set using the same selected features
y_pred_knn_important = knn_important.predict(X_test_important)

# Test-set accuracy of the reduced-feature model
accuracy_knn_important = accuracy_score(y_test, y_pred_knn_important)
print(f"Testovacia presnosť modelu KNN s najdôležitejšími atribútmi: {accuracy_knn_important:.4f}")
# Classification report of the reduced-feature model
print("Klasifikačná správa modelu KNN s najdôležitejšími atribútmi:")
print(classification_report(y_test, y_pred_knn_important))

# Confusion matrix of the reduced-feature model
conf_matrix_knn_important = confusion_matrix(y_test, y_pred_knn_important)
print("Matica zámen modelu KNN s najdôležitejšími atribútmi:")
print(conf_matrix_knn_important)

In [None]:
from sklearn.ensemble import BaggingClassifier

# Base learner: KNN with Manhattan distance, 9 neighbours and
# distance-weighted voting. NOTE: this rebinds the name `knn`.
knn = KNeighborsClassifier(n_neighbors=9, weights='distance', metric='manhattan')

# Bagging ensemble of 10 such KNN estimators with a fixed seed
bagging_knn = BaggingClassifier(
    estimator=knn,
    n_estimators=10,
    random_state=42,
)

# Fit the ensemble on the full training set
bagging_knn.fit(X_train, y_train)

# Hard label predictions for the test set
y_pred_bagging_knn = bagging_knn.predict(X_test)

# Test-set accuracy of the ensemble
accuracy_bagging_knn = accuracy_score(y_test, y_pred_bagging_knn)
print(f"Presnosť Bagging modelu s KNN: {accuracy_bagging_knn:.4f}")

# Classification report of the ensemble
print("Klasifikačná správa:")
print(classification_report(y_test, y_pred_bagging_knn))

# Confusion matrix of the ensemble
conf_matrix_bagging_knn = confusion_matrix(y_test, y_pred_bagging_knn)
print("Matica zámen:", conf_matrix_bagging_knn, sep="\n")