In [None]:
from sklearn.datasets import load_breast_cancer, make_moons
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans, AgglomerativeClustering

In [None]:
# Load the breast-cancer dataset; return_X_y=True requests the (features, target) pair directly.
data, labels = load_breast_cancer(return_X_y=True)

In [None]:
# Split into train/test sets, holding out 20% of the samples for testing.
# random_state fixes the shuffle so the split is reproducible.
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.2, random_state=1004)

In [None]:
# Train a k-nearest-neighbours classifier (default hyperparameters) on the training split.
knn = KNeighborsClassifier()
knn.fit(train_data, train_labels)

In [None]:
# Evaluate the trained model on the held-out test split.
# score() returns the accuracy directly; no predictions are stored in this cell.
accuracy = knn.score(test_data, test_labels)
print(f"Accuracy of KNN: {accuracy}")

Accuracy of KNN: 0.9122807017543859


In [None]:
# Generate a synthetic two-moons dataset (1000 samples, noise=0.1, fixed seed).
# NOTE: this rebinds `data` and `labels`, replacing the breast-cancer arrays loaded earlier.
data, labels = make_moons(n_samples=1000, noise=0.1, random_state=1004)

In [None]:
# Cluster the two-moons data with KMeans (k=2).
# random_state pins the centroid initialisation so reruns reproduce the same result.
kmeans = KMeans(n_clusters=2, random_state=1004)
kmeans.fit(data)
# Cluster ids are arbitrary: cluster 0 need not correspond to class 0. With two
# clusters there are only two possible id-to-class assignments (identity and
# swapped), so count the matches under both and keep the better one. Without
# this, the same clustering could be reported as e.g. 0.249 instead of 0.751.
kmeans_matches = int((kmeans.labels_ == labels).sum())
kmeans_accuracy = max(kmeans_matches, len(labels) - kmeans_matches) / len(labels)
print(f"Accuracy of KMeans: {kmeans_accuracy}")



Accuracy of KMeans: 0.751


In [None]:
# Cluster the two-moons data with agglomerative clustering into 2 clusters.
agg_clustering = AgglomerativeClustering(n_clusters=2)
agg_clustering.fit(data)
# Cluster ids are arbitrary: cluster 0 need not correspond to class 0. With two
# clusters there are only two possible id-to-class assignments (identity and
# swapped), so count the matches under both and keep the better one.
agg_matches = int((agg_clustering.labels_ == labels).sum())
agg_clustering_accuracy = max(agg_matches, len(labels) - agg_matches) / len(labels)
print(f"Accuracy of Agglomerative Clustering: {agg_clustering_accuracy}")

Accuracy of Agglomerative Clustering: 0.714


In [None]:
#-------------- Model evaluation using a confusion matrix ---------------
from sklearn.metrics import confusion_matrix

In [None]:
# Predict class labels for the test split with the trained KNN model.
predicted_labels = knn.predict(test_data)

In [None]:
# Build the confusion matrix from the true and predicted test labels.
cm = confusion_matrix(test_labels, predicted_labels)

In [None]:
# Print the confusion matrix.
print("Confusion Matrix:")
print(cm)

Confusion Matrix:
[[44  7]
 [ 3 60]]


In [None]:
# Derive accuracy, precision, recall and F1 score from the binary confusion matrix.
# ravel() flattens the 2x2 matrix row by row, which is unpacked here as tn, fp, fn, tp.
tn, fp, fn, tp = cm.ravel()
# Each ratio falls back to 0.0 when its denominator is zero (e.g. the model made
# no positive predictions), instead of producing nan plus a RuntimeWarning from
# the NumPy integer division.
accuracy = (tp + tn) / (tp + tn + fp + fn) if (tp + tn + fp + fn) else 0.0
precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0
# F1 is the harmonic mean of precision and recall; it is 0 when both are 0.
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0.0

In [None]:
# Print the computed metrics.
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1_score}")

Accuracy: 0.9122807017543859
Precision: 0.8955223880597015
Recall: 0.9523809523809523
F1 Score: 0.923076923076923
