In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

In [2]:
# ----------------------
# Data preparation
# ----------------------
# Keep only the first 130 samples and feature columns 2 and 3
# (petal length and petal width); rows from 130 onward are not used here.
iris = load_iris()
x, y = iris.data[:130][:, [2, 3]], iris.target[:130]

In [3]:
# ----------------------
# Euclidean distance
# ----------------------
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Both inputs are converted to float arrays before subtracting, so plain
    lists/tuples are accepted and small integer dtypes (e.g. uint8) cannot
    wrap around or overflow during the subtraction and squaring.

    Args:
        x1: Array-like coordinates of the first point.
        x2: Array-like coordinates of the second point; must broadcast
            against ``x1``.

    Returns:
        NumPy float scalar: the square root of the sum of squared
        element-wise differences (summed over every element).
    """
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

# ----------------------
# k-NN prediction
# ----------------------
def knn_predict(x, X_train, y_train, k):
    """Predict the label of one sample by majority vote of its k nearest neighbours.

    Distances are Euclidean and computed for all training rows at once.

    Args:
        x: Array-like feature vector of the query sample.
        X_train: Array-like of training samples, one sample per row.
        y_train: Array-like of training labels, aligned with ``X_train``.
        k: Number of neighbours that vote; must be >= 1. A value larger than
            the training set simply lets every training sample vote.

    Returns:
        The most frequent label among the k nearest training samples. When
        several labels share the top count, the smallest label wins
        (``np.unique`` returns labels sorted and ``np.argmax`` takes the
        first maximum).

    Raises:
        ValueError: If ``k`` is smaller than 1, the training set is empty, or
            ``X_train`` and ``y_train`` have different lengths.
    """
    # A non-positive k would otherwise slice `[:k]` from the wrong end
    # (negative) or produce an empty vote (zero).
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    train_labels = np.asarray(y_train)
    train = np.asarray(X_train, dtype=float)
    if len(train) == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(train) != len(train_labels):
        raise ValueError(
            f"X_train has {len(train)} samples but y_train has {len(train_labels)} labels"
        )

    # Flatten each sample to a 1-D feature vector so the row-wise sum below
    # covers every feature, whatever the per-sample shape is.
    train = train.reshape(len(train), -1)
    query = np.asarray(x, dtype=float).ravel()
    distances = np.sqrt(np.sum((train - query) ** 2, axis=1))

    # The default argsort kind is not guaranteed to be stable; a stable sort
    # keeps equidistant neighbours in training-set order.
    k_indices = np.argsort(distances, kind="stable")[:k]
    k_labels = train_labels[k_indices]

    # Majority vote.
    labels, counts = np.unique(k_labels, return_counts=True)
    return labels[np.argmax(counts)]

# ----------------------
# Accuracy evaluation
# ----------------------
def compute_accuracy(X_val, y_val, X_train, y_train, k):
    """Return the fraction of validation samples that k-NN classifies correctly.

    Args:
        X_val: Sequence of validation samples.
        y_val: Sequence of true labels, aligned with ``X_val``.
        X_train: Training samples passed through to ``knn_predict``.
        y_train: Training labels passed through to ``knn_predict``.
        k: Number of neighbours passed through to ``knn_predict``.

    Returns:
        Number of correct predictions divided by ``len(y_val)``, as a float.

    Raises:
        ValueError: If the validation set is empty or ``X_val`` and ``y_val``
            have different lengths.
    """
    n_val = len(y_val)
    if n_val == 0:
        raise ValueError("validation set is empty; accuracy is undefined")
    # zip() stops at the shorter input, so a length mismatch would silently
    # drop samples while the divisor below stayed at len(y_val).
    if len(X_val) != n_val:
        raise ValueError(
            f"X_val has {len(X_val)} samples but y_val has {n_val} labels"
        )

    correct = sum(
        1
        for sample, label in zip(X_val, y_val)
        if knn_predict(sample, X_train, y_train, k) == label
    )
    return correct / n_val

In [4]:
# ----------------------
# Train / validation split
# ----------------------
# Fixed seed so the shuffled order (and therefore the split) is reproducible.
np.random.seed(45)
indices = np.random.permutation(len(x))

# First 70% of the shuffled indices go to training, the remainder to validation.
split = int(0.7 * len(x))
train_idx, val_idx = indices[:split], indices[split:]

X_train, X_val = x[train_idx], x[val_idx]
y_train, y_val = y[train_idx], y[val_idx]

In [5]:
# ----------------------
# Choose k on the validation set
# ----------------------
k_candidates = range(1, 11)
accuracies = {}  # validation accuracy keyed by k, in evaluation order

print("k값별 정확도:")
for k in k_candidates:
    acc = compute_accuracy(X_val, y_val, X_train, y_train, k)
    accuracies[k] = acc
    print(f"k = {k} -> 정확도: {acc:.2f}")

# max() returns the first maximal key in insertion order, so ties still go to
# the smallest k. Unlike a running `acc > 0.0` comparison, it also selects a k
# when every candidate scores 0.0 instead of leaving best_k as None.
best_k = max(accuracies, key=accuracies.get)
best_acc = accuracies[best_k]

k값별 정확도:
k = 1 -> 정확도: 0.95
k = 2 -> 정확도: 0.92
k = 3 -> 정확도: 0.92
k = 4 -> 정확도: 0.92
k = 5 -> 정확도: 0.97
k = 6 -> 정확도: 0.92
k = 7 -> 정확도: 0.92
k = 8 -> 정확도: 0.92
k = 9 -> 정확도: 0.95
k = 10 -> 정확도: 0.92


In [6]:
# Report the selected k and its validation accuracy (rounded to two decimals).
print(f"\n✅ 최적의 k값은: {best_k} (정확도: {best_acc:.2f})")


✅ 최적의 k값은: 5 (정확도: 0.97)


In [None]:
# Take the 131st and 132nd samples (row indices 130 and 131), using only
# petal length and petal width (feature columns 2 and 3).
x_test = iris.data[130:132][:, [2, 3]]
y_test = iris.target[130:132]

# Classify each held-out sample with the k selected on the validation set.
predictions = [
    knn_predict(sample, X_train, y_train, best_k)
    for sample in x_test
]

# Tabulate actual vs. predicted labels and print the table.
result_df = pd.DataFrame(
    dict(Index=[130, 131], Actual=y_test, Predicted=predictions)
)

print(result_df)

   Index  Actual  Predicted
0    130       2          2
1    131       2          2
