In [23]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

def species_to_int(species: str) -> int:
    """Encode an iris species label as its integer class id.

    The mapping is "Iris-versicolor" -> 0, "Iris-setosa" -> 1 and
    "Iris-virginica" -> 2.

    Args:
        species: Species label, spelled exactly as one of the three
            names above.

    Returns:
        The integer class id for the label.

    Raises:
        ValueError: If ``species`` is not one of the three known labels.
    """
    codes = {"Iris-versicolor": 0, "Iris-setosa": 1, "Iris-virginica": 2}
    try:
        return codes[species]
    except (KeyError, TypeError):
        # An unknown label used to fall through and return None, which
        # contradicts the declared int return type; fail loudly instead so
        # a bad label is reported where it is encoded.
        raise ValueError(f"unknown iris species label: {species!r}") from None

def species_to_string(species: int) -> str:
    """Decode an integer class id back into its iris species label.

    The mapping is 0 -> "Iris-versicolor", 1 -> "Iris-setosa" and
    2 -> "Iris-virginica".

    Args:
        species: Integer class id (a plain int or a numpy integer).

    Returns:
        The species label for the class id.

    Raises:
        ValueError: If ``species`` is not 0, 1 or 2.
    """
    labels = {0: "Iris-versicolor", 1: "Iris-setosa", 2: "Iris-virginica"}
    try:
        return labels[species]
    except (KeyError, TypeError):
        # An unknown id used to fall through and return None, which
        # contradicts the declared str return type; fail loudly instead.
        raise ValueError(f"unknown iris class id: {species!r}") from None

# Load the iris measurements. Column labels are supplied through names=, so
# pandas reads the first line of the file as data rather than as a header.
data: pd.DataFrame = pd.read_csv(
    "iris_data.csv",
    names=["sepal length", "sepal width", "petal length", "petal width", "species"],
)

print("DATASET SAMPLE:")
# Preview 4% of the rows; the fixed random_state makes the preview show the
# same rows every time the cell is re-run.
print(data.sample(frac=0.04, random_state=42).to_string())

# Replace the string labels with integer class ids for the classifier.
data["species"] = data["species"].apply(species_to_int)



DATASET SAMPLE:
     sepal length  sepal width  petal length  petal width          species
109           7.2          3.6           6.1          2.5   Iris-virginica
42            4.4          3.2           1.3          0.2      Iris-setosa
81            5.5          2.4           3.7          1.0  Iris-versicolor
94            5.6          2.7           4.2          1.3  Iris-versicolor
105           7.6          3.0           6.6          2.1   Iris-virginica
40            5.0          3.5           1.3          0.3      Iris-setosa


In [10]:
# Split the dataset into a training subset and a test subset
train: pd.DataFrame
test: pd.DataFrame
# 30% of the rows are held out for testing. random_state pins the shuffle so
# the split, and any score computed from it, is reproducible on a re-run.
train, test = train_test_split(data, test_size=0.3, random_state=42)
print("test set : ", len(train), "train + ", len(test), "test")

105 train +  45 test


In [24]:
# Separate the training frame into feature columns and the label column
X: pd.DataFrame = train.iloc[:, :-1]
y: pd.Series = pd.to_numeric(train.iloc[:, -1])  # labels as a numeric Series

# Ground-truth labels of the held-out rows
y_test: pd.Series = pd.to_numeric(test.iloc[:, -1])

# Fit a k-nearest-neighbours classifier (default settings) on the training rows
KNN = KNeighborsClassifier()
KNN.fit(X, y)
predictions = KNN.predict(test.iloc[:, :-1])  # predicted class id per test row

# Print the per-row comparison of actual versus predicted species
print("\nKNN 예측 결과:")
comparison = pd.DataFrame({"test_data": y_test, "prediction": predictions})
for column in ("test_data", "prediction"):
    # Turn the integer class ids back into readable species names
    comparison[column] = comparison[column].apply(species_to_string)
comparison["correct"] = comparison["test_data"].eq(comparison["prediction"])
print(comparison.to_string())

print("\nACCURACY SCORE: ", accuracy_score(y_test, predictions))


KNN 예측 결과:
           test_data       prediction  correct
6        Iris-setosa      Iris-setosa     True
32       Iris-setosa      Iris-setosa     True
116   Iris-virginica   Iris-virginica     True
70   Iris-versicolor   Iris-virginica    False
71   Iris-versicolor  Iris-versicolor     True
73   Iris-versicolor  Iris-versicolor     True
148   Iris-virginica   Iris-virginica     True
51   Iris-versicolor  Iris-versicolor     True
33       Iris-setosa      Iris-setosa     True
78   Iris-versicolor  Iris-versicolor     True
91   Iris-versicolor  Iris-versicolor     True
21       Iris-setosa      Iris-setosa     True
140   Iris-virginica   Iris-virginica     True
123   Iris-virginica   Iris-virginica     True
55   Iris-versicolor  Iris-versicolor     True
40       Iris-setosa      Iris-setosa     True
134   Iris-virginica   Iris-virginica     True
96   Iris-versicolor  Iris-versicolor     True
27       Iris-setosa      Iris-setosa     True
15       Iris-setosa      Iris-setosa     True
5