In [14]:
import os

import numpy as np
import pandas as pd

# Train/test split
from sklearn.model_selection import train_test_split

# Feature standardization
from sklearn.preprocessing import StandardScaler

# Model
from sklearn.neighbors import KNeighborsClassifier

# Evaluation metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

# Confusion matrix
from sklearn.metrics import confusion_matrix

# Classification report
from sklearn.metrics import classification_report

In [2]:
# Location of the iris CSV. The default is the original local path; set the
# IRIS_CSV_PATH environment variable to run this notebook on another machine
# without editing the cell.
IRIS_CSV_PATH = os.environ.get('IRIS_CSV_PATH', 'c:/reposit/data/철원이형/iris.csv')

# Load the dataset and preview the first three rows
df = pd.read_csv(IRIS_CSV_PATH)
df.head(3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0


In [3]:
# Dimensions of the loaded frame as (rows, columns)
df.shape

(150, 5)

In [4]:
# Distinct label values present in the target column
df['class'].unique()

array([0, 1, 2], dtype=int64)

In [5]:
# Feature columns: every column except the target. Selected by name (rather
# than by position) so the result does not depend on 'class' being the last
# column of the CSV.
features = df.columns.drop('class')

# Feature matrix (one row per sample, one column per feature)
X = df[features]

# Target vector (class label per sample)
y = df['class']

In [6]:
# Display the feature matrix and the target vector together as a tuple
X, y

(     sepal_length  sepal_width  petal_length  petal_width
 0             5.1          3.5           1.4          0.2
 1             4.9          3.0           1.4          0.2
 2             4.7          3.2           1.3          0.2
 3             4.6          3.1           1.5          0.2
 4             5.0          3.6           1.4          0.2
 ..            ...          ...           ...          ...
 145           6.7          3.0           5.2          2.3
 146           6.3          2.5           5.0          1.9
 147           6.5          3.0           5.2          2.0
 148           6.2          3.4           5.4          2.3
 149           5.9          3.0           5.1          1.8
 
 [150 rows x 4 columns],
 0      0
 1      0
 2      0
 3      0
 4      0
       ..
 145    2
 146    2
 147    2
 148    2
 149    2
 Name: class, Length: 150, dtype: int64)

In [7]:
# Fixed seed so the shuffle (and therefore the split) is reproducible
SPLIT_SEED = 808

# Hold out a quarter of the rows for testing; 0.25 is spelled out here but is
# the same value scikit-learn uses when no size is given.
# NOTE(review): the split is not stratified (no `stratify=` argument) — consider
# stratify=y if balanced class proportions in both splits matter.
X_tr, X_te, y_tr, y_te = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=SPLIT_SEED,
)

# Preview the training features
X_tr

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
60,5.0,2.0,3.5,1.0
36,5.5,3.5,1.3,0.2
70,5.9,3.2,4.8,1.8
35,5.0,3.2,1.2,0.2
113,5.7,2.5,5.0,2.0
...,...,...,...,...
142,5.8,2.7,5.1,1.9
147,6.5,3.0,5.2,2.0
84,5.4,3.0,4.5,1.5
121,5.6,2.8,4.9,2.0


In [8]:
# Standardize the features: fit the scaler on the training split only, then
# apply that same fitted transform to both the training and the test split.
std_scale = StandardScaler().fit(X_tr)
X_tr_std, X_te_std = (std_scale.transform(split) for split in (X_tr, X_te))

# NOTE(review): this displays the original (unscaled) frames, not the
# standardized arrays — show X_tr_std / X_te_std instead if that was the intent.
X_tr, X_te

(     sepal_length  sepal_width  petal_length  petal_width
 60            5.0          2.0           3.5          1.0
 36            5.5          3.5           1.3          0.2
 70            5.9          3.2           4.8          1.8
 35            5.0          3.2           1.2          0.2
 113           5.7          2.5           5.0          2.0
 ..            ...          ...           ...          ...
 142           5.8          2.7           5.1          1.9
 147           6.5          3.0           5.2          2.0
 84            5.4          3.0           4.5          1.5
 121           5.6          2.8           4.9          2.0
 38            4.4          3.0           1.3          0.2
 
 [112 rows x 4 columns],
      sepal_length  sepal_width  petal_length  petal_width
 6             4.6          3.4           1.4          0.3
 32            5.2          4.1           1.5          0.1
 136           6.3          3.4           5.6          2.4
 47            4.6          3

In [10]:
# Training: fit a k-nearest-neighbors classifier on the standardized training data.
# NOTE(review): k is even, so the neighbor vote can tie — consider an odd k or
# choosing k via cross-validation.
N_NEIGHBORS = 2

clf_knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)
clf_knn.fit(X_tr_std, y_tr)

In [11]:
# Prediction: class labels for the standardized test features
knn_pred = clf_knn.predict(X_te_std)
knn_pred

array([0, 0, 2, 0, 1, 2, 0, 0, 1, 2, 2, 1, 2, 1, 0, 0, 1, 1, 2, 0, 1, 0,
       2, 1, 2, 2, 2, 2, 0, 0, 1, 1, 1, 1, 1, 2, 1, 0], dtype=int64)

In [13]:
# Evaluate the test-set predictions. Precision, recall and F1 are macro-averaged
# across the classes; accuracy takes no averaging argument.
macro_avg = {'average': 'macro'}

accuracy = accuracy_score(y_te, knn_pred)                 # accuracy
precision = precision_score(y_te, knn_pred, **macro_avg)  # precision
recall = recall_score(y_te, knn_pred, **macro_avg)        # recall
f1 = f1_score(y_te, knn_pred, **macro_avg)                # F1 score

# Print each score with its original label, in the original order
for label, score in (
    ('정확도: ', accuracy),
    ('정밀도: ', precision),
    ('리콜: ', recall),
    ('f1 스코어:', f1),
):
    print(label, score)

정확도:  0.9210526315789473
정밀도:  0.9246031746031745
리콜:  0.923076923076923
f1 스코어: 0.9229629629629629


In [15]:
# Confusion matrix of the true test labels (y_te) against the KNN predictions;
# in scikit-learn's layout rows are true classes and columns are predicted classes
conf_matrix = confusion_matrix(y_te, knn_pred)
print(conf_matrix)

[[12  0  0]
 [ 0 12  1]
 [ 0  2 11]]


In [18]:
# Classification report: per-class precision, recall, F1 and support for the
# test-set predictions, returned as text and printed
class_report = classification_report(y_te, knn_pred)
print(class_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.86      0.92      0.89        13
           2       0.92      0.85      0.88        13

    accuracy                           0.92        38
   macro avg       0.92      0.92      0.92        38
weighted avg       0.92      0.92      0.92        38

