## OvR, OvO 모듈 활용하기

1. module loading & data

In [69]:
#module
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np

In [70]:
#데이터 준비
data = '../data/fish.csv'
fish = pd.read_csv(data)

fish.head(2)

Unnamed: 0,Species,Weight,Length,Diagonal,Height,Width
0,Bream,242.0,25.4,30.0,11.52,4.02
1,Bream,290.0,26.3,31.2,12.48,4.3056


2. 데이터셋 준비

In [71]:
# feature / target 분리
feature = fish[fish.columns[1:]]
target = fish[fish.columns[0]]

print(f'feature: {feature.shape}')
print(f'target: {target.shape}')

feature: (159, 5)
target: (159,)


In [72]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
encoder.fit(target)

In [73]:
target = encoder.transform(target)
print(target)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 6 6 6 6 6 6 1 1 1 1 1 1 1 1 1 1 1 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 5 5 5
 5 5 5 5 5 5 5 5 5 5 5]


3. 학습 / 테스트용 데이터 준비


In [74]:
from sklearn.model_selection import train_test_split

xtrain, xtest, ytrain, ytest = train_test_split(feature, target, stratify=target, random_state=11)
print(f'[train dataset] {xtrain.shape}, {ytrain.shape}')
print(f'[test dataset] {xtest.shape}, {ytest.shape}')

[train dataset] (119, 5), (119,)
[test dataset] (40, 5), (40,)


4. 학습 진행

In [75]:
## OvO, OvR에서 사용할 관측지(Estimator) 생성

In [76]:
md = LogisticRegression(solver='liblinear')
md.fit(xtrain, ytrain)

3 - 1. OvO 기반 학습 진헹

In [77]:
ovoMd = OneVsOneClassifier(md)
ovoMd.fit(xtrain, ytrain)

In [78]:
# 모델 파라미터 확인
print(f'classes : {ovoMd.classes_}')
print(f'feature_names_in : {ovoMd.feature_names_in_}')
print(f'feature_names_in ;  {len(ovoMd.estimators_)}개')

classes : [0 1 2 3 4 5 6]
feature_names_in : ['Weight' 'Length' 'Diagonal' 'Height' 'Width']
feature_names_in ;  21개


In [79]:
# 평가
print(f'[Train score] : {ovoMd.score(xtrain, ytrain)}\n [Test score] : {ovoMd.score(xtest, ytest)}')

[Train score] : 0.957983193277311
 [Test score] : 0.925


In [80]:
#예측
ovoMd.predict(xtest[:2])

array([0, 1])

In [81]:
ovoMd.decision_function(xtest[:2])

array([[ 6.32094951,  5.32872468,  2.32890163,  0.68506766,  3.322758  ,
        -0.33168462,  4.3140798 ],
       [ 4.26849104,  6.32543178,  2.3234672 ,  0.67951149,  5.319289  ,
         0.67104504,  1.85564622]])

3 - 2. OvR 기반 학습 진행

In [82]:
ovrMd = OneVsRestClassifier(md)

In [83]:
ovrMd.fit(xtrain,ytrain )

In [84]:
print(f'[train] {ovrMd.score(xtrain, ytrain)} \n [test] {ovrMd.score(xtest, ytest)}')

[train] 0.9495798319327731 
 [test] 0.975


In [85]:
# 모델 파라미터 확인
print(f'classes : {ovrMd.classes_}')
print(f'feature_names_in : {ovrMd.feature_names_in_}')
print(f'feature_names_in ;  {len(ovrMd.estimators_)}개')

classes : [0 1 2 3 4 5 6]
feature_names_in : ['Weight' 'Length' 'Diagonal' 'Height' 'Width']
feature_names_in ;  7개


6. 모델 성능 평가<hr>
- 정확도
- 정밀도
- 재현율
- F1-score
- Confunsion Matrics
- Classification Report

In [86]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix, classification_report

In [87]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(feature, target, test_size=0.2, random_state=42) 

In [90]:
print(classification_report(ytest, md.predict(xtest), zero_division=0))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       1.00      1.00      1.00         3
           2       0.93      1.00      0.97        14
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         5
           5       1.00      1.00      1.00         4
           6       0.00      0.00      0.00         1

    accuracy                           0.97        40
   macro avg       0.85      0.86      0.85        40
weighted avg       0.95      0.97      0.96        40


In [93]:
f1_score(y_test, md.predict(x_test), average= 'weighted')

0.8660714285714286

In [95]:
recall_score(y_test, md.predict(x_test), average= 'macro')

0.8571428571428571

In [97]:
confusion_matrix(y_test, md.predict(x_test))

array([[10,  0,  0,  0,  0,  0,  0],
       [ 0,  1,  0,  0,  0,  0,  0],
       [ 0,  0,  9,  0,  0,  0,  0],
       [ 0,  0,  0,  3,  0,  0,  0],
       [ 0,  0,  0,  0,  1,  0,  0],
       [ 0,  0,  0,  0,  0,  5,  0],
       [ 0,  0,  3,  0,  0,  0,  0]], dtype=int64)