### OVR & OVO 모듈 활용

(1) 모듈 로딩 & 데이터 준비

In [33]:
## 모듈 로딩
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np

In [34]:
# Load the fish measurements from the CSV file and preview the first two rows.
data_file = '../data/fish.csv'

fishDF = pd.read_csv(filepath_or_buffer=data_file)
fishDF.head(n=2)

Unnamed: 0,Species,Weight,Length,Diagonal,Height,Width
0,Bream,242.0,25.4,30.0,11.52,4.02
1,Bream,290.0,26.3,31.2,12.48,4.3056


(2) 데이터셋 준비 <hr>

(2-1) 피쳐/타겟 분리

In [35]:
# Split the frame into the label column and the measurement columns.
# Columns are selected by name instead of by position so the split stays
# correct even if the CSV column order changes or an index column is added.
targetDF = fishDF['Species']                # class label (a pandas Series)
featureDF = fishDF.drop(columns='Species')  # every remaining column is a feature

print(f'featureDF : {featureDF.shape}, targetDF : {targetDF.shape}')

featureDF : (159, 5), targetDF : (159,)


In [36]:
# Number of distinct classes in the target (missing values are not counted)
targetDF.nunique(dropna=True)

7

In [37]:
# Share of each target class, expressed as a percentage of all rows
targetDF.value_counts() / len(targetDF) * 100

Species
Perch        35.220126
Bream        22.012579
Roach        12.578616
Pike         10.691824
Smelt         8.805031
Parkki        6.918239
Whitefish     3.773585
Name: count, dtype: float64

(2-2) 학습용 / 테스트용 데이터셋 분리

In [38]:
from sklearn.model_selection import train_test_split

In [39]:
# Split into train and test subsets, stratifying on the target;
# the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    featureDF,
    targetDF,
    random_state=11,
    stratify=targetDF,
)

In [40]:
# Report the feature/target shapes of each subset, one subset per line
print(
    f'[Train Dataset] {X_train.shape}, {y_train.shape}',
    f'[Test Dataset] {X_test.shape}, {y_test.shape}',
    sep='\n',
)

[Train Dataset] (119, 5), (119,)
[Test Dataset] (40, 5), (40,)


(3) 학습 진행

In [41]:
# Base estimator that is handed to the OvO and OvR wrappers
model = LogisticRegression(solver='liblinear')

(3-1) OVO 기반 학습 진행

In [42]:
# Wrap the base estimator in a one-vs-one scheme and fit it on the training split
ovoModel = OneVsOneClassifier(estimator=model)
ovoModel.fit(X=X_train, y=y_train)

In [43]:
# Inspect the fitted attributes: class labels, input feature names,
# and how many underlying binary estimators were trained
print(
    f'classes_ : {ovoModel.classes_}',
    f'feature_names_in_ : {ovoModel.feature_names_in_}',
    f'estimators_ : {len(ovoModel.estimators_)}개',
    sep='\n',
)

classes_ : ['Bream' 'Parkki' 'Perch' 'Pike' 'Roach' 'Smelt' 'Whitefish']
feature_names_in_ : ['Weight' 'Length' 'Diagonal' 'Height' 'Width']
estimators_ : 21개


In [44]:
# Evaluate: score on the training split, then on the test split
print(
    f'[Train Score] {ovoModel.score(X_train, y_train)}\n'
    f'[Test Score] {ovoModel.score(X_test, y_test)}'
)

[Train Score] 0.957983193277311
[Test Score] 0.925


In [45]:
# Predict the class of the first two test rows
ovoModel.predict(X_test.iloc[:2])

array(['Bream', 'Parkki'], dtype=object)

In [46]:
# Decision scores of the OvO model for the same two test rows
ovoModel.decision_function(X_test.iloc[:2])

array([[ 6.32094951,  5.32872468,  2.32890163,  0.68506766,  3.322758  ,
        -0.33168462,  4.3140798 ],
       [ 4.26849104,  6.32543178,  2.3234672 ,  0.67951149,  5.319289  ,
         0.67104504,  1.85564622]])

(3-2) OVR 기반 학습 진행

In [47]:
# Wrap the base estimator in a one-vs-rest scheme and fit it on the training split
ovrModel = OneVsRestClassifier(estimator=model)
ovrModel.fit(X=X_train, y=y_train)

In [48]:
# Inspect the fitted attributes: class labels, input feature names,
# and how many underlying binary estimators were trained
print(
    f'classes_ : {ovrModel.classes_}',
    f'feature_names_in_ : {ovrModel.feature_names_in_}',
    f'estimators_ : {len(ovrModel.estimators_)}개',
    sep='\n',
)

classes_ : ['Bream' 'Parkki' 'Perch' 'Pike' 'Roach' 'Smelt' 'Whitefish']
feature_names_in_ : ['Weight' 'Length' 'Diagonal' 'Height' 'Width']
estimators_ : 7개


In [49]:
# Evaluate: score on the training split, then on the test split
print(
    f'[Train Score] {ovrModel.score(X_train, y_train)}\n'
    f'[Test Score] {ovrModel.score(X_test, y_test)}'
)

[Train Score] 0.9495798319327731
[Test Score] 0.975


In [50]:
# Predict the class of the first two test rows
ovrModel.predict(X_test.iloc[:2])

array(['Bream', 'Parkki'], dtype='<U9')

In [51]:
# Decision scores of the OvR model for the same two test rows
ovrModel.decision_function(X_test.iloc[:2])

array([[  1.87053679,   0.13665969,  -7.34472734, -14.76498298,
         -0.86086327, -27.57113603,  -3.90345836],
       [ -1.40152254,   2.39014045,  -2.83220689, -12.23098559,
         -2.55867317, -15.03484394,  -4.32514035]])