### OVO / OVR 모듈 활용


(1) 모듈 로딩 & 데이터 준비


In [1]:
## 모듈 로딩
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np


In [2]:
# Load the fish dataset from its CSV file and preview the first two rows.
data_file = "../data/fish.csv"
fishDF = pd.read_csv(filepath_or_buffer=data_file)

fishDF.iloc[:2]


Unnamed: 0,Species,Weight,Length,Diagonal,Height,Width
0,Bream,242.0,25.4,30.0,11.52,4.02
1,Bream,290.0,26.3,31.2,12.48,4.3056


In [3]:
# Separate the label column from the measurement columns by column name
# rather than by position, so the split does not silently change if the
# CSV's column order ever differs. "Species" is the first column in the
# preview above, so the resulting frames match the positional selection.
target_col = "Species"
targetDF = fishDF[target_col]
featureDF = fishDF.drop(columns=target_col)

print(f"featureDF : {featureDF.shape}, targetDF : {targetDF.shape}")


featureDF : (159, 5), targetDF : (159,)


In [4]:
from sklearn.model_selection import train_test_split


In [5]:
# Hold-out split stratified on the target, with a fixed seed so the
# partition is reproducible. No test_size is passed, so the library
# default applies.
x_train, x_test, y_train, y_test = train_test_split(
    featureDF,
    targetDF,
    random_state=11,
    stratify=targetDF,
)

# Report the shapes of both partitions in a single call.
print(
    f"[Train Dataset] {x_train.shape}, {y_train.shape}",
    f"[Test Dataset] {x_test.shape}, {y_test.shape}",
    sep="\n",
)


[Train Dataset] (119, 5), (119,)
[Test Dataset] (40, 5), (40,)


(3) 학습 진행 <hr>


In [6]:
# Base logistic-regression estimator using the liblinear solver; it is passed
# to both OneVsOneClassifier and OneVsRestClassifier later in the notebook.
model = LogisticRegression(solver="liblinear")


(3-1) OVO 기반 학습 진행


In [7]:
# Wrap the base estimator in a one-vs-one scheme and train it on the
# training partition.
ovoModel = OneVsOneClassifier(estimator=model)
ovoModel.fit(x_train, y_train)


In [8]:
# Inspect the fitted one-vs-one wrapper: class labels, input feature names,
# and how many underlying estimators were trained.
print(
    f"classes_ : {ovoModel.classes_}",
    f"feature_names_in_ : {ovoModel.feature_names_in_}",
    f"estimators_ : {len(ovoModel.estimators_)}개",
    sep="\n",
)


classes_ : ['Bream' 'Parkki' 'Perch' 'Pike' 'Roach' 'Smelt' 'Whitefish']
feature_names_in_ : ['Weight' 'Length' 'Diagonal' 'Height' 'Width']
estimators_ : 21개


In [9]:
# Evaluate the one-vs-one model on the training and test partitions.
print(
    f"[Train Score] {ovoModel.score(x_train, y_train)}",
    f"[Test Score] {ovoModel.score(x_test, y_test)}",
    sep="\n",
)


[Train Score] 0.957983193277311
[Test Score] 0.925


In [10]:
# Predict the labels of the first two rows of the test partition.
ovoModel.predict(x_test.iloc[:2])


array(['Bream', 'Parkki'], dtype=object)

In [11]:
# Decision scores of the one-vs-one model for the first two test rows.
ovoModel.decision_function(x_test.iloc[:2])


array([[ 6.32094951,  5.32872468,  2.32890163,  0.68506766,  3.322758  ,
        -0.33168462,  4.3140798 ],
       [ 4.26849104,  6.32543178,  2.3234672 ,  0.67951149,  5.319289  ,
         0.67104504,  1.85564622]])

(3-2) OVR 기반 학습 진행


In [12]:
# Wrap the base estimator in a one-vs-rest scheme and train it on the
# training partition.
ovrModel = OneVsRestClassifier(estimator=model)
ovrModel.fit(x_train, y_train)


In [13]:
# Inspect the fitted one-vs-rest wrapper: class labels, input feature names,
# and how many underlying estimators were trained.
print(
    f"classes_ : {ovrModel.classes_}",
    f"feature_names_in_ : {ovrModel.feature_names_in_}",
    f"estimators_ : {len(ovrModel.estimators_)}개",
    sep="\n",
)


classes_ : ['Bream' 'Parkki' 'Perch' 'Pike' 'Roach' 'Smelt' 'Whitefish']
feature_names_in_ : ['Weight' 'Length' 'Diagonal' 'Height' 'Width']
estimators_ : 7개


In [14]:
# Evaluate the one-vs-rest model on the training and test partitions.
print(
    f"[Train Score] {ovrModel.score(x_train, y_train)}",
    f"[Test Score] {ovrModel.score(x_test, y_test)}",
    sep="\n",
)


[Train Score] 0.9495798319327731
[Test Score] 0.975
