In [26]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score

In [27]:
iris = load_iris()
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [28]:
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state =42)


In [29]:
decision = DecisionTreeClassifier()
decision
knn = KNeighborsClassifier()
knn
svm = SVC(probability=True) #소프트 보팅을 위한 확률 설정
svm

# 보팅 분류기 생성(소프트 보팅)

In [30]:
voting = VotingClassifier(estimators = [('decision', decision),
                                        ('knn', knn), ('svm', svm)],
                          voting='soft')
voting

- estimators : 필수 매개변수, 리스트 형태로 각 분류기와 이름을 포함하는 튜플로 구성, 분류기와 이름을 연결하는 VotingClassifier가 각 분류기를 식별할 수 있다
- voting : 선택, 기본값 : 'hard'
- weights : 선택, 기본값 : None / 가중치를 지정할 수 있도록 리스트 또는 배열, 모든 분류기가 동일한 가중치를 적용(소프트 보팅에서 가중치를 지정하여 특정 분류기의 영향력을 높이거나 줄일 때 사용)
- flatten_transform : 선택, 기본값 : True / True 변환된 특성 데이터를 한 배열로 평평하게 만들어서 변환, False 변환된 특성 데이터를 분류기별로 따로 반환
- n_jobs : 선택, 기본값 : None / CPU코어의 개수 지정(병렬처리를 위함)
- verbose : 선택, 기본값 : False / True면 학습과정에서 로그메시지 출력
- voting_classifier.get_params() : 객체의 현재 설정되어있는 모든 매개변수 값을 반환

# 모델 학습(보팅분류기 훈련 데이터 맞춤)

In [31]:
voting.fit(X_train, y_train)

In [32]:
y_pred = voting.predict(X_test)
y_pred

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])

# 정확도 평가

In [33]:
acc = accuracy_score(y_test,y_pred)
print(f"소프트 보팅 분류기의 정확도 : {acc}")

소프트 보팅 분류기의 정확도 : 1.0


# 2. 소프트 보팅(데이터셋 brest_cancer)
- 모델 : 로지스틱 회귀, KNN


In [34]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score

In [35]:
cancer = load_breast_cancer()


In [36]:
lr = LogisticRegression()
lr
knn = KNeighborsClassifier(9)
knn

In [37]:
voting = VotingClassifier(estimators = [('lr',lr),('KNN', knn)], voting='soft')
voting

In [43]:
X_train, X_test, y_train, y_test = train_test_split(cancer['data'], cancer['target'], test_size=0.2)
print(X_train.shape, X_test.shape)
print(X_train)
print(X_test)
print(y_train)
print(y_test)

(455, 30) (114, 30)
[[1.185e+01 1.746e+01 7.554e+01 ... 9.140e-02 3.101e-01 7.007e-02]
 [1.205e+01 2.272e+01 7.875e+01 ... 1.092e-01 2.191e-01 9.349e-02]
 [8.878e+00 1.549e+01 5.674e+01 ... 4.762e-02 2.434e-01 7.431e-02]
 ...
 [1.327e+01 1.702e+01 8.455e+01 ... 9.678e-02 2.506e-01 7.623e-02]
 [2.018e+01 2.397e+01 1.437e+02 ... 2.508e-01 5.440e-01 9.964e-02]
 [1.979e+01 2.512e+01 1.304e+02 ... 1.732e-01 3.305e-01 8.465e-02]]
[[1.506e+01 1.983e+01 1.003e+02 ... 2.115e-01 2.834e-01 8.234e-02]
 [9.755e+00 2.820e+01 6.168e+01 ... 4.866e-02 2.321e-01 7.211e-02]
 [1.206e+01 1.890e+01 7.666e+01 ... 5.093e-02 2.880e-01 8.083e-02]
 ...
 [1.845e+01 2.191e+01 1.202e+02 ... 1.379e-01 3.109e-01 7.610e-02]
 [9.606e+00 1.684e+01 6.164e+01 ... 8.120e-02 2.982e-01 9.825e-02]
 [1.441e+01 1.973e+01 9.603e+01 ... 1.021e-01 2.272e-01 8.799e-02]]
[1 1 1 0 0 1 1 1 1 1 0 0 1 0 1 0 0 0 1 1 1 0 1 1 0 0 1 1 1 1 1 0 1 1 1 0 1
 1 0 1 0 1 1 0 0 0 1 1 1 0 1 1 1 1 0 1 0 1 0 1 1 1 0 0 1 0 0 1 0 0 0 1 0 1
 1 0 0 0 0 1 1

In [42]:
lr.fit(X_train, y_train)
lr_pred = lr.predict(X_test)
lr_acc = accuracy_score(y_test, lr_pred)
print(f"로지스틱 회귀 정확도 :{lr_acc:.3f}")
print(y_test)

로지스틱 회귀 정확도 :0.930
[1 0 0 0 1 1 1 0 0 0 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1
 0 1 1 1 0 1 1 0 1 0 1 0 1 1 0 1 1 0 1 1 1 1 0 0 0 1 1 1 1 0 1 1 1 0 1 0 0
 0 1 0 0 1 1 1 0 0 0 1 1 1 1 0 1 1 0 0 1 1 1 1 0 1 0 1 1 0 1 0 0 0 1 0 1 1
 0 1 1]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [40]:
knn.fit(X_train, y_train)
knn_pred = knn.predict(X_test)
knn_acc = accuracy_score(y_test, lr_pred)
print(f"knn 정확도 :{knn_acc:.3f}")

knn 정확도 :0.930


In [41]:
voting.fit(X_train, y_train)
voting_pred = voting.predict(X_test)
voting_acc = accuracy_score(y_test, lr_pred)
print(f"voting 정확도 :{voting_acc:.3f}")

voting 정확도 :0.930


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
