In [1]:
import pandas as pd
import os

# Directory that becomes the working directory for the notebook.
# The original location is still the default, but setting the CANCER_DATA_DIR
# environment variable lets the notebook run on a machine where the data
# lives somewhere else, without editing this cell.
path = os.environ.get('CANCER_DATA_DIR', r'D:\elice_python\GAS_5\pytest_machine')
# Files are opened by relative name further down (e.g. 'cancer.csv'), so the
# data directory has to be the current working directory.
os.chdir(path)

In [2]:
# Read the dataset (row 0 of the CSV supplies the column names), then show
# the first record followed by the (rows, columns) shape.
data = pd.read_csv('cancer.csv', header=0)
print(data.head(1), data.shape, sep='\n')

   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0         18.0          10.4           123.0     1000.0            0.118   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0             0.278             0.3                0.147          0.242   

   mean fractal dimension  ...  worst texture  worst perimeter  worst area  \
0                  0.0787  ...           17.3            185.0      2020.0   

   worst smoothness  worst compactness  worst concavity  worst concave points  \
0             0.162              0.666            0.712                 0.265   

   worst symmetry  worst fractal dimension  benign  
0            0.46                    0.119     0.0  

[1 rows x 31 columns]
(569, 31)


In [3]:
# Every column except the last is used as a feature; the last column is the
# label that the models are trained to predict.
X, y = data.iloc[:, :-1], data.iloc[:, -1]
print(*(part.shape for part in (X, y)))

(569, 30) (569,)


In [4]:
from sklearn.model_selection import train_test_split
# Split into train/test parts, stratified on y so both parts keep the same
# class proportions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    random_state=42,
    stratify=y,
)

In [5]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

# Base estimators that the voting ensembles in this notebook combine.
# random_state is pinned on the two estimators that use randomness
# (liblinear shuffles the data; the MLP draws random initial weights and
# shuffles its mini-batches) so that re-running the notebook reproduces the
# same scores. KNeighborsClassifier is deterministic and takes no seed.
lr = LogisticRegression(solver='liblinear', random_state=42)
knn = KNeighborsClassifier(n_neighbors=8)
mlp = MLPClassifier(max_iter=1000, random_state=42)

In [6]:
# Hard voting: each base classifier casts one vote and the majority label wins.
hard_model = VotingClassifier(
    voting='hard',
    estimators=[('LR', lr), ('KNN', knn), ('MLP', mlp)],
)
hard_model.fit(X_train, y_train)

# Test-set predictions, then accuracy on the training and the test split.
print(
    hard_model.predict(X_test),
    hard_model.score(X_train, y_train),
    hard_model.score(X_test, y_test),
    sep='\n',
)

[1. 0. 1. 1. 1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 1. 0. 1. 1. 1. 0. 0. 1. 1.
 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1. 0. 1.
 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 1. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1.
 1. 1. 1. 0. 0. 0. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 0. 1. 0. 0.
 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0.
 1. 1. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1.]
0.9460093896713615
0.9440559440559441


In [12]:
# Soft voting: the ensemble is built from the same three base classifiers,
# but with voting='soft' instead of 'hard'.
soft_model = VotingClassifier(
    voting='soft',
    estimators=[('LR', lr), ('KNN', knn), ('MLP', mlp)],
)
soft_model.fit(X_train, y_train)

# Test-set predictions, then accuracy on the training and the test split.
print(
    soft_model.predict(X_test),
    soft_model.score(X_train, y_train),
    soft_model.score(X_test, y_test),
    sep='\n',
)

[1. 0. 1. 1. 1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 1. 0. 1. 1. 1. 0. 0. 1. 1.
 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1. 0. 1.
 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 1. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1.
 1. 1. 1. 0. 0. 0. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 0. 1. 0. 0.
 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0.
 1. 1. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1.]
0.9483568075117371
0.9440559440559441


In [10]:
# Test accuracy of each fitted member of the soft-voting ensemble, one per
# line, in the order LR, KNN, MLP.
print(
    *(
        soft_model.named_estimators_[key].score(X_test, y_test)
        for key in ('LR', 'KNN', 'MLP')
    ),
    sep='\n',
)

0.958041958041958
0.9440559440559441
0.9370629370629371


### 연습문제 1
- Scaler를 사용해 Hard Voting과 Soft Voting 진행하기

In [11]:
from sklearn.preprocessing import MinMaxScaler
# Fit the scaler on the training split only and reuse that fitted scaler for
# the test split, so the test data never influences the scaling parameters.
mms = MinMaxScaler()
X_train_mms = mms.fit_transform(X_train)
X_test_mms = mms.transform(X_test)

# Hard-voting ensemble trained on the scaled features.
hard_model2 = VotingClassifier(
    voting='hard',
    estimators=[('LR', lr), ('KNN', knn), ('MLP', mlp)],
)
hard_model2.fit(X_train_mms, y_train)

# Soft-voting ensemble trained on the scaled features.
soft_model2 = VotingClassifier(
    voting='soft',
    estimators=[('LR', lr), ('KNN', knn), ('MLP', mlp)],
)
soft_model2.fit(X_train_mms, y_train)

In [14]:
# Scaled hard-voting ensemble: test-set predictions, then train and test accuracy.
print(
    hard_model2.predict(X_test_mms),
    hard_model2.score(X_train_mms, y_train),
    hard_model2.score(X_test_mms, y_test),
    sep='\n',
)

[1. 0. 1. 1. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 1. 0. 0. 1. 1. 0. 0. 1. 1.
 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1. 0. 1.
 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 1. 1. 1. 1. 1.
 1. 1. 1. 0. 0. 0. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0.
 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 1. 0. 1. 0. 1. 1. 0. 1. 0. 1. 1. 1. 0.
 1. 1. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1.]
0.9812206572769953
0.9790209790209791


In [None]:
# Scaled soft-voting ensemble: test-set predictions, then train and test accuracy.
print(
    soft_model2.predict(X_test_mms),
    soft_model2.score(X_train_mms, y_train),
    soft_model2.score(X_test_mms, y_test),
    sep='\n',
)

[1. 0. 1. 1. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 1. 0. 0. 1. 1. 0. 0. 1. 1.
 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1. 0. 1.
 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 1. 1. 1. 1. 1.
 1. 1. 1. 0. 0. 0. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0.
 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0. 1. 0. 1. 1. 0. 0.
 1. 1. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1.]
0.9788732394366197
0.9790209790209791
