In [26]:
import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import StackingClassifier

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [2]:
# Load the wine dataset from a local CSV file and preview the first three rows.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact location exists.
WINE_CSV_PATH = 'c:/reposit/data/철원이형/wine_data.csv'
df = pd.read_csv(WINE_CSV_PATH)
df.head(n=3)

Unnamed: 0,Alcohol,Malic,Ash,Alcalinity,Magesium,Phenols,Flavanoids,Nonflavanoids,Proanthocyanins,Color,Hue,Dilution,Proline,class
0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065,0
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050,0
2,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185,0


In [3]:
# Build the feature matrix and the label vector.
# 'class' is the target and is selected by name rather than by position.
# 'Unnamed: 0' is excluded as well: in df.head() it simply counts rows
# (0, 1, 2, ...), which is what pandas produces when a CSV was written together
# with its index -- presumably not a real wine measurement (TODO confirm).
# Keeping it as a predictor would leak the label if the file is ordered by
# class. The previous `df.columns[:-1]` kept that column among the features.
# NOTE: the outputs recorded in the later cells were produced with the old
# feature set and need to be re-run.
features = df.columns.drop('class')
features = features[~features.astype(str).str.startswith('Unnamed')]

X = df[features]
y = df['class']

# Split the data (library-default split size), stratified on the label so both
# splits keep the class proportions; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=808, stratify=y)

In [4]:
# Standardize the features: the scaler is fitted on the training split only,
# and that same fitted transform is then applied to the test split.
std_scale = StandardScaler()
X_train_std = std_scale.fit_transform(X_train)
X_test_std = std_scale.transform(X_test)

In [5]:
# Base learners for the voting ensemble: logistic regression, a linear-kernel
# SVM and Gaussian naive Bayes.
clf1 = LogisticRegression()
clf2 = svm.SVC(kernel='linear')
clf3 = GaussianNB()

# Hard voting over the three learners, each with the same weight.
voting_members = [('lr', clf1), ('svm', clf2), ('gnb', clf3)]
clf_voting = VotingClassifier(estimators=voting_members, voting='hard', weights=[1, 1, 1])

# Train on the standardized training split (last expression, so the notebook
# shows the fitted estimator).
clf_voting.fit(X_train_std, y_train)

In [6]:
# Predict labels for the standardized test split and display them.
pred_voting = clf_voting.predict(X=X_test_std)
pred_voting

array([0, 0, 1, 1, 1, 2, 2, 1, 0, 2, 2, 1, 0, 1, 2, 0, 2, 0, 1, 1, 0, 0,
       1, 1, 0, 0, 2, 1, 1, 2, 1, 0, 1, 1, 1, 0, 1, 2, 0, 1, 2, 0, 2, 0,
       2], dtype=int64)

In [8]:
# Confusion matrix of the voting ensemble on the test split
# (true labels first, predictions second).
conf_matrix = confusion_matrix(y_true=y_test, y_pred=pred_voting)
conf_matrix

array([[15,  0,  0],
       [ 0, 18,  0],
       [ 0,  0, 12]], dtype=int64)

In [9]:
# Per-class precision / recall / F1 report for the voting ensemble.
class_report = classification_report(y_true=y_test, y_pred=pred_voting)
print(class_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00        18
           2       1.00      1.00      1.00        12

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



## 랜덤 포레스트 random forest

In [11]:
# Random forest with tree depth capped at 2 and a fixed seed; fitted on the
# standardized training split (last expression, so the fitted model is shown).
clf_rf = RandomForestClassifier(random_state=808, max_depth=2)
clf_rf.fit(X=X_train_std, y=y_train)

In [12]:
# Predict labels for the standardized test split with the random forest
# and display them.
pred_rf = clf_rf.predict(X=X_test_std)
pred_rf

array([0, 0, 1, 2, 1, 2, 2, 1, 0, 2, 2, 1, 0, 1, 2, 0, 2, 0, 1, 1, 0, 0,
       1, 1, 0, 0, 2, 2, 1, 2, 1, 0, 1, 1, 1, 0, 1, 2, 0, 1, 2, 0, 2, 0,
       2], dtype=int64)

In [13]:
# Confusion matrix of the random forest on the test split
# (true labels first, predictions second).
conf_matrix = confusion_matrix(y_true=y_test, y_pred=pred_rf)
conf_matrix

array([[15,  0,  0],
       [ 0, 16,  2],
       [ 0,  0, 12]], dtype=int64)

In [14]:
# Per-class precision / recall / F1 report for the random forest.
class_report = classification_report(y_true=y_test, y_pred=pred_rf)
print(class_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.89      0.94        18
           2       0.86      1.00      0.92        12

    accuracy                           0.96        45
   macro avg       0.95      0.96      0.95        45
weighted avg       0.96      0.96      0.96        45



### 튜닝 tuning (max_depth)

In [17]:
# Random forest with max_depth=3 (same seed as the other random forest runs).
# NOTE(review): this depth is being compared using the test split; a validation
# split or cross-validation would keep the test set untouched.
clf_rf = RandomForestClassifier(random_state=808, max_depth=3)

# Train, then predict on the standardized test split.
pred_rf = clf_rf.fit(X_train_std, y_train).predict(X_test_std)

# Confusion matrix.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=pred_rf)
print(conf_matrix)

# Classification report.
class_report = classification_report(y_true=y_test, y_pred=pred_rf)
print(class_report)

[[15  0  0]
 [ 0 17  1]
 [ 0  0 12]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.94      0.97        18
           2       0.92      1.00      0.96        12

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



In [16]:
# Random forest with max_depth=4 (same seed as the other random forest runs).
# NOTE(review): this depth is being compared using the test split; a validation
# split or cross-validation would keep the test set untouched.
clf_rf = RandomForestClassifier(random_state=808, max_depth=4)

# Train, then predict on the standardized test split.
pred_rf = clf_rf.fit(X_train_std, y_train).predict(X_test_std)

# Confusion matrix.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=pred_rf)
print(conf_matrix)

# Classification report.
class_report = classification_report(y_true=y_test, y_pred=pred_rf)
print(class_report)

[[15  0  0]
 [ 0 17  1]
 [ 0  0 12]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.94      0.97        18
           2       0.92      1.00      0.96        12

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



In [18]:
# Random forest with max_depth=5 (same seed as the other random forest runs).
# NOTE(review): this depth is being compared using the test split; a validation
# split or cross-validation would keep the test set untouched.
clf_rf = RandomForestClassifier(random_state=808, max_depth=5)

# Train, then predict on the standardized test split.
pred_rf = clf_rf.fit(X_train_std, y_train).predict(X_test_std)

# Confusion matrix.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=pred_rf)
print(conf_matrix)

# Classification report.
class_report = classification_report(y_true=y_test, y_pred=pred_rf)
print(class_report)

[[15  0  0]
 [ 0 17  1]
 [ 0  0 12]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.94      0.97        18
           2       0.92      1.00      0.96        12

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



## 배깅 bagging

In [21]:
# Bagging ensemble: 10 Gaussian naive Bayes models, fixed seed.
nb_base = GaussianNB()
clf_bagging = BaggingClassifier(estimator=nb_base, n_estimators=10, random_state=808)

# Train, then predict on the standardized test split.
pred_bagging = clf_bagging.fit(X_train_std, y_train).predict(X_test_std)

# Confusion matrix.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=pred_bagging)
print(conf_matrix)

# Classification report.
class_report = classification_report(y_true=y_test, y_pred=pred_bagging)
print(class_report)

[[15  0  0]
 [ 0 17  1]
 [ 0  0 12]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.94      0.97        18
           2       0.92      1.00      0.96        12

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



## 부스팅 boosting

### 에이다 부스트 ada boost

In [23]:
# AdaBoost with library defaults for everything except the fixed seed.
clf_ada = AdaBoostClassifier(random_state=808)

# Train, then predict on the standardized test split.
pred_ada = clf_ada.fit(X_train_std, y_train).predict(X_test_std)

# Confusion matrix.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=pred_ada)
print(conf_matrix)

# Classification report.
class_report = classification_report(y_true=y_test, y_pred=pred_ada)
print(class_report)

[[15  0  0]
 [ 0 17  1]
 [ 0  0 12]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.94      0.97        18
           2       0.92      1.00      0.96        12

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



### 그래디언트 부스팅 gradient boosting

In [25]:
# Gradient boosting: depth-2 trees, learning rate 0.1, fixed seed.
clf_gbt = GradientBoostingClassifier(
    learning_rate=0.1,
    max_depth=2,
    random_state=808,
)

# Train, then predict on the standardized test split.
pred_gboost = clf_gbt.fit(X_train_std, y_train).predict(X_test_std)

# Confusion matrix.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=pred_gboost)
print(conf_matrix)

# Classification report.
class_report = classification_report(y_true=y_test, y_pred=pred_gboost)
print(class_report)

[[15  0  0]
 [ 0 17  1]
 [ 0  0 12]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.94      0.97        18
           2       0.92      1.00      0.96        12

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



## 스태킹 stacking

In [27]:
# Base learners for stacking: a linear-kernel SVM (seeded) and Gaussian naive Bayes.
# NOTE(review): clf1 / clf2 are rebound here to different model types than in
# the voting cell above.
clf1 = svm.SVC(kernel='linear', random_state=808)
clf2 = GaussianNB()

# Stack the two base learners; a logistic regression combines their outputs.
stacking_members = [('svm', clf1), ('gnb', clf2)]
meta_learner = LogisticRegression()
clf_stkg = StackingClassifier(estimators=stacking_members, final_estimator=meta_learner)

# Train, then predict on the standardized test split.
pred_stkg = clf_stkg.fit(X_train_std, y_train).predict(X_test_std)

# Confusion matrix.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=pred_stkg)
print(conf_matrix)

# Classification report.
class_report = classification_report(y_true=y_test, y_pred=pred_stkg)
print(class_report)

[[15  0  0]
 [ 0 18  0]
 [ 0  0 12]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00        18
           2       1.00      1.00      1.00        12

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

