In [2]:
import pandas as pd

# Read the wine dataset from the notebook-relative data folder.
df = pd.read_csv(filepath_or_buffer="./data/wine_data.csv")

# Bare last expression: the notebook displays the column index.
df.columns

Index(['Alcohol', 'Malic', 'Ash', 'Alcalinity', 'Magesium', 'Phenols',
       'Flavanoids', 'Nonflavanoids', 'Proanthocyanins', 'Color', 'Hue',
       'Dilution', 'Proline', 'class'],
      dtype='object')

In [3]:
# Predictor columns: every column of the frame except the 'class' label.
# Names are spelled exactly as in the CSV header (including 'Magesium').
features = [
    'Alcohol',
    'Malic',
    'Ash',
    'Alcalinity',
    'Magesium',
    'Phenols',
    'Flavanoids',
    'Nonflavanoids',
    'Proanthocyanins',
    'Color',
    'Hue',
    'Dilution',
    'Proline',
]

# X holds the feature columns, y the target label column.
X, y = df[features], df['class']

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out a test partition; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit the scaler on the training rows only, then apply the same
# transformation to both partitions.
std_scale = StandardScaler().fit(X_train)
X_train_std, X_test_std = std_scale.transform(X_train), std_scale.transform(X_test)


In [8]:
from sklearn import svm

# Linear-kernel support vector classifier trained on the standardized features.
clf_svm_lr = svm.SVC(kernel='linear').fit(X_train_std, y_train)

# Predicted labels for the held-out rows.
pred_svm = clf_svm_lr.predict(X_test_std)
print(pred_svm)

[0 2 1 0 1 1 0 2 1 1 2 2 0 1 2 1 0 0 1 0 1 0 0 1 1 1 1 1 1 2 0 0 1 0 0 0 2
 1 1 2 0 0 1 1 1]


In [7]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Evaluate the SVM predictions against the held-out labels:
# overall accuracy, then the confusion matrix, then the per-class report.
print(accuracy_score(y_true=y_test, y_pred=pred_svm))
print(confusion_matrix(y_true=y_test, y_pred=pred_svm))
print(classification_report(y_true=y_test, y_pred=pred_svm))

1.0
[[16  0  0]
 [ 0 21  0]
 [ 0  0  8]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00         8

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [10]:
## Voting ensemble exercise
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

# Three different base learners that will each cast a vote.
clf1, clf2, clf3 = LogisticRegression(), GaussianNB(), svm.SVC(kernel='linear')

# Hard voting with equal weights for the three models.
clf_voting = VotingClassifier(
    estimators=[('lr', clf1), ('gnb', clf2), ('svm', clf3)],
    voting='hard',
    weights=[1, 1, 1],
).fit(X_train_std, y_train)

pred_voting = clf_voting.predict(X_test_std)
print(pred_voting)


[0 2 1 0 1 1 0 2 1 1 2 2 0 1 2 1 0 0 1 0 1 0 0 1 1 1 1 1 1 2 0 0 1 0 0 0 2
 1 1 2 0 0 1 1 1]


In [11]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with trees limited to depth 2; seeded for repeatable results.
clf_rf = RandomForestClassifier(max_depth=2, random_state=0).fit(X_train_std, y_train)

pred_rf = clf_rf.predict(X_test_std)
print(pred_rf)

[0 2 1 0 1 1 0 2 1 1 2 2 0 1 2 1 0 0 2 0 0 0 0 1 1 1 1 1 1 2 0 0 1 0 0 0 2
 1 1 2 0 0 1 1 1]


In [12]:
# Per-class precision / recall / F1 for the random forest predictions.
print(classification_report(y_true=y_test, y_pred=pred_rf))

              precision    recall  f1-score   support

           0       0.94      1.00      0.97        16
           1       1.00      0.90      0.95        21
           2       0.89      1.00      0.94         8

    accuracy                           0.96        45
   macro avg       0.94      0.97      0.95        45
weighted avg       0.96      0.96      0.96        45



In [20]:
from sklearn.ensemble import BaggingClassifier

# Bagging over Gaussian naive Bayes base models.
clf_bagging = BaggingClassifier(
    estimator=GaussianNB(),
    n_estimators=2,  # number of base estimators in the ensemble
    random_state=0,
).fit(X_train_std, y_train)

pred_bagging = clf_bagging.predict(X_test_std)

In [18]:
# Per-class precision / recall / F1 for the bagging predictions.
print(classification_report(y_true=y_test, y_pred=pred_bagging))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00         8

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [21]:
## AdaBoost
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with library-default settings apart from the fixed seed.
clf_ada = AdaBoostClassifier(random_state=0).fit(X_train_std, y_train)

pred_ada = clf_ada.predict(X_test_std)
print(pred_ada)

[0 2 0 0 1 0 0 2 1 1 2 2 0 0 2 1 0 0 1 0 0 0 0 1 1 1 1 1 1 2 0 0 1 0 0 0 2
 1 0 2 1 0 1 1 1]


In [22]:
# Confusion matrix followed by the per-class report for the AdaBoost predictions.
print(confusion_matrix(y_true=y_test, y_pred=pred_ada))

print(classification_report(y_true=y_test, y_pred=pred_ada))

[[15  1  0]
 [ 5 16  0]
 [ 0  0  8]]
              precision    recall  f1-score   support

           0       0.75      0.94      0.83        16
           1       0.94      0.76      0.84        21
           2       1.00      1.00      1.00         8

    accuracy                           0.87        45
   macro avg       0.90      0.90      0.89        45
weighted avg       0.88      0.87      0.87        45



In [23]:
# GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingClassifier

# How much max_depth needs to be tuned can differ from one dataset to another.
clf_gbt = GradientBoostingClassifier(
    max_depth=2,
    learning_rate=0.1,
    random_state=0,
).fit(X_train_std, y_train)

pred_gboost = clf_gbt.predict(X_test_std)
print(pred_gboost)

[0 2 1 0 1 0 0 2 1 1 2 2 0 1 2 1 0 0 2 0 1 0 0 1 1 1 1 1 1 2 0 0 1 0 0 0 2
 1 1 2 0 0 1 1 1]


In [24]:
# Confusion matrix followed by the per-class report for the gradient boosting predictions.
print(confusion_matrix(y_true=y_test, y_pred=pred_gboost))
print(classification_report(y_true=y_test, y_pred=pred_gboost))

[[16  0  0]
 [ 1 19  1]
 [ 0  0  8]]
              precision    recall  f1-score   support

           0       0.94      1.00      0.97        16
           1       1.00      0.90      0.95        21
           2       0.89      1.00      0.94         8

    accuracy                           0.96        45
   macro avg       0.94      0.97      0.95        45
weighted avg       0.96      0.96      0.96        45



In [28]:
from sklearn.ensemble import StackingClassifier

# Base learners for the stack. NOTE: this rebinds clf1 / clf2, which the
# voting cell above also assigns.
clf1, clf2 = svm.SVC(kernel='linear', random_state=0), GaussianNB()

# The final logistic regression is trained on the base models' `predict` outputs.
clf_stck = StackingClassifier(
    estimators=[('svm', clf1), ('gnb', clf2)],
    final_estimator=LogisticRegression(),
    stack_method='predict',
).fit(X_train_std, y_train)

pred_stck = clf_stck.predict(X_test_std)
print(pred_stck)

# Confusion matrix followed by the per-class report for the stacked model.
print(confusion_matrix(y_true=y_test, y_pred=pred_stck))
print(classification_report(y_true=y_test, y_pred=pred_stck))

[0 2 1 0 1 1 0 2 1 1 2 2 0 0 2 1 0 0 2 0 0 0 0 1 1 1 1 1 1 2 0 0 1 0 0 0 2
 1 1 2 0 0 1 1 1]
[[16  0  0]
 [ 2 18  1]
 [ 0  0  8]]
              precision    recall  f1-score   support

           0       0.89      1.00      0.94        16
           1       1.00      0.86      0.92        21
           2       0.89      1.00      0.94         8

    accuracy                           0.93        45
   macro avg       0.93      0.95      0.94        45
weighted avg       0.94      0.93      0.93        45

