In [1]:
import pandas as pd

# Load the wine dataset from the project's data directory (path is relative
# to the notebook's working directory).
wine_csv_path = '../data/wine_data.csv'
df = pd.read_csv(wine_csv_path)

In [2]:
# Inspect the column labels of the loaded frame; the bare expression is
# rendered as this cell's output.
df.columns

Index(['Alcohol', 'Malic', 'Ash', 'Alcalinity', 'Magesium', 'Phenols',
       'Flavanoids', 'Nonflavanoids', 'Proanthocyanins', 'Color', 'Hue',
       'Dilution', 'Proline', 'class'],
      dtype='object')

In [3]:
# Predictor columns: every column of `df` except the target label.
# Spellings follow the CSV header exactly (e.g. 'Magesium').
features = [
    'Alcohol',
    'Malic',
    'Ash',
    'Alcalinity',
    'Magesium',
    'Phenols',
    'Flavanoids',
    'Nonflavanoids',
    'Proanthocyanins',
    'Color',
    'Hue',
    'Dilution',
    'Proline',
]
target_col = 'class'

# X holds the predictors, y the class label to be predicted.
X = df.loc[:, features]
y = df[target_col]

In [4]:
# Split into training and test sets. The seed is fixed so the split is
# reproducible; the test fraction is left at scikit-learn's default.
from sklearn.model_selection import train_test_split

split_parts = train_test_split(X, y, random_state=0)
X_tn, X_te, y_tn, y_te = split_parts

In [6]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics leak into the
# transformation.
from sklearn.preprocessing import StandardScaler

std_scale = StandardScaler().fit(X_tn)
X_tn_std, X_te_std = (std_scale.transform(part) for part in (X_tn, X_te))

In [8]:
# Gaussian naive Bayes classifier trained on the standardized training split.
from sklearn.naive_bayes import GaussianNB

clf_gnb = GaussianNB()
clf_gnb.fit(X=X_tn_std, y=y_tn)

In [10]:
# Predict class labels for the standardized test split and show them.
pred_gnb = clf_gnb.predict(X=X_te_std)
print(pred_gnb)

[0 2 1 0 1 1 0 2 1 1 2 2 0 0 2 1 0 0 2 0 0 0 0 1 1 1 1 1 1 2 0 0 1 0 0 0 2
 1 1 2 0 0 1 1 1]


In [None]:
# Variance inflation factor (VIF) of each predictor, computed on the training
# split and listed from lowest to highest.
#
# The helper is imported in this cell (like every other cell in this notebook)
# and the table is built from this notebook's own training frame `X_tn`, so the
# cell runs on a fresh kernel once the train/test split cell has been executed.
#
# NOTE(review): the design matrix is passed as-is, with no intercept column
# added — confirm whether a constant term should be included before
# interpreting the absolute VIF values.
from statsmodels.stats.outliers_influence import variance_inflation_factor

vif = pd.DataFrame({
    'VIF Factor': [
        variance_inflation_factor(X_tn.values, i)
        for i in range(X_tn.shape[1])
    ],
    # Labels come from the same frame as the values so they cannot misalign.
    'feature': list(X_tn.columns),
})
vif = vif.sort_values("VIF Factor").reset_index(drop=True)
vif

In [11]:
# Summary metrics for the Gaussian naive Bayes predictions on the test split.
# Precision, recall and F1 use macro averaging over the classes.
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
)

# Accuracy
accuracy = accuracy_score(y_te, pred_gnb)
print("accuracy:", accuracy)

# Precision
precision = precision_score(y_te, pred_gnb, average='macro')
print("precision:", precision)

# Recall
recall = recall_score(y_te, pred_gnb, average='macro')
print("recall:", recall)

# F1 score
f1 = f1_score(y_te, pred_gnb, average='macro')
print("f1:", f1)


accuracy: 0.9333333333333333
precision: 0.9259259259259259
recall: 0.9523809523809524
f1: 0.9351432880844645


In [12]:
# Confusion matrix for the naive Bayes predictions
# (first argument: true labels, second argument: predicted labels).
from sklearn.metrics import confusion_matrix

conf_matrix = confusion_matrix(y_true=y_te, y_pred=pred_gnb)
print(conf_matrix)

[[16  0  0]
 [ 2 18  1]
 [ 0  0  8]]


In [14]:
# Per-class precision / recall / F1 report for the naive Bayes predictions.
from sklearn.metrics import classification_report

class_report = classification_report(y_true=y_te, y_pred=pred_gnb)
print(class_report)


              precision    recall  f1-score   support

           0       0.89      1.00      0.94        16
           1       1.00      0.86      0.92        21
           2       0.89      1.00      0.94         8

    accuracy                           0.93        45
   macro avg       0.93      0.95      0.94        45
weighted avg       0.94      0.93      0.93        45



의사결정나무

In [15]:
# Decision tree classifier trained on the standardized training split.
# The seed is fixed so the fitted tree is reproducible.
from sklearn import tree

clf_tree = tree.DecisionTreeClassifier(random_state=0)
clf_tree.fit(X=X_tn_std, y=y_tn)

In [16]:
# Predict class labels for the standardized test split with the decision tree
# and show them.
pred_tree = clf_tree.predict(X=X_te_std)
print(pred_tree)

[0 2 1 0 1 1 0 2 1 1 2 2 0 1 2 1 0 0 2 0 1 0 1 1 1 1 1 1 1 2 0 0 1 0 0 0 2
 1 1 2 1 0 1 1 1]


In [17]:
# Summary metrics for the decision tree predictions on the test split.
# Precision, recall and F1 use macro averaging over the classes.
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
)

# Accuracy
accuracy_t = accuracy_score(y_te, pred_tree)
print("accuracy:", accuracy_t)

# Precision
precision_t = precision_score(y_te, pred_tree, average='macro')
print("precision:", precision_t)

# Recall
recall_t = recall_score(y_te, pred_tree, average='macro')
print("recall:", recall_t)

# F1 score
f1_t = f1_score(y_te, pred_tree, average='macro')
print("f1:", f1_t)


accuracy: 0.9333333333333333
precision: 0.9326599326599326
recall: 0.9424603174603176
f1: 0.9349141206870346


In [18]:
# Confusion matrix for the decision tree predictions
# (first argument: true labels, second argument: predicted labels).
from sklearn.metrics import confusion_matrix

conf_matrix_t = confusion_matrix(y_true=y_te, y_pred=pred_tree)
print(conf_matrix_t)

[[14  2  0]
 [ 0 20  1]
 [ 0  0  8]]


In [20]:
# Per-class precision / recall / F1 report for the decision tree predictions.
from sklearn.metrics import classification_report

class_report_t = classification_report(y_true=y_te, y_pred=pred_tree)
print(class_report_t)

              precision    recall  f1-score   support

           0       1.00      0.88      0.93        16
           1       0.91      0.95      0.93        21
           2       0.89      1.00      0.94         8

    accuracy                           0.93        45
   macro avg       0.93      0.94      0.93        45
weighted avg       0.94      0.93      0.93        45

