In [5]:
import pandas as pd

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [6]:
# 데이터 준비
wine = load_wine()
wine_data = wine.data

In [19]:
# 데이터 이해하기

# Feature Data 저장하기
wine_df = pd.DataFrame(data=wine_data, columns=wine.feature_names)

# Label Data 지정하기
wine_df['label'] = wine.target
wine_label = wine.target

# Target Names 출력해 보기
print(wine.target_names)

# 데이터 Describe 해보기
print(wine_df.describe())

['class_0' 'class_1' 'class_2']
          alcohol  malic_acid         ash  alcalinity_of_ash   magnesium  \
count  178.000000  178.000000  178.000000         178.000000  178.000000   
mean    13.000618    2.336348    2.366517          19.494944   99.741573   
std      0.811827    1.117146    0.274344           3.339564   14.282484   
min     11.030000    0.740000    1.360000          10.600000   70.000000   
25%     12.362500    1.602500    2.210000          17.200000   88.000000   
50%     13.050000    1.865000    2.360000          19.500000   98.000000   
75%     13.677500    3.082500    2.557500          21.500000  107.000000   
max     14.830000    5.800000    3.230000          30.000000  162.000000   

       total_phenols  flavanoids  nonflavanoid_phenols  proanthocyanins  \
count     178.000000  178.000000            178.000000       178.000000   
mean        2.295112    2.029270              0.361854         1.590899   
std         0.625851    0.998859              0.124453    

In [10]:
# train, test 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(wine_data,
                                                    wine_label,
                                                    test_size=0.2,
                                                    random_state=7)

In [18]:
# 다양한 모델로 학습시켜보기
from sklearn.metrics import accuracy_score
# DecisionTree
from sklearn.tree import DecisionTreeClassifier

decision_tree = DecisionTreeClassifier(random_state=32)
decision_tree.fit(X_train, y_train)

decision_y_pred = decision_tree.predict(X_test)
print('Decision 정확도 : ', accuracy_score(y_test, decision_y_pred))

# Random Forest
from sklearn.ensemble import RandomForestClassifier

random_forest = RandomForestClassifier(random_state=32)
random_forest.fit(X_train, y_train)

random_forest_y_pred = random_forest.predict(X_test)
print("RandomForest 정확도 : ", accuracy_score(y_test, random_forest_y_pred))

# SVM
from sklearn import svm

svm_model = svm.SVC()
svm_model.fit(X_train, y_train)

svm_model_y_pred = svm_model.predict(X_test)
print("SVM 정확도 : ", accuracy_score(y_test, svm_model_y_pred))

# SGD Classifier
from sklearn.linear_model import SGDClassifier

sgd_model = SGDClassifier()
sgd_model.fit(X_train, y_train)

sgd_model_y_pred = sgd_model.predict(X_test)
print("SGD 정확도 : ", accuracy_score(y_test, sgd_model_y_pred))

# Logistic Regression
from sklearn.linear_model import LogisticRegression

logistic_model = LogisticRegression(max_iter=10000)
logistic_model.fit(X_train, y_train)

logistic_model_y_pred = logistic_model.predict(X_test)
print("Logistics 정확도 : ", accuracy_score(y_test, logistic_model_y_pred))

'''
알코올, 사과산, 마그네슘, 총 페놀, 색상 강도, 색조 등 성분 수치와 일치할수록 
와인 판별을 정확하게 할 수 있으니까 정확도 지표가 이 경우에도 중요할 거라 생각함
'''

Decision 정확도 :  0.9444444444444444
RandomForest 정확도 :  1.0
SVM 정확도 :  0.6111111111111112
SGD 정확도 :  0.6111111111111112
Logistics 정확도 :  0.9722222222222222
