# 와인의 맛 다중분류
## 3, 9등급은 제외하기

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
seed=2021
np.random.seed(seed)
tf.random.set_seed(seed)

In [33]:
wine = pd.read_csv('dataset/wine.csv', header=None)
wine.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,1


In [24]:
# 결측치 확인 
wine.isnull().sum().sum()

0

In [34]:
# 와인 등급별 분포 알아보기
wine[11].value_counts()

6    2836
5    2138
7    1079
4     216
8     193
3      30
9       5
Name: 11, dtype: int64

In [35]:
# 3, 9등급 와인은 제외하기
wine = wine[wine[11]!=3]
wine = wine[wine[11]!=9]

In [36]:
# 제거되었는지 확인
wine[11].value_counts()

6    2836
5    2138
7    1079
4     216
8     193
Name: 11, dtype: int64

In [54]:
# 컬럼 순서바꾸기
wine_t = wine[[0,1,2,3,4,5,6,7,8,9,10,12,11]]
wine_t.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,12,11
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,1,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,1,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,1,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,1,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,1,5


In [52]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(wine_t.iloc[:,:-1].values)

In [69]:
# LabelEncoder
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
y = encoder.fit_transform(wine_t.iloc[:,-1].values)

In [70]:
# One-hot encoding
from tensorflow.keras.utils import to_categorical
y_onehot = to_categorical(y)
y_onehot[:3]

array([[0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.]], dtype=float32)

In [78]:
# train/test set 분리
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(
    X_scaled, y_onehot, stratify= y_onehot, random_state=seed
)
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((4846, 12), (1616, 12), (4846, 5), (1616, 5))

## 모델 정의/설정

In [58]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [79]:
model = Sequential([
    Dense(128, input_dim=12, activation='softmax'),               
    Dense(80, activation='softmax'),
    Dense(48, activation='softmax'),
    Dense(30, activation='softmax'),
    Dense(12, activation='softmax'),
    Dense(5, activation='sigmoid')
])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 128)               1664      
_________________________________________________________________
dense_9 (Dense)              (None, 80)                10320     
_________________________________________________________________
dense_10 (Dense)             (None, 48)                3888      
_________________________________________________________________
dense_11 (Dense)             (None, 30)                1470      
_________________________________________________________________
dense_12 (Dense)             (None, 12)                372       
_________________________________________________________________
dense_13 (Dense)             (None, 5)                 65        
Total params: 17,779
Trainable params: 17,779
Non-trainable params: 0
__________________________________________________

In [80]:
model.compile(
    optimizer='adam', loss='categorical_crossentropy', metrics = ['accuracy'],
     )

## 모델 저장관련 설정

In [62]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [81]:
modelpath = 'model/best_wine2.h5'
checkpointer = ModelCheckpoint(
    modelpath, monitor='val_loss', verbose=0, save_best_only=True
)
early_stopping = EarlyStopping(patience=20)

## 모델 학습 및 저장

In [82]:
history = model.fit(
    X_train, Y_train,
    validation_split=0.2,
    epochs=500, batch_size=100, verbose=0,
    callbacks=[checkpointer, early_stopping])

## 베스트 모델 로딩 후 평가

In [83]:
from tensorflow.keras.models import load_model
best_model = load_model(modelpath)

In [84]:
best_model.evaluate(X_test, Y_test)



[1.2445120811462402, 0.4387376308441162]

## 훈련과정 시각화