In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly_express as px
import plotly.graph_objects as go
from mlxtend.plotting import plot_decision_regions
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn import tree

In [None]:
# Remote locations of the red and white wine-quality CSV files.
red_url = 'https://raw.githubusercontent.com/PinkWink/forML_study_data/main/data/winequality-red.csv'
white_url = 'https://raw.githubusercontent.com/PinkWink/forML_study_data/main/data/winequality-white.csv'

# Both files are read with ';' as the field separator, one DataFrame per file.
red_wine, white_wine = (
    pd.read_csv(csv_url, sep=';') for csv_url in (red_url, white_url)
)

In [None]:
# Tag each source frame with a numeric class label: 1.0 = red, 0.0 = white.
red_wine['color'] = 1.
white_wine['color'] = 0.

# Stack the red rows on top of the white rows.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# the row labels of the two source frames are kept as-is, so the same label
# shows up once per file and label-based lookups on `wine` become ambiguous.
wine = pd.concat([red_wine, white_wine], ignore_index=True)
wine.info()


In [None]:
# Histogram of the quality scores, with one colour-coded series per value of
# the `color` label column.
fig = px.histogram(data_frame=wine, x='quality', color='color')
fig.show()

In [None]:
# Target is the red/white flag; features are every other column.
y = wine['color']
x = wine.drop(columns=['color'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=5)

# Overlay the quality histograms of both partitions to compare their
# distributions visually.
fig = go.Figure()
for split_name, split_frame in (('train', x_train), ('test', x_test)):
    fig.add_trace(go.Histogram(x=split_frame['quality'], name=split_name))

# Semi-transparent bars drawn on top of each other instead of stacked.
fig.update_traces(opacity=0.75)
fig.update_layout(barmode='overlay')
fig.show()

In [None]:
# Depth-2 decision tree with a fixed seed, trained on the training partition.
# The fit call is kept as the last expression so the notebook still displays
# the fitted estimator.
wine_tree = DecisionTreeClassifier(random_state=5, max_depth=2)
wine_tree.fit(X=x_train, y=y_train)

In [None]:
# Predict on both partitions with the fitted tree.
y_pred_tr, y_pred_test = (wine_tree.predict(part) for part in (x_train, x_test))

# Report the accuracy for each partition next to its label.
for label, truth, guess in (
    ('Train Acc    : ', y_train, y_pred_tr),
    ('Test Acc     : ', y_test, y_pred_test),
):
    print(label, accuracy_score(truth, guess))

In [None]:
# Two scalers are fitted on the complete feature matrix `x`
# (all rows, not just a training partition).
MMS = MinMaxScaler()
SS = StandardScaler()

# fit_transform learns the scaling parameters and applies them in one call.
x_mms = MMS.fit_transform(x)
x_ss = SS.fit_transform(x)

# Wrap the scaled results in DataFrames that carry the original column names.
x_mms_pd, x_ss_pd = (
    pd.DataFrame(scaled, columns=x.columns) for scaled in (x_mms, x_ss)
)

In [None]:
# Box plots of three min-max-scaled columns, one box per column.
fig = go.Figure()
for column in ('fixed acidity', 'chlorides', 'quality'):
    fig.add_trace(go.Box(y=x_mms_pd[column], name=column))
fig.show()

In [None]:
# Box plots of the same three columns after standard scaling, one box per column.
fig = go.Figure()
for column in ('fixed acidity', 'chlorides', 'quality'):
    fig.add_trace(go.Box(y=x_ss_pd[column], name=column))
fig.show()

In [None]:
# Red/white classification on min-max-scaled features.
#
# Split the unscaled features first, then fit the scaler on the training rows
# only. Scaling parameters learned from all rows would let the test rows
# influence preprocessing (data leakage) and make the test score optimistic.
# With the same seed and row count the partition contains the same rows as a
# split of the pre-scaled frame would.
# NOTE: relies on `x` / `y` still being the feature frame and `color` target
# defined earlier in the notebook (i.e. cells run top to bottom).
x_train_raw, x_test_raw, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=5)

train_scaler = MinMaxScaler().fit(x_train_raw)

# Keep column names (used for tree plots / importances) and the row labels of
# the raw partitions so features and targets stay aligned.
x_train = pd.DataFrame(
    train_scaler.transform(x_train_raw),
    columns=x_train_raw.columns,
    index=x_train_raw.index,
)
x_test = pd.DataFrame(
    train_scaler.transform(x_test_raw),
    columns=x_test_raw.columns,
    index=x_test_raw.index,
)

# Depth-4 tree with a fixed seed, trained on the scaled training rows.
wine_tree = DecisionTreeClassifier(max_depth=4, random_state=5)
wine_tree.fit(x_train, y_train)

y_pred_tr = wine_tree.predict(x_train)
y_pred_test = wine_tree.predict(x_test)
print('Train Acc    : ', accuracy_score(y_train, y_pred_tr))
print('Test Acc     : ', accuracy_score(y_test, y_pred_test))

In [None]:
# Draw the fitted red/white tree on a wide canvas.
fig, ax = plt.subplots(figsize=(25, 10))
tree.plot_tree(
    wine_tree,
    # Pass a plain list: some scikit-learn releases validate this parameter
    # strictly and reject a pandas Index.
    feature_names=list(x_train.columns),
    # Order follows the sorted class labels: 0.0 -> white, 1.0 -> red.
    class_names=['white', 'red'],
    filled=True,
    ax=ax,
)
plt.show()

In [None]:
# Map each feature name to the importance the fitted tree assigned to it.
{feature: weight for feature, weight in zip(x_train.columns, wine_tree.feature_importances_)}

In [None]:
# Binary taste label: 1.0 when quality is above 5, otherwise 0.0.
wine['taste'] = np.where(wine['quality'] > 5, 1., 0.)

# Target is the new label; `quality` is dropped from the features because the
# label is computed directly from it.
y = wine['taste']
x = wine.drop(columns=['taste', 'quality'])

# Same 80/20 seeded split and depth-4 tree settings as before.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=5)
wine_tree = DecisionTreeClassifier(random_state=5, max_depth=4)
wine_tree.fit(x_train, y_train)

# Predict on both partitions and report the accuracy of each.
y_pred_tr, y_pred_test = (wine_tree.predict(part) for part in (x_train, x_test))
for label, truth, guess in (
    ('Train Acc    : ', y_train, y_pred_tr),
    ('Test Acc     : ', y_test, y_pred_test),
):
    print(label, accuracy_score(truth, guess))

In [None]:
# Draw the fitted taste tree on a wide canvas.
fig, ax = plt.subplots(figsize=(25, 10))
tree.plot_tree(
    wine_tree,
    # Pass a plain list: some scikit-learn releases validate this parameter
    # strictly and reject a pandas Index.
    feature_names=list(x_train.columns),
    # Order follows the sorted class labels: 0.0 -> soso, 1.0 -> good.
    class_names=['soso', 'good'],
    filled=True,
    ax=ax,
)
plt.show()