# Import libraries

In [None]:
from sklearn import tree
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Import Data

In [None]:
# Load the red and white wine-quality tables from the local Data/ directory.
# NOTE(review): the upstream UCI wine-quality files are ';'-separated — confirm
# these local copies really are comma-separated.
red_df = pd.read_csv("Data/winequality-red.csv", sep=",")
white_df = pd.read_csv("Data/winequality-white.csv", sep=",")

In [None]:
# Preview the first five rows of the red-wine table.
red_df.head(n=5)
# Optional: inspect the class balance of the target column.
# red_df['quality'].value_counts()

In [None]:
# Preview the first five rows of the white-wine table.
white_df.head(n=5)
# Optional: inspect the class balance of the target column.
# white_df['quality'].value_counts()

# Train Test Split

In [None]:
# Feature matrices: every column except the "quality" target.
red_X = red_df.drop("quality", axis="columns")
white_X = white_df.drop("quality", axis="columns")

In [None]:
# Target vectors: the "quality" column of each table.
red_y, white_y = red_df["quality"], white_df["quality"]

In [None]:
# Split each dataset into train/test partitions. No test size is passed, so
# train_test_split's default applies. Stratifying on the target keeps the
# quality-class proportions the same in both partitions, and the fixed seed
# makes the split repeatable.
X_train_red, X_test_red, y_train_red, y_test_red = train_test_split(
    red_X,
    red_y,
    random_state=42,
    stratify=red_y,
)
X_train_white, X_test_white, y_train_white, y_test_white = train_test_split(
    white_X,
    white_y,
    random_state=42,
    stratify=white_y,
)

In [None]:
# Class balance of the white-wine training labels:
# one row per distinct quality value, laid out as [value, count].
number_list = np.array(y_train_white)
unique, counts = np.unique(number_list, return_counts=True)
frequencies = np.column_stack((unique, counts))
frequencies

In [None]:
# Class balance of the white-wine test labels:
# one row per distinct quality value, laid out as [value, count].
number_list = np.array(y_test_white)
unique, counts = np.unique(number_list, return_counts=True)
frequencies = np.column_stack((unique, counts))
frequencies

# Decision Tree Model

In [None]:
# Red wine
# Seed the estimator so the fitted tree (and every score, printout and plot
# derived from it) is reproducible across runs; scikit-learn permutes candidate
# features at each split, so an unseeded tree can differ from run to run even
# on an identical train/test split.
red_clf = DecisionTreeClassifier(random_state=42)
red_clf = red_clf.fit(X_train_red, y_train_red)
# Accuracy on the training partition (compare with the test score to gauge overfitting).
red_clf.score(X_train_red, y_train_red)

In [None]:
# Accuracy of the red-wine tree on the held-out test partition.
red_clf.score(X=X_test_red, y=y_test_red)

In [None]:
# White wine
# Seed the estimator so the fitted tree (and every score, printout and plot
# derived from it) is reproducible across runs; scikit-learn permutes candidate
# features at each split, so an unseeded tree can differ from run to run even
# on an identical train/test split.
white_clf = DecisionTreeClassifier(random_state=42)
white_clf = white_clf.fit(X_train_white, y_train_white)
# Accuracy on the training partition (compare with the test score to gauge overfitting).
white_clf.score(X_train_white, y_train_white)

In [None]:
# Accuracy of the white-wine tree on the held-out test partition.
white_clf.score(X=X_test_white, y=y_test_white)

# Print Text Representation

In [None]:
# Red wine
# Pass the training column names so each rule is printed with the real feature
# name instead of a generic placeholder. A plain list is used because
# export_text expects a list of names.
text_representation = tree.export_text(
    red_clf,
    feature_names=list(X_train_red.columns),
)
print(text_representation)

In [None]:
# White wine
# Pass the training column names so each rule is printed with the real feature
# name instead of a generic placeholder. A plain list is used because
# export_text expects a list of names.
text_representation = tree.export_text(
    white_clf,
    feature_names=list(X_train_white.columns),
)
print(text_representation)

# Plot the Trees

In [None]:
# Red wine
# Large canvas so the nodes of the unpruned tree remain legible.
fig = plt.figure(figsize=(50, 50))
# Label split nodes with the real column names and leaves with the quality
# class (as strings, in the order of clf.classes_) rather than bare indices.
tree.plot_tree(
    red_clf,
    feature_names=list(X_train_red.columns),
    class_names=[str(label) for label in red_clf.classes_],
    filled=True,
)
# Uncomment to write the figure to disk (must run before plt.show()).
#plt.savefig('red_decision_tree.png', dpi=250)
plt.show()

In [None]:
# White wine
# Large canvas so the nodes of the unpruned tree remain legible.
fig = plt.figure(figsize=(50, 50))
# Label split nodes with the real column names and leaves with the quality
# class (as strings, in the order of clf.classes_) rather than bare indices.
tree.plot_tree(
    white_clf,
    feature_names=list(X_train_white.columns),
    class_names=[str(label) for label in white_clf.classes_],
    filled=True,
)
# Uncomment to write the figure to disk (must run before plt.show()).
#plt.savefig('white_decision_tree.png', dpi=250)
plt.show()

# Confusion Matrix

In [None]:
#import seaborn as sns
#from sklearn.metrics import confusion_matrix
#sns.heatmap(confusion_matrix(y_test_red, red_clf.predict(X_test_red)), annot = True, fmt="d")

In [None]:
# Red wine
# Predict on the held-out partition, then tabulate true vs. predicted quality.
# The classifier's own class list fixes the row/column order and the tick labels.
predictions = red_clf.predict(X_test_red)
cm = confusion_matrix(
    y_true=y_test_red,
    y_pred=predictions,
    labels=red_clf.classes_,
)
disp = ConfusionMatrixDisplay(cm, display_labels=red_clf.classes_)
disp.plot()

plt.show()

In [None]:
# White wine
# Predict on the held-out partition, then tabulate true vs. predicted quality.
# The classifier's own class list fixes the row/column order and the tick labels.
predictions = white_clf.predict(X_test_white)
cm = confusion_matrix(
    y_true=y_test_white,
    y_pred=predictions,
    labels=white_clf.classes_,
)
disp = ConfusionMatrixDisplay(cm, display_labels=white_clf.classes_)
disp.plot()

plt.show()
