In [1]:
import time

import pandas as pd
from sklearn.model_selection import train_test_split

from src.DecisionTree import DecisionTree
from src.RandomForest import RandomForest
from tests.utils import *

Load data

In [2]:
# Read both semicolon-separated wine-quality CSVs in one pass; the order of the
# paths matches the order of the names being unpacked (red first, then white).
df_red, df_white = (
    pd.read_csv(csv_path, sep=';')
    for csv_path in (
        'data/winequality-red_NO_ALCOHOL.csv',
        'data/winequality-white_NO_ALCOHOL.csv',
    )
)

Split data

In [3]:
# Separate the feature columns from the 'quality' target for the red-wine data
# and convert both to plain NumPy arrays.
X_red = df_red.drop('quality', axis=1).values
y_red = df_red['quality'].values

# Hold out 20% of the rows for testing. random_state pins the shuffle so that
# Restart & Run All reproduces the same split, and therefore the same metrics.
X_train_red, X_test_red, y_train_red, y_test_red = train_test_split(
    X_red, y_red, test_size=0.2, random_state=42
)

In [4]:
# Separate the feature columns from the 'quality' target for the white-wine
# data and convert both to plain NumPy arrays.
X_white = df_white.drop('quality', axis=1).values
y_white = df_white['quality'].values

# Hold out 20% of the rows for testing. random_state pins the shuffle so that
# Restart & Run All reproduces the same split, and therefore the same metrics.
X_train_white, X_test_white, y_train_white, y_test_white = train_test_split(
    X_white, y_white, test_size=0.2, random_state=42
)

Train Decision Tree

In [5]:
# Build the project's DecisionTree (max_depth=10, min_samples_split=2) and time
# how long fitting it on the red-wine training split takes.
dt_red = DecisionTree(max_depth=10, min_samples_split=2)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() follows the wall clock and can jump if the
# system time is adjusted while the cell is running.
start = time.perf_counter()
dt_red.fit(X_train_red, y_train_red)
end = time.perf_counter()

# Elapsed fit time in seconds.
print('Training time: ', end - start)

Training time:  0.8346724510192871


In [6]:
# Build the project's DecisionTree (max_depth=10, min_samples_split=2) and time
# how long fitting it on the white-wine training split takes.
dt_white = DecisionTree(max_depth=10, min_samples_split=2)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() follows the wall clock and can jump if the
# system time is adjusted while the cell is running.
start = time.perf_counter()
dt_white.fit(X_train_white, y_train_white)
end = time.perf_counter()

# Elapsed fit time in seconds.
print('Training time: ', end - start)

Training time:  1.9262189865112305


Evaluate Decision Tree

In [7]:
# Predict on the held-out red-wine split with the fitted decision tree.
y_pred_red = dt_red.predict(X_test_red)

# max_dif is evaluated on the full red-wine target vector (not just the test
# split); its result is handed to the *_accuracy helpers below.
# NOTE(review): presumably provided by the star import from tests.utils — confirm.
m_red = max_dif(y_red)

# Plain error metrics are called as metric(y_true, y_pred).
for metric_label, metric_fn in (('MAE: ', mae), ('MSE: ', mse)):
    print(metric_label, metric_fn(y_test_red, y_pred_red))

print('Categorical accuracy: ', categorical_accuracy(y_test_red, y_pred_red))

# The "accuracy" variants additionally receive m_red as their first argument.
for metric_label, metric_fn in (
    ('MAE accuracy: ', mae_accuracy),
    ('MSE accuracy: ', mse_accuracy),
):
    print(metric_label, metric_fn(m_red, y_test_red, y_pred_red))

MAE:  0.725
MSE:  1.125
Categorical accuracy:  0.4625
MAE accuracy:  0.855
MSE accuracy:  0.955


In [8]:
# Predict on the held-out white-wine split with the fitted decision tree.
y_pred_white = dt_white.predict(X_test_white)

# max_dif is evaluated on the full white-wine target vector (not just the test
# split); its result is handed to the *_accuracy helpers below.
# NOTE(review): presumably provided by the star import from tests.utils — confirm.
m_white = max_dif(y_white)

# Plain error metrics are called as metric(y_true, y_pred).
for metric_label, metric_fn in (('MAE: ', mae), ('MSE: ', mse)):
    print(metric_label, metric_fn(y_test_white, y_pred_white))

print('Categorical accuracy: ', categorical_accuracy(y_test_white, y_pred_white))

# The "accuracy" variants additionally receive m_white as their first argument.
for metric_label, metric_fn in (
    ('MAE accuracy: ', mae_accuracy),
    ('MSE accuracy: ', mse_accuracy),
):
    print(metric_label, metric_fn(m_white, y_test_white, y_pred_white))

MAE:  0.6265306122448979
MSE:  0.7755102040816326
Categorical accuracy:  0.44387755102040816
MAE accuracy:  0.895578231292517
MSE accuracy:  0.9784580498866213


Train Random Forest

In [9]:
# Build the project's RandomForest (n_trees=10, max_depth=10,
# min_samples_split=2) and time how long fitting it on the red-wine training
# split takes.
rf_red = RandomForest(n_trees=10, max_depth=10, min_samples_split=2)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() follows the wall clock and can jump if the
# system time is adjusted while the cell is running.
start = time.perf_counter()
rf_red.fit(X_train_red, y_train_red)
end = time.perf_counter()

# Elapsed fit time in seconds.
print('Training time: ', end - start)

Training time:  5.53687310218811


In [10]:
# Build the project's RandomForest (n_trees=10, max_depth=10,
# min_samples_split=2) and time how long fitting it on the white-wine training
# split takes.
rf_white = RandomForest(n_trees=10, max_depth=10, min_samples_split=2)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() follows the wall clock and can jump if the
# system time is adjusted while the cell is running.
start = time.perf_counter()
rf_white.fit(X_train_white, y_train_white)
end = time.perf_counter()

# Elapsed fit time in seconds.
print('Training time: ', end - start)

Training time:  14.447388410568237


Evaluate Random Forest

In [11]:
# Predict on the held-out red-wine split with the fitted random forest.
# This rebinds y_pred_red, replacing the decision-tree predictions.
# NOTE(review): the output recorded for this cell is identical to the
# decision-tree evaluation output — confirm the forest's predictions really
# match the single tree's, or whether the saved output is stale.
y_pred_red = rf_red.predict(X_test_red)

# max_dif is evaluated on the full red-wine target vector (not just the test
# split); its result is handed to the *_accuracy helpers below.
m_red = max_dif(y_red)

# Plain error metrics are called as metric(y_true, y_pred).
for metric_label, metric_fn in (('MAE: ', mae), ('MSE: ', mse)):
    print(metric_label, metric_fn(y_test_red, y_pred_red))

print('Categorical accuracy: ', categorical_accuracy(y_test_red, y_pred_red))

# The "accuracy" variants additionally receive m_red as their first argument.
for metric_label, metric_fn in (
    ('MAE accuracy: ', mae_accuracy),
    ('MSE accuracy: ', mse_accuracy),
):
    print(metric_label, metric_fn(m_red, y_test_red, y_pred_red))

MAE:  0.725
MSE:  1.125
Categorical accuracy:  0.4625
MAE accuracy:  0.855
MSE accuracy:  0.955


In [12]:
# Predict on the held-out white-wine split with the fitted random forest.
# This rebinds y_pred_white, replacing the decision-tree predictions.
# NOTE(review): the output recorded for this cell is identical to the
# decision-tree evaluation output — confirm the forest's predictions really
# match the single tree's, or whether the saved output is stale.
y_pred_white = rf_white.predict(X_test_white)

# max_dif is evaluated on the full white-wine target vector (not just the test
# split); its result is handed to the *_accuracy helpers below.
m_white = max_dif(y_white)

# Plain error metrics are called as metric(y_true, y_pred).
for metric_label, metric_fn in (('MAE: ', mae), ('MSE: ', mse)):
    print(metric_label, metric_fn(y_test_white, y_pred_white))

print('Categorical accuracy: ', categorical_accuracy(y_test_white, y_pred_white))

# The "accuracy" variants additionally receive m_white as their first argument.
for metric_label, metric_fn in (
    ('MAE accuracy: ', mae_accuracy),
    ('MSE accuracy: ', mse_accuracy),
):
    print(metric_label, metric_fn(m_white, y_test_white, y_pred_white))

MAE:  0.6265306122448979
MSE:  0.7755102040816326
Categorical accuracy:  0.44387755102040816
MAE accuracy:  0.895578231292517
MSE accuracy:  0.9784580498866213
