In [1]:
import pandas as pd
import numpy as np
from model import DecisionTree

# Banknote Authentication Dataset
**from [UCI Repository](https://archive.ics.uci.edu/ml/datasets/banknote+authentication)**  

Variables are:
- variance of Wavelet Transformed image (continuous)
- skewness of Wavelet Transformed image (continuous)
- kurtosis of Wavelet Transformed image (continuous)
- entropy of image (continuous)
- class (integer)


In [2]:
# Load the banknote measurements. header=None tells pandas the first row is
# data, so the columns receive integer labels instead of names.
df = pd.read_csv(
    filepath_or_buffer='datasets/data_banknote_authentication.csv',
    header=None,
)
df

Unnamed: 0,0,1,2,3,4
0,3.62160,8.66610,-2.8073,-0.44699,0
1,4.54590,8.16740,-2.4586,-1.46210,0
2,3.86600,-2.63830,1.9242,0.10645,0
3,3.45660,9.52280,-4.0112,-3.59440,0
4,0.32924,-4.45520,4.5718,-0.98880,0
...,...,...,...,...,...
1367,0.40614,1.34920,-1.4501,-0.55949,1
1368,-1.38870,-4.87730,6.4774,0.34179,1
1369,-3.75030,-13.45860,17.5932,-2.77710,1
1370,-3.56370,-8.38270,12.3930,-1.28230,1


### Run decision tree with different cost functions

In [3]:
# Compare cost functions while holding every other setting fixed
# (5 folds, max_depth=5, min_size=10), reporting the mean score for each.
cost_functions = ['GINI', 'ENTROPY', 'CLASSIFICATION ERROR']
for cost_function in cost_functions:
    decision_tree = DecisionTree(
        data=df,
        number_of_folds=5,
        max_depth=5,
        min_size=10,
        cost_function=cost_function,
    )
    # run() returns a collection of scores -- presumably one per fold;
    # confirm against model.py.
    scores = decision_tree.run()
    mean_score = np.mean(scores)
    print(f"{cost_function}: {mean_score:.2f}%")

GINI: 97.74%
ENTROPY: 98.03%
CLASSIFICATION ERROR: 97.15%


### Run with the ENTROPY cost function and different hyperparameters

In [4]:
# Exhaustive grid search over max_depth x min_size using the ENTROPY cost
# function; remembers the combination with the highest mean score.
max_depth_values = [3, 5, 7, 10]
min_size_values = [2, 5, 10, 15]
best_mean_score = 0
best_max_depth = None
best_min_size = None
# The generator yields (max_depth, min_size) pairs with min_size varying
# fastest, i.e. the same order as nesting min_size inside max_depth.
for max_depth, min_size in ((d, s) for d in max_depth_values for s in min_size_values):
    decision_tree = DecisionTree(
        data=df,
        number_of_folds=5,
        max_depth=max_depth,
        min_size=min_size,
        cost_function="ENTROPY",
    )
    scores = decision_tree.run()
    mean_score = np.mean(scores)
    # Strict comparison: when two combinations tie, the earlier one is kept.
    if mean_score > best_mean_score:
        best_mean_score = mean_score
        best_max_depth = max_depth
        best_min_size = min_size
print(f"max_depth: {best_max_depth}, min_size: {best_min_size}, mean_score: {best_mean_score:.2f}%")

max_depth: 3, min_size: 10, mean_score: 98.03%
