# Generate Results

In [1]:
import pandas as pd
import decision_tree
from collections import Counter

In [2]:
# Directory that holds the input CSVs and receives the results file.
# The trailing slash is required: every path in this notebook is built by
# plain string concatenation (folder + file name).
folder = 'data_sets2/'

In [3]:
print(f'Reading data from {folder}')
# Read the three splits in order; each path is the data folder plus a file name.
training_set, validation_set, test_set = (
    pd.read_csv(folder + file_name)
    for file_name in ('training_set.csv', 'validation_set.csv', 'test_set.csv')
)

# Every column except 'Class' (presumably the target label) is handed to the
# trees as an attribute.
attributes = training_set.columns.tolist()
attributes.remove('Class')

# Build one tree per gain function: tree1 uses entropy_gain, tree2 uses
# impurity_gain. Both receive the same attribute list and training frame.
tree1, tree2 = (
    decision_tree.Node(attributes, [], training_set, gain_function)
    for gain_function in (decision_tree.entropy_gain, decision_tree.impurity_gain)
)
print('Trees Initialized, trees will train now this may take up to 5 minutes')

# Train the entropy-based tree first, then the impurity-based one.
tree1.train()
tree2.train()
print('Trees Trained')

Reading data from data_sets2/
Trees Initialized, trees will train now this may take up to 5 minutes
Trees Trained


In [4]:
# Start the results list with the baseline row: L = K = 0 marks the trees
# as trained above, scored on the test set before any post-pruning call.
results = [{
    'L': 0, 'K': 0,
    'Entropy Acc': decision_tree.accuracy(tree1, test_set),
    'Impurity Acc': decision_tree.accuracy(tree2, test_set),
}]
results[0]

{'L': 0, 'K': 0, 'Entropy Acc': 0.7233333333333334, 'Impurity Acc': 0.725}

In [5]:
# Re-running this cell must not append a second copy of every pruned row, so
# start again from only the first entry (the unpruned L=0, K=0 baseline).
results = results[:1]

# Sweep the (L, K) post-pruning settings: prune each tree against the
# validation set, then score the pruned tree on the test set.
# NOTE(review): assumes post_pruning returns a pruned tree and leaves
# tree1/tree2 untouched — confirm in decision_tree.
# NOTE(review): if post_pruning is randomized, seed its RNG before this loop
# so the table is reproducible — confirm in decision_tree.
for L in (10, 20):
    for K in (3, 7, 11, 15, 19):
        pruned1 = tree1.post_pruning(L, K, validation_set)
        pruned2 = tree2.post_pruning(L, K, validation_set)

        results.append({
            'L': L, 'K': K,
            'Entropy Acc': decision_tree.accuracy(pruned1, test_set),
            'Impurity Acc': decision_tree.accuracy(pruned2, test_set),
        })

In [6]:
# Turn the list of per-setting accuracy records into one table and display it.
results_df = pd.DataFrame(data=results)
results_df

Unnamed: 0,Entropy Acc,Impurity Acc,K,L
0,0.723333,0.725,0,0
1,0.723333,0.735,3,10
2,0.703333,0.73,7,10
3,0.735,0.718333,11,10
4,0.723333,0.726667,15,10
5,0.725,0.716667,19,10
6,0.733333,0.74,3,20
7,0.76,0.726667,7,20
8,0.726667,0.74,11,20
9,0.725,0.741667,15,20


In [7]:
# Save the accuracy table in the same folder the input data was read from.
# With pandas' default index=True the row index is also written, as an
# unnamed first column.
results_df.to_csv(path_or_buf=f'{folder}pruning_tests_results.csv')