In [1]:
import os
import time
from os import path

In [2]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [3]:
import dataset
import tree as miptree
from sklearn import tree

In [4]:
# Experiment grid shared by the training cells below.

# Forwarded as `timelimit=` to the MIP tree classifiers
# (presumably seconds -- TODO confirm against the `tree` module).
timelimit = 600

# Benchmark instances, listed in the order in which they are processed.
datasets = [
    'monks-3', 'monks-2', 'monks-1', 'tic-tac-toe', 'spect', 'soybean-small',
    'house-votes-84', 'hayes-roth', 'car-evaluation', 'breast-cancer', 'balance-scale',
]

# Values passed as `alpha=` to the OCT and MFOCT classifiers.
alpha = [0, 0.01, 0.1]

# Values passed as `max_depth=` to every classifier.
depth = list(range(2, 6))

# Values passed as `random_state=` to the train/validation/test splits.
seeds = [37, 42, 53]

In [5]:
# Fractions of each dataset used for training, validation and testing.
train_ratio = 0.5
val_ratio = 0.25
test_ratio = 0.25

# The splits below hold out `1 - train_ratio` and then divide that remainder by
# `test_ratio / (test_ratio + val_ratio)`, so the three fractions must sum to 1;
# otherwise the actual split silently differs from the declared one.
assert min(train_ratio, val_ratio, test_ratio) > 0, 'split ratios must be positive'
assert abs(train_ratio + val_ratio + test_ratio - 1) < 1e-9, 'split ratios must sum to 1'

In [6]:
# create or load result tables
def load_or_create_results(csv_path, columns):
    """Return the results table stored at ``csv_path``, or an empty one.

    Args:
        csv_path: location of a previously written results CSV.
        columns: column names used for the empty table when the file is absent.

    Returns:
        The DataFrame read from ``csv_path`` if that file exists, otherwise an
        empty DataFrame with the given ``columns``.
    """
    if path.isfile(csv_path):
        return pd.read_csv(csv_path)
    return pd.DataFrame(columns=columns)


# The training cells write their CSVs into ./res; make sure it exists so that a
# fresh checkout does not fail on the first `to_csv` call.
os.makedirs('./res', exist_ok=True)

cols_no_alpha = ['instance', 'depth', 'seed', 'train_acc', 'val_acc', 'test_acc', 'train_time']
cols_with_alpha = ['instance', 'depth', 'alpha', 'seed', 'train_acc', 'val_acc', 'test_acc', 'train_time']

# The CART table always starts empty; it is never loaded from disk here.
res_sk = pd.DataFrame(columns=cols_no_alpha)
# The MIP tables are reloaded when present so finished runs can be skipped.
res_oct = load_or_create_results('./res/oct.csv', cols_with_alpha)
res_mfoct = load_or_create_results('./res/mfoct.csv', cols_with_alpha)
res_boct = load_or_create_results('./res/boct.csv', cols_no_alpha)

In [7]:
# CART baseline: fit a scikit-learn decision tree for every (seed, depth, dataset)
# combination and record its train/validation/test accuracy and fit time.
#
# Rows are collected in a plain list and turned into a DataFrame once, because
# `DataFrame.append` no longer exists in pandas >= 2.0. Building the table from
# scratch also makes the cell idempotent: re-running it does not duplicate rows.
sk_rows = []
for s in seeds:
    for d in depth:
        for data in datasets:
            x, y = dataset.loadData(data)
            # first split: training set vs. everything held out
            x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=1-train_ratio, random_state=s)
            # second split: held-out part into validation and test sets
            x_val, x_test, y_val, y_test = train_test_split(x_test, y_test,
                                                            test_size=test_ratio/(test_ratio+val_ratio), random_state=s)
            # scikit-learn permutes features at every split, so ties between equally
            # good splits are broken randomly unless random_state is fixed.
            clf = tree.DecisionTreeClassifier(max_depth=d, random_state=s)
            tick = time.time()
            clf.fit(x_train, y_train)
            tock = time.time()
            train_time = tock - tick
            train_acc = accuracy_score(y_train, clf.predict(x_train))
            val_acc = accuracy_score(y_val, clf.predict(x_val))
            test_acc = accuracy_score(y_test, clf.predict(x_test))
            print(data, 'cart-d{}'.format(d), 'train acc:', train_acc, 'val acc:', val_acc)
            sk_rows.append({'instance':data, 'depth':d, 'seed':s, 'train_acc':train_acc,
                            'val_acc':val_acc, 'test_acc':test_acc, 'train_time':train_time})

res_sk = pd.DataFrame(sk_rows, columns=['instance', 'depth', 'seed', 'train_acc', 'val_acc',
                                        'test_acc', 'train_time'])
res_sk.to_csv('./res/sk.csv', index=False)

monks-3 cart-d2 train acc: 0.9711191335740073 val acc: 0.9492753623188406
monks-2 cart-d2 train acc: 0.6266666666666667 val acc: 0.6733333333333333
monks-1 cart-d2 train acc: 0.7733812949640287 val acc: 0.697841726618705
tic-tac-toe cart-d2 train acc: 0.7202505219206681 val acc: 0.694560669456067
spect cart-d2 train acc: 0.7894736842105263 val acc: 0.7611940298507462
soybean-small cart-d2 train acc: 0.9130434782608695 val acc: 0.5
house-votes-84 cart-d2 train acc: 0.9741379310344828 val acc: 0.9482758620689655
hayes-roth cart-d2 train acc: 0.6125 val acc: 0.4
car-evaluation cart-d2 train acc: 0.7951388888888888 val acc: 0.7662037037037037
breast-cancer cart-d2 train acc: 0.7753623188405797 val acc: 0.8260869565217391
balance-scale cart-d2 train acc: 0.7275641025641025 val acc: 0.6089743589743589
monks-3 cart-d3 train acc: 0.9783393501805054 val acc: 0.9420289855072463
monks-2 cart-d3 train acc: 0.6766666666666666 val acc: 0.6266666666666667
monks-1 cart-d3 train acc: 0.8633093525179856

monks-1 cart-d5 train acc: 0.8741007194244604 val acc: 0.8057553956834532
tic-tac-toe cart-d5 train acc: 0.8622129436325678 val acc: 0.803347280334728
spect cart-d5 train acc: 0.8646616541353384 val acc: 0.7611940298507462
soybean-small cart-d5 train acc: 1.0 val acc: 1.0
house-votes-84 cart-d5 train acc: 0.9913793103448276 val acc: 0.9310344827586207
hayes-roth cart-d5 train acc: 0.8 val acc: 0.775
car-evaluation cart-d5 train acc: 0.8680555555555556 val acc: 0.8564814814814815
breast-cancer cart-d5 train acc: 0.8840579710144928 val acc: 0.6956521739130435
balance-scale cart-d5 train acc: 0.8814102564102564 val acc: 0.7756410256410257


In [8]:
def split_train_val_test(seed, *arrays):
    """Split every array in ``arrays`` into aligned train/validation/test parts.

    All arrays go through the same ``train_test_split`` calls, so they are
    partitioned with identical row indices. The proportions come from the
    notebook-level ``train_ratio``, ``val_ratio`` and ``test_ratio``.

    Args:
        seed: value forwarded as ``random_state`` to both splits.
        *arrays: equally long indexables (e.g. raw features, encoded features, labels).

    Returns:
        Three lists ``(train, val, test)``; each holds one part per input
        array, in the order the arrays were given.
    """
    # train_test_split returns [a_train, a_rest, b_train, b_rest, ...]
    first = train_test_split(*arrays, test_size=1-train_ratio, random_state=seed)
    train, held_out = first[0::2], first[1::2]
    second = train_test_split(*held_out, test_size=test_ratio/(test_ratio+val_ratio), random_state=seed)
    val, test = second[0::2], second[1::2]
    return train, val, test


def fit_or_report(results, csv_path, label_fmt, key, make_model, xs, ys):
    """Print the stored result for ``key`` or, if absent, train, score and store it.

    Args:
        results: DataFrame of finished runs; must contain every column named in ``key``.
        csv_path: file the updated table is written to after a new run.
        label_fmt: format string for the printed model tag; it is filled with the
            ``depth`` value and, when ``key`` has one, the ``alpha`` value.
        key: dict mapping identifying column names to values; must include
            ``'instance'`` and ``'depth'``.
        make_model: zero-argument callable returning an unfitted classifier. It is
            only called when no stored row matches ``key``.
        xs: ``(train, val, test)`` feature sets handed to ``fit``/``predict``.
        ys: ``(train, val, test)`` labels.

    Returns:
        ``results`` itself when a stored row was found, otherwise a new DataFrame
        with the freshly computed row appended.
    """
    # rows whose identifying columns all match the requested configuration
    match = None
    for column, value in key.items():
        hit = results[column] == value
        match = hit if match is None else match & hit
    cached = results[match]
    tag_columns = [c for c in ('depth', 'alpha') if c in key]

    if len(cached):
        label = label_fmt.format(*(cached[c].values[0] for c in tag_columns))
        print(key['instance'], label,
              'train acc:', cached['train_acc'].values[0], 'val acc:', cached['val_acc'].values[0])
        return results

    x_train, x_val, x_test = xs
    y_train, y_val, y_test = ys
    model = make_model()
    tick = time.time()
    model.fit(x_train, y_train)
    tock = time.time()

    record = dict(key)
    record['train_acc'] = accuracy_score(y_train, model.predict(x_train))
    record['val_acc'] = accuracy_score(y_val, model.predict(x_val))
    record['test_acc'] = accuracy_score(y_test, model.predict(x_test))
    record['train_time'] = tock - tick

    # `DataFrame.append` no longer exists in pandas >= 2.0; use concat instead.
    new_row = pd.DataFrame([record])
    if results.empty:
        # avoid concatenating with an empty frame; keep the table's column layout
        results = new_row.reindex(columns=results.columns)
    else:
        results = pd.concat([results, new_row], ignore_index=True)
    # save after every run so an interrupted session can resume from the CSV
    results.to_csv(csv_path, index=False)
    label = label_fmt.format(*(key[c] for c in tag_columns))
    print(key['instance'], label, 'train acc:', record['train_acc'], 'val acc:', record['val_acc'])
    return results


for s in seeds:
    for d in depth:
        for data in datasets:
            # load data
            x, y = dataset.loadData(data)
            # onehot encoding
            x_enc = dataset.oneHot(x)
            # data splitting: raw features, encoded features and labels are split
            # together so that all three share the same rows in every part
            train, val, test = split_train_val_test(s, x, x_enc, y)
            x_train, x_train_enc, y_train = train
            x_val, x_val_enc, y_val = val
            x_test, x_test_enc, y_test = test
            raw_xs = (x_train, x_val, x_test)
            enc_xs = (x_train_enc, x_val_enc, x_test_enc)
            ys = (y_train, y_val, y_test)

            for a in alpha:
                # oct (trained on the raw features)
                res_oct = fit_or_report(
                    res_oct, './res/oct.csv', 'oct-d{}-a{}',
                    {'instance':data, 'depth':d, 'alpha':a, 'seed':s},
                    lambda: miptree.optimalDecisionTreeClassifier(max_depth=d, min_samples_split=0, alpha=a,
                                                                  timelimit=timelimit, output=False),
                    raw_xs, ys)

                # mfoct (trained on the one-hot encoded features)
                res_mfoct = fit_or_report(
                    res_mfoct, './res/mfoct.csv', 'mfoct-d{}-a{}',
                    {'instance':data, 'depth':d, 'alpha':a, 'seed':s},
                    lambda: miptree.maxFlowOptimalDecisionTreeClassifier(max_depth=d, alpha=a,
                                                                         timelimit=timelimit, output=False),
                    enc_xs, ys)

            # boct (trained on the raw features; takes no alpha)
            res_boct = fit_or_report(
                res_boct, './res/boct.csv', 'boct-d{}',
                {'instance':data, 'depth':d, 'seed':s},
                lambda: miptree.binOptimalDecisionTreeClassifier(max_depth=d, min_samples_split=0,
                                                                 timelimit=timelimit, output=False),
                raw_xs, ys)

monks-3 oct-d2-a0.0 train acc: 0.971119134 val acc: 0.949275362
monks-3 mfoct-d2-a0.0 train acc: 0.9711191335740073 val acc: 0.9492753623188406
monks-3 oct-d2-a0.01 train acc: 0.971119134 val acc: 0.949275362
monks-3 mfoct-d2-a0.01 train acc: 0.9711191335740073 val acc: 0.9492753623188406
monks-3 oct-d2-a0.1 train acc: 0.971119134 val acc: 0.949275362
monks-3 mfoct-d2-a0.1 train acc: 0.9711191335740073 val acc: 0.9492753623188406
monks-3 boct-d2 train acc: 0.9711191335740073 val acc: 0.9492753623188406
monks-2 oct-d2-a0.0 train acc: 0.63 val acc: 0.6
monks-2 mfoct-d2-a0.0 train acc: 0.63 val acc: 0.62
monks-2 oct-d2-a0.01 train acc: 0.626666667 val acc: 0.673333333
monks-2 mfoct-d2-a0.01 train acc: 0.6266666666666667 val acc: 0.6733333333333333
monks-2 oct-d2-a0.1 train acc: 0.626666667 val acc: 0.673333333
monks-2 mfoct-d2-a0.1 train acc: 0.6266666666666667 val acc: 0.6733333333333333
monks-2 boct-d2 train acc: 0.63 val acc: 0.6
monks-1 oct-d2-a0.0 train acc: 0.809352518 val acc: 0.70

house-votes-84 oct-d3-a0.01 train acc: 0.974137931 val acc: 0.948275862
house-votes-84 mfoct-d3-a0.01 train acc: 0.9741379310344828 val acc: 0.9482758620689656
house-votes-84 oct-d3-a0.1 train acc: 0.974137931 val acc: 0.948275862
house-votes-84 mfoct-d3-a0.1 train acc: 0.9741379310344828 val acc: 0.9482758620689656
house-votes-84 boct-d3 train acc: 0.9913793103448276 val acc: 0.9310344827586208
hayes-roth oct-d3-a0.0 train acc: 0.7625 val acc: 0.65
hayes-roth mfoct-d3-a0.0 train acc: 0.7875 val acc: 0.625
hayes-roth oct-d3-a0.01 train acc: 0.7375 val acc: 0.5
hayes-roth mfoct-d3-a0.01 train acc: 0.7875 val acc: 0.625
hayes-roth oct-d3-a0.1 train acc: 0.6875 val acc: 0.575
hayes-roth mfoct-d3-a0.1 train acc: 0.725 val acc: 0.525
hayes-roth boct-d3 train acc: 0.7 val acc: 0.7
car-evaluation oct-d3-a0.0 train acc: 0.821759259 val acc: 0.805555556
car-evaluation mfoct-d3-a0.0 train acc: 0.8217592592592593 val acc: 0.8055555555555556
car-evaluation oct-d3-a0.01 train acc: 0.685185185 val a

spect oct-d5-a0.1 train acc: 0.789473684 val acc: 0.76119403
spect mfoct-d5-a0.1 train acc: 0.7894736842105263 val acc: 0.7611940298507462
spect boct-d5 train acc: 0.924812030075188 val acc: 0.6865671641791045
soybean-small oct-d5-a0.0 train acc: 1.0 val acc: 0.75
soybean-small mfoct-d5-a0.0 train acc: 1.0 val acc: 0.5
soybean-small oct-d5-a0.01 train acc: 1.0 val acc: 0.75
soybean-small mfoct-d5-a0.01 train acc: 1.0 val acc: 0.9166666666666666
soybean-small oct-d5-a0.1 train acc: 1.0 val acc: 0.916666667
soybean-small mfoct-d5-a0.1 train acc: 1.0 val acc: 0.6666666666666666
soybean-small boct-d5 train acc: 1.0 val acc: 1.0
house-votes-84 oct-d5-a0.0 train acc: 1.0 val acc: 0.913793103
house-votes-84 mfoct-d5-a0.0 train acc: 1.0 val acc: 0.896551724137931
house-votes-84 oct-d5-a0.01 train acc: 0.99137931 val acc: 0.948275862
house-votes-84 mfoct-d5-a0.01 train acc: 0.9913793103448276 val acc: 0.9655172413793104
house-votes-84 oct-d5-a0.1 train acc: 0.974137931 val acc: 0.948275862
hous

monks-2 oct-d3-a0.01 train acc: 0.64 val acc: 0.68
monks-2 mfoct-d3-a0.01 train acc: 0.6866666666666666 val acc: 0.6333333333333333
monks-2 oct-d3-a0.1 train acc: 0.64 val acc: 0.68
monks-2 mfoct-d3-a0.1 train acc: 0.64 val acc: 0.68
monks-2 boct-d3 train acc: 0.6766666666666666 val acc: 0.66
monks-1 oct-d3-a0.0 train acc: 0.881294964 val acc: 0.90647482
monks-1 mfoct-d3-a0.0 train acc: 0.9064748201438848 val acc: 0.8489208633093526
monks-1 oct-d3-a0.01 train acc: 0.852517986 val acc: 0.870503597
monks-1 mfoct-d3-a0.01 train acc: 0.9064748201438848 val acc: 0.8489208633093526
monks-1 oct-d3-a0.1 train acc: 0.748201439 val acc: 0.73381295
monks-1 mfoct-d3-a0.1 train acc: 0.7482014388489209 val acc: 0.7338129496402878
monks-1 boct-d3 train acc: 0.8525179856115108 val acc: 0.8705035971223022
tic-tac-toe oct-d3-a0.0 train acc: 0.741127349 val acc: 0.690376569
tic-tac-toe mfoct-d3-a0.0 train acc: 0.755741127348643 val acc: 0.7489539748953975
tic-tac-toe oct-d3-a0.01 train acc: 0.730688935 v

hayes-roth oct-d4-a0.01 train acc: 0.9125 val acc: 0.7
hayes-roth mfoct-d4-a0.01 train acc: 0.925 val acc: 0.625
hayes-roth oct-d4-a0.1 train acc: 0.7625 val acc: 0.475
hayes-roth mfoct-d4-a0.1 train acc: 0.775 val acc: 0.5
hayes-roth boct-d4 train acc: 0.9 val acc: 0.575
car-evaluation oct-d4-a0.0 train acc: 0.7974537037037037 val acc: 0.8495370370370371
car-evaluation mfoct-d4-a0.0 train acc: 0.8287037037037037 val acc: 0.8333333333333334
car-evaluation oct-d4-a0.01 train acc: 0.8564814814814815 val acc: 0.8773148148148148
car-evaluation mfoct-d4-a0.01 train acc: 0.8101851851851852 val acc: 0.8240740740740741
car-evaluation oct-d4-a0.1 train acc: 0.6875 val acc: 0.7129629629629629
car-evaluation mfoct-d4-a0.1 train acc: 0.6875 val acc: 0.7129629629629629
car-evaluation boct-d4 train acc: 0.8321759259259259 val acc: 0.8611111111111112
breast-cancer oct-d4-a0.0 train acc: 0.8768115942028986 val acc: 0.6666666666666666
breast-cancer mfoct-d4-a0.0 train acc: 0.8985507246376812 val acc: 0

monks-2 oct-d2-a0.1 train acc: 0.6466666666666666 val acc: 0.72
monks-2 mfoct-d2-a0.1 train acc: 0.6466666666666666 val acc: 0.72
monks-2 boct-d2 train acc: 0.6066666666666667 val acc: 0.5733333333333334
monks-1 oct-d2-a0.0 train acc: 0.8093525179856115 val acc: 0.7841726618705036
monks-1 mfoct-d2-a0.0 train acc: 0.8093525179856115 val acc: 0.7841726618705036
monks-1 oct-d2-a0.01 train acc: 0.7769784172661871 val acc: 0.7338129496402878
monks-1 mfoct-d2-a0.01 train acc: 0.8093525179856115 val acc: 0.7841726618705036
monks-1 oct-d2-a0.1 train acc: 0.7769784172661871 val acc: 0.7338129496402878
monks-1 mfoct-d2-a0.1 train acc: 0.7769784172661871 val acc: 0.7338129496402878
monks-1 boct-d2 train acc: 0.8093525179856115 val acc: 0.7841726618705036
tic-tac-toe oct-d2-a0.0 train acc: 0.7077244258872651 val acc: 0.7071129707112971
tic-tac-toe mfoct-d2-a0.0 train acc: 0.7077244258872651 val acc: 0.7071129707112971
tic-tac-toe oct-d2-a0.01 train acc: 0.7035490605427975 val acc: 0.67364016736401

hayes-roth mfoct-d3-a0.1 train acc: 0.725 val acc: 0.6
hayes-roth boct-d3 train acc: 0.8 val acc: 0.675
car-evaluation oct-d3-a0.0 train acc: 0.8171296296296297 val acc: 0.7916666666666666
car-evaluation mfoct-d3-a0.0 train acc: 0.8171296296296297 val acc: 0.7916666666666666
car-evaluation oct-d3-a0.01 train acc: 0.8090277777777778 val acc: 0.7962962962962963
car-evaluation mfoct-d3-a0.01 train acc: 0.8078703703703703 val acc: 0.7893518518518519
car-evaluation oct-d3-a0.1 train acc: 0.7106481481481481 val acc: 0.6967592592592593
car-evaluation mfoct-d3-a0.1 train acc: 0.7106481481481481 val acc: 0.6967592592592593
car-evaluation boct-d3 train acc: 0.8171296296296297 val acc: 0.8148148148148148
breast-cancer oct-d3-a0.0 train acc: 0.8333333333333334 val acc: 0.6811594202898551
breast-cancer mfoct-d3-a0.0 train acc: 0.855072463768116 val acc: 0.7536231884057971
breast-cancer oct-d3-a0.01 train acc: 0.8405797101449275 val acc: 0.6811594202898551
breast-cancer mfoct-d3-a0.01 train acc: 0.8

spect oct-d5-a0.0 train acc: 0.8872180451127819 val acc: 0.746268656716418
spect mfoct-d5-a0.0 train acc: 0.924812030075188 val acc: 0.7014925373134329
spect oct-d5-a0.01 train acc: 0.8120300751879699 val acc: 0.746268656716418
spect mfoct-d5-a0.01 train acc: 0.8721804511278195 val acc: 0.8059701492537313
spect oct-d5-a0.1 train acc: 0.8120300751879699 val acc: 0.746268656716418
spect mfoct-d5-a0.1 train acc: 0.8120300751879699 val acc: 0.746268656716418
spect boct-d5 train acc: 0.9022556390977444 val acc: 0.746268656716418
soybean-small oct-d5-a0.0 train acc: 1.0 val acc: 0.6666666666666666
soybean-small mfoct-d5-a0.0 train acc: 1.0 val acc: 0.8333333333333334
soybean-small oct-d5-a0.01 train acc: 0.9130434782608696 val acc: 0.75
soybean-small mfoct-d5-a0.01 train acc: 1.0 val acc: 1.0
soybean-small oct-d5-a0.1 train acc: 1.0 val acc: 0.8333333333333334
soybean-small mfoct-d5-a0.1 train acc: 1.0 val acc: 1.0
soybean-small boct-d5 train acc: 0.6956521739130435 val acc: 0.58333333333333