In [1]:
import time
from os import path

In [2]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [3]:
import dataset
import tree as miptree
from sklearn import tree

In [4]:
# Experiment grid shared by every training cell below.

# Budget handed to each MIP tree model as its `timelimit` argument
# (presumably seconds -- confirm against the `tree` module).
timelimit = 600

# Benchmark instances, listed alphabetically-ish and then processed in reverse,
# so the first dataset trained is 'monks-3' and the last is 'balance-scale'.
datasets = [
    'balance-scale', 'breast-cancer', 'car-evaluation', 'hayes-roth',
    'house-votes-84', 'soybean-small', 'spect', 'tic-tac-toe',
    'monks-1', 'monks-2', 'monks-3',
][::-1]

# Values swept for the `alpha` argument of the OCT and MFOCT models.
alpha = [0, 0.01, 0.1]
# Maximum tree depths to evaluate.
depth = list(range(2, 6))
# Random seeds used for the train/validation/test splits.
seeds = [37, 42, 53]

In [5]:
# Fractions of each dataset assigned to the training, validation and test splits.
train_ratio, val_ratio, test_ratio = 0.5, 0.25, 0.25

In [6]:
# Result tables.
# The scikit-learn baseline table always starts empty; each MIP model's table is
# resumed from its CSV under ./res/ when that file already exists, so runs that
# were finished earlier can be recognised and skipped by the training loop.
_base_cols = ['instance', 'depth', 'seed', 'train_acc', 'val_acc', 'test_acc', 'train_time']
_alpha_cols = ['instance', 'depth', 'alpha', 'seed', 'train_acc', 'val_acc', 'test_acc', 'train_time']


def _load_or_init(csv_file, columns):
    """Return the table stored at `csv_file`, or an empty frame with `columns` if no such file exists."""
    if not path.isfile(csv_file):
        return pd.DataFrame(columns=columns)
    return pd.read_csv(csv_file)


res_sk = pd.DataFrame(columns=_base_cols)
res_oct = _load_or_init('./res/oct.csv', _alpha_cols)
res_mfoct = _load_or_init('./res/mfoct.csv', _alpha_cols)
res_boct = _load_or_init('./res/boct.csv', _base_cols)

In [7]:
# Baseline: fit a scikit-learn decision tree for every (seed, depth, dataset)
# combination and record its accuracy on the train/validation/test splits.
#
# The rows are collected in a plain list and turned into a DataFrame once, so
# re-running this cell rebuilds `res_sk` from scratch instead of appending
# duplicate rows to whatever an earlier execution left behind.
sk_columns = ['instance', 'depth', 'seed', 'train_acc', 'val_acc', 'test_acc', 'train_time']
sk_rows = []
for s in seeds:
    for d in depth:
        for data in datasets:
            x, y = dataset.loadData(data)
            # First hold out (1 - train_ratio) of the samples, then divide that
            # hold-out portion into validation and test according to their ratios.
            x_train, x_hold, y_train, y_hold = train_test_split(
                x, y, test_size=1-train_ratio, random_state=s)
            x_val, x_test, y_val, y_test = train_test_split(
                x_hold, y_hold, test_size=test_ratio/(test_ratio+val_ratio), random_state=s)
            # Use the depth being swept (previously hard-coded to 2, which made the
            # recorded 'depth' column wrong for every d != 2). Seeding the tree
            # makes tie-breaking between equally good splits reproducible.
            clf = tree.DecisionTreeClassifier(max_depth=d, random_state=s)
            tick = time.perf_counter()
            clf.fit(x_train, y_train)
            train_time = time.perf_counter() - tick  # seconds spent in fit() only
            sk_rows.append({
                'instance': data,
                'depth': d,
                'seed': s,
                'train_acc': accuracy_score(y_train, clf.predict(x_train)),
                'val_acc': accuracy_score(y_val, clf.predict(x_val)),
                'test_acc': accuracy_score(y_test, clf.predict(x_test)),
                'train_time': train_time,
            })

# DataFrame.append was removed in pandas 2.0; build the table in one step.
res_sk = pd.DataFrame(sk_rows, columns=sk_columns)
res_sk.to_csv('./res/sk.csv', index=False)

In [None]:
def _fit_and_score(model, x_tr, y_tr, x_va, y_va, x_te, y_te):
    """Fit `model` on the training split and score it on all three splits.

    Returns a dict with the keys 'train_acc', 'val_acc', 'test_acc' (each the
    result of `accuracy_score` on the corresponding split) and 'train_time'
    (seconds elapsed inside `model.fit` only; prediction time is excluded).
    """
    tick = time.perf_counter()
    model.fit(x_tr, y_tr)
    train_time = time.perf_counter() - tick
    return {'train_acc': accuracy_score(y_tr, model.predict(x_tr)),
            'val_acc': accuracy_score(y_va, model.predict(x_va)),
            'test_acc': accuracy_score(y_te, model.predict(x_te)),
            'train_time': train_time}


# Train the three MIP tree models for every (seed, depth, dataset[, alpha])
# combination. A combination that already has a row in the matching result
# table is only reported, not retrained; every new result is written to its
# CSV immediately so an interrupted run can be resumed.
for s in seeds:
    for d in depth:
        for data in datasets:
            # load data
            x, y = dataset.loadData(data)
            # onehot encoding
            x_enc = dataset.oneHot(x)
            # data splitting: raw features, encoded features and labels are split
            # in a single call so that all three always share the same rows.
            x_train, x_hold, x_train_enc, x_hold_enc, y_train, y_hold = train_test_split(
                x, x_enc, y, test_size=1-train_ratio, random_state=s)
            x_val, x_test, x_val_enc, x_test_enc, y_val, y_test = train_test_split(
                x_hold, x_hold_enc, y_hold,
                test_size=test_ratio/(test_ratio+val_ratio), random_state=s)
            # NOTE(review): oct and boct are trained on the raw features while
            # mfoct gets the one-hot encoding -- confirm this is intended.
            raw_splits = (x_train, y_train, x_val, y_val, x_test, y_test)
            enc_splits = (x_train_enc, y_train, x_val_enc, y_val, x_test_enc, y_test)

            for a in alpha:
                # oct
                row = res_oct[(res_oct['instance'] == data) & (res_oct['depth'] == d) &
                              (res_oct['alpha'] == a) & (res_oct['seed'] == s)]
                if len(row):
                    print(data, 'oct-d{}-a{}'.format(row['depth'].values[0],row['alpha'].values[0]),
                          'train acc:', row['train_acc'].values[0], 'val acc:', row['val_acc'].values[0])
                else:
                    octree = miptree.optimalDecisionTreeClassifier(max_depth=d, min_samples_split=0, alpha=a,
                                                                   timelimit=timelimit, output=False)
                    scores = _fit_and_score(octree, *raw_splits)
                    row = {'instance':data, 'depth':d, 'alpha':a, 'seed':s, **scores}
                    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
                    res_oct = pd.concat([res_oct, pd.DataFrame([row])], ignore_index=True)
                    res_oct.to_csv('./res/oct.csv', index=False)
                    print(data, 'oct-d{}-a{}'.format(d,a),
                          'train acc:', scores['train_acc'], 'val acc:', scores['val_acc'])

                # mfoct
                row = res_mfoct[(res_mfoct['instance'] == data) & (res_mfoct['depth'] == d) &
                                (res_mfoct['alpha'] == a) & (res_mfoct['seed'] == s)]
                if len(row):
                    print(data, 'mfoct-d{}-a{}'.format(row['depth'].values[0],row['alpha'].values[0]),
                          'train acc:', row['train_acc'].values[0], 'val acc:', row['val_acc'].values[0])
                else:
                    mfoct = miptree.maxFlowOptimalDecisionTreeClassifier(max_depth=d, alpha=a,
                                                                         timelimit=timelimit, output=False)
                    scores = _fit_and_score(mfoct, *enc_splits)
                    row = {'instance':data, 'depth':d, 'alpha':a, 'seed':s, **scores}
                    res_mfoct = pd.concat([res_mfoct, pd.DataFrame([row])], ignore_index=True)
                    res_mfoct.to_csv('./res/mfoct.csv', index=False)
                    print(data, 'mfoct-d{}-a{}'.format(d,a),
                          'train acc:', scores['train_acc'], 'val acc:', scores['val_acc'])

            # boct (takes no alpha, so it runs once per seed/depth/dataset)
            row = res_boct[(res_boct['instance'] == data) & (res_boct['depth'] == d) & (res_boct['seed'] == s)]
            if len(row):
                print(data, 'boct-d{}'.format(row['depth'].values[0]),
                      'train acc:', row['train_acc'].values[0], 'val acc:', row['val_acc'].values[0])
            else:
                boct = miptree.binOptimalDecisionTreeClassifier(max_depth=d, min_samples_split=0,
                                                                timelimit=timelimit, output=False)
                scores = _fit_and_score(boct, *raw_splits)
                row = {'instance':data, 'depth':d, 'seed':s, **scores}
                res_boct = pd.concat([res_boct, pd.DataFrame([row])], ignore_index=True)
                res_boct.to_csv('./res/boct.csv', index=False)
                print(data, 'boct-d{}'.format(d),
                      'train acc:', scores['train_acc'], 'val acc:', scores['val_acc'])

monks-3 oct-d2-a0.0 train acc: 0.9711191335740073 val acc: 0.9492753623188406
monks-3 mfoct-d2-a0.0 train acc: 0.9711191335740073 val acc: 0.9492753623188406
monks-3 oct-d2-a0.01 train acc: 0.9711191335740073 val acc: 0.9492753623188406
monks-3 mfoct-d2-a0.01 train acc: 0.9711191335740073 val acc: 0.9492753623188406
monks-3 oct-d2-a0.1 train acc: 0.9711191335740073 val acc: 0.9492753623188406
monks-3 mfoct-d2-a0.1 train acc: 0.9711191335740073 val acc: 0.9492753623188406
monks-3 boct-d2 train acc: 0.9711191335740073 val acc: 0.9492753623188406
monks-2 oct-d2-a0.0 train acc: 0.63 val acc: 0.6
monks-2 mfoct-d2-a0.0 train acc: 0.63 val acc: 0.62
monks-2 oct-d2-a0.01 train acc: 0.6266666666666667 val acc: 0.6733333333333333
monks-2 mfoct-d2-a0.01 train acc: 0.6266666666666667 val acc: 0.6733333333333333
monks-2 oct-d2-a0.1 train acc: 0.6266666666666667 val acc: 0.6733333333333333
monks-2 mfoct-d2-a0.1 train acc: 0.6266666666666667 val acc: 0.6733333333333333
monks-2 boct-d2 train acc: 0.63

hayes-roth oct-d3-a0.0 train acc: 0.7625 val acc: 0.65
hayes-roth mfoct-d3-a0.0 train acc: 0.7875 val acc: 0.625
hayes-roth oct-d3-a0.01 train acc: 0.7375 val acc: 0.5
hayes-roth mfoct-d3-a0.01 train acc: 0.7875 val acc: 0.625
hayes-roth oct-d3-a0.1 train acc: 0.6875 val acc: 0.575
hayes-roth mfoct-d3-a0.1 train acc: 0.725 val acc: 0.525
hayes-roth boct-d3 train acc: 0.7 val acc: 0.7
car-evaluation oct-d3-a0.0 train acc: 0.8217592592592593 val acc: 0.8055555555555556
car-evaluation mfoct-d3-a0.0 train acc: 0.8217592592592593 val acc: 0.8055555555555556
car-evaluation oct-d3-a0.01 train acc: 0.6851851851851852 val acc: 0.7430555555555556
car-evaluation mfoct-d3-a0.01 train acc: 0.8182870370370371 val acc: 0.7939814814814815
car-evaluation oct-d3-a0.1 train acc: 0.6851851851851852 val acc: 0.7430555555555556
car-evaluation mfoct-d3-a0.1 train acc: 0.6851851851851852 val acc: 0.7430555555555556
car-evaluation boct-d3 train acc: 0.8252314814814815 val acc: 0.7962962962962963
breast-cancer 

Using license file C:\Users\Apocrypse\gurobi.lic
monks-2 mfoct-d5-a0.01 train acc: 0.7133333333333334 val acc: 0.6533333333333333
monks-2 oct-d5-a0.1 train acc: 0.6266666666666667 val acc: 0.6733333333333333
monks-2 mfoct-d5-a0.1 train acc: 0.6266666666666667 val acc: 0.6733333333333333
monks-2 boct-d5 train acc: 0.8333333333333334 val acc: 0.8066666666666666
monks-1 oct-d5-a0 train acc: 0.9280575539568345 val acc: 0.8776978417266187
monks-1 mfoct-d5-a0 train acc: 1.0 val acc: 1.0
monks-1 oct-d5-a0.01 train acc: 1.0 val acc: 1.0
monks-1 mfoct-d5-a0.01 train acc: 1.0 val acc: 1.0
monks-1 oct-d5-a0.1 train acc: 0.7733812949640287 val acc: 0.697841726618705
monks-1 mfoct-d5-a0.1 train acc: 0.7733812949640287 val acc: 0.697841726618705
monks-1 boct-d5 train acc: 0.8345323741007195 val acc: 0.8489208633093526
tic-tac-toe oct-d5-a0 train acc: 0.7870563674321504 val acc: 0.7447698744769874
tic-tac-toe mfoct-d5-a0 train acc: 0.8225469728601252 val acc: 0.7824267782426778
tic-tac-toe oct-d5-a0.

house-votes-84 mfoct-d2-a0.01 train acc: 0.9655172413793104 val acc: 0.9827586206896551
house-votes-84 oct-d2-a0.1 train acc: 0.9655172413793104 val acc: 0.9827586206896551
house-votes-84 mfoct-d2-a0.1 train acc: 0.9655172413793104 val acc: 0.9827586206896551
house-votes-84 boct-d2 train acc: 0.9655172413793104 val acc: 0.9655172413793104
hayes-roth oct-d2-a0 train acc: 0.675 val acc: 0.475
hayes-roth mfoct-d2-a0 train acc: 0.675 val acc: 0.5
hayes-roth oct-d2-a0.01 train acc: 0.675 val acc: 0.5
hayes-roth mfoct-d2-a0.01 train acc: 0.675 val acc: 0.5
hayes-roth oct-d2-a0.1 train acc: 0.6625 val acc: 0.525
hayes-roth mfoct-d2-a0.1 train acc: 0.6625 val acc: 0.525
hayes-roth boct-d2 train acc: 0.4875 val acc: 0.4
car-evaluation oct-d2-a0 train acc: 0.7511574074074074 val acc: 0.8009259259259259
car-evaluation mfoct-d2-a0 train acc: 0.7511574074074074 val acc: 0.8009259259259259
car-evaluation oct-d2-a0.01 train acc: 0.7511574074074074 val acc: 0.8009259259259259
car-evaluation mfoct-d2-a

monks-3 mfoct-d4-a0 train acc: 0.9963898916967509 val acc: 0.9855072463768116
monks-3 oct-d4-a0.01 train acc: 0.9963898916967509 val acc: 0.9855072463768116
monks-3 mfoct-d4-a0.01 train acc: 0.9963898916967509 val acc: 0.9855072463768116
monks-3 oct-d4-a0.1 train acc: 0.9711191335740073 val acc: 0.9492753623188406
monks-3 mfoct-d4-a0.1 train acc: 0.9711191335740073 val acc: 0.9492753623188406
monks-3 boct-d4 train acc: 0.5379061371841155 val acc: 0.5942028985507246
monks-2 oct-d4-a0 train acc: 0.73 val acc: 0.6333333333333333
monks-2 mfoct-d4-a0 train acc: 0.7233333333333334 val acc: 0.6733333333333333
monks-2 oct-d4-a0.01 train acc: 0.64 val acc: 0.68
monks-2 mfoct-d4-a0.01 train acc: 0.6733333333333333 val acc: 0.6866666666666666
monks-2 oct-d4-a0.1 train acc: 0.64 val acc: 0.68
monks-2 mfoct-d4-a0.1 train acc: 0.64 val acc: 0.68
monks-2 boct-d4 train acc: 0.6933333333333334 val acc: 0.64
monks-1 oct-d4-a0 train acc: 0.9820143884892086 val acc: 0.9424460431654677
monks-1 mfoct-d4-a0 