In [1]:
import time

In [2]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [3]:
import dataset
import tree as miptree
from sklearn import tree

In [4]:
timelimit = 600
datasets = ['balance-scale', 'breast-cancer', 'car-evaluation', 'hayes-roth', 'house-votes-84', 
            'soybean-small', 'spect', 'tic-tac-toe', 'monks-1', 'monks-2', 'monks-3']
alpha = [0, 0.01, 0.1]
depth = [2, 3, 4, 5]
seeds = [31, 42, 53]

In [5]:
train_ratio = 0.5
val_ratio = 0.25
test_ratio = 0.25

In [6]:
# create table
res_sk = pd.DataFrame(columns=['instance', 'depth', 'seed', 'train_acc', 'val_acc', 'test_acc', 'train_time'])
res_oct = pd.DataFrame(columns=['instance', 'depth', 'alpha', 'seed', 'train_acc', 'val_acc', 'test_acc', 'train_time'])
res_mfoct = pd.DataFrame(columns=['instance', 'depth', 'alpha', 'seed', 'train_acc', 'val_acc', 'test_acc', 'train_time'])
res_boct = pd.DataFrame(columns=['instance', 'depth', 'seed', 'train_acc', 'val_acc', 'test_acc', 'train_time'])

In [7]:
for s in seeds:
    for d in depth:
        for data in datasets:
            x, y = dataset.loadData(data)
            x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=1-train_ratio, random_state=s)
            x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, 
                                                            test_size=test_ratio/(test_ratio+val_ratio), random_state=s)
            clf = tree.DecisionTreeClassifier(max_depth=2)
            tick = time.time()
            clf.fit(x_train, y_train)
            tock = time.time()
            train_time = tock - tick
            train_acc = accuracy_score(y_train, clf.predict(x_train))
            val_acc = accuracy_score(y_val, clf.predict(x_val))
            test_acc = accuracy_score(y_test, clf.predict(x_test))
            row = {'instance':data, 'depth':d, 'seed':s, 'train_acc':train_acc, 
                   'val_acc':val_acc, 'test_acc':test_acc, 'train_time':train_time}
            res_sk = res_sk.append(row, ignore_index=True)
            res_sk.to_csv('./res/sk.csv', index=False)

In [8]:
for s in seeds:
    for d in depth:
        for data in datasets:
            # load data
            x, y = dataset.loadData(data)
            # onehot encoding
            x_enc = dataset.oneHot(x)
            # data splition
            x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=1-train_ratio, random_state=s)
            x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, 
                                                            test_size=test_ratio/(test_ratio+val_ratio), random_state=s)
            x_train_enc, x_test_enc, y_train, y_test = train_test_split(x_enc, y, test_size=1-train_ratio, random_state=s)
            x_val_enc, x_test_enc, y_val, y_test = train_test_split(x_test_enc, y_test, 
                                                                    test_size=test_ratio/(test_ratio+val_ratio), random_state=s)
            
            for a in alpha:
                # oct
                octree = miptree.optimalDecisionTreeClassifier(max_depth=d, min_samples_split=0, alpha=a,
                                                               timelimit=timelimit, output=False)
                tick = time.time()
                octree.fit(x_train, y_train)
                tock = time.time()
                train_time = tock - tick
                train_acc = accuracy_score(y_train, octree.predict(x_train))
                val_acc = accuracy_score(y_val, octree.predict(x_val))
                test_acc = accuracy_score(y_test, octree.predict(x_test))
                row = {'instance':data, 'depth':d, 'alpha':a, 'seed':s, 'train_acc':train_acc, 'val_acc':val_acc,
                       'test_acc':test_acc, 'train_time':train_time}
                res_oct = res_oct.append(row, ignore_index=True)
                res_oct.to_csv('./res/oct.csv', index=False)
                #print(row)
                
                # mfoct
                mfoct = miptree.maxFlowOptimalDecisionTreeClassifier(max_depth=d, alpha=a,
                                                                     timelimit=timelimit, output=False)
                tick = time.time()
                mfoct.fit(x_train_enc, y_train)
                tock = time.time()
                train_time = tock - tick
                train_acc = accuracy_score(y_train, mfoct.predict(x_train_enc))
                val_acc = accuracy_score(y_val, mfoct.predict(x_val_enc))
                test_acc = accuracy_score(y_test, mfoct.predict(x_test_enc))
                row = {'instance':data, 'depth':d, 'alpha':a, 'seed':s, 'train_acc':train_acc, 'val_acc':val_acc,
                       'test_acc':test_acc, 'train_time':train_time}
                res_mfoct = res_mfoct.append(row, ignore_index=True)
                res_mfoct.to_csv('./res/mfoct.csv', index=False)
                #print(row)
            
            # boct
            boct = miptree.binOptimalDecisionTreeClassifier(max_depth=d, min_samples_split=0,
                                                            timelimit=timelimit, output=False)
            tick = time.time()
            boct.fit(x_train, y_train)
            tock = time.time()
            train_time = tock - tick
            train_acc = accuracy_score(y_train, boct.predict(x_train))
            val_acc = accuracy_score(y_val, boct.predict(x_val))
            test_acc = accuracy_score(y_test, boct.predict(x_test))
            row = {'instance':data, 'depth':d, 'seed':s, 'train_acc':train_acc, 'val_acc':val_acc,
                   'test_acc':test_acc, 'train_time':train_time}
            res_boct = res_boct.append(row, ignore_index=True)
            res_boct.to_csv('./res/boct.csv', index=False)
            #print(row)

Training data include 312 instances, 4 features.
Academic license - for non-commercial use only - expires 2021-06-13
Using license file C:\Users\Apocrypse\gurobi.lic
Changed value of parameter timelimit to 600.0
   Prev: inf  Min: 0.0  Max: inf  Default: inf


PermissionError: [Errno 13] Permission denied: './res/oct.csv'