In [None]:
import os
from functools import lru_cache
from multiprocessing.dummy import Pool as ThreadPool
from subprocess import Popen, PIPE, DEVNULL, TimeoutExpired

import numpy as np

In [None]:
# For loading bar
try:
    from ipywidgets import FloatProgress
except Exception as e:
    print("Couldn't import ipywidget. Using deprecated alternative", e)
    from IPython.html.widgets import FloatProgress
from IPython.display import display

In [None]:
# Search space for the sweep: option name -> tuple of candidate values.
# The trailing comment on each entry is the command-line flag that
# extract_config emits for that option.
possible_configs = dict(
    binary_splits=(True, False),  # -B
    confidence_factor=tuple(
        float(value) for value in np.around(np.arange(0.1, 1, 0.1), 2)
    ),  # -C %1.2f
    min_num_obj=tuple(range(1, 10)),  # -M %d
    num_folds=tuple(range(2, 10)),  # -N %d
    reduced_error_pruning=(True, False),  # -R
    save_instance_data=(True, False),  # -L
    subtree_raising=(True, False),  # -S (emitted when the value is False)
    unpruned=(True, False),  # -U
    use_laplace=(True, False),  # -A
)

In [None]:
def extract_config(config):
    """Turn a configuration dict into a list of command-line arguments.

    Boolean options only count when their value is exactly ``True``
    (or exactly ``False`` for ``subtree_raising``); missing keys are ignored.

    Precedence rules applied here:
      * ``-R`` (reduced_error_pruning) wins over ``-U`` (unpruned).
      * ``-C`` (confidence_factor) is dropped when either ``-R`` or ``-U``
        was emitted.
      * ``-N`` (num_folds) is only emitted together with ``-R``.
      * ``-S`` is emitted when ``subtree_raising`` is ``False``, unless
        ``-U`` was emitted.

    Args:
        config: mapping of option name to value (see ``possible_configs``).

    Returns:
        list of str: arguments in the fixed order
        ``-R, -U, -B, -C, -M, -N, -L, -S, -A``.
    """
    flags = []

    use_rep = config.get('reduced_error_pruning') is True
    if use_rep:
        flags.append('-R')

    # -U is only honoured when -R was not requested.
    skip_pruning = (not use_rep) and config.get('unpruned') is True
    if skip_pruning:
        flags.append('-U')

    if config.get('binary_splits') is True:
        flags.append('-B')

    if 'confidence_factor' in config and not (use_rep or skip_pruning):
        flags.extend(['-C', '{:1.2f}'.format(config['confidence_factor'])])

    if 'min_num_obj' in config:
        flags.extend(['-M', str(config['min_num_obj'])])

    if use_rep and 'num_folds' in config:
        flags.extend(['-N', str(config['num_folds'])])

    if config.get('save_instance_data') is True:
        flags.append('-L')

    if not skip_pruning and config.get('subtree_raising') is False:
        flags.append('-S')

    if config.get('use_laplace') is True:
        flags.append('-A')

    return flags
    

In [None]:
def filename_from_config(prefix, config, extension):
    """Build a file name that encodes the options of ``config``.

    The arguments produced by ``extract_config`` are joined with ``_``,
    stripped of every ``-`` and lower-cased, then wrapped as
    ``<prefix><options>.<extension>``.

    Args:
        prefix: text placed in front of the encoded options.
        config: configuration dict passed to ``extract_config``.
        extension: file extension; one leading ``.`` is dropped when it is
            a ``str``.

    Returns:
        str: the assembled file name.
    """
    # Accept both 'out' and '.out'.
    if type(extension) is str and extension[:1] == '.':
        extension = extension[1:]

    tokens = extract_config(config)
    stem = '_'.join(tokens).replace('-', '').lower()
    return '{}{}.{}'.format(prefix, stem, extension)

In [None]:
def run_one_experiment(config):
    """Run one J48 experiment as a ``java`` subprocess and return its stdout.

    The base command line is extended with the arguments produced by
    ``extract_config(config)``; the process gets 30 seconds to finish.

    Args:
        config: configuration dict understood by ``extract_config``.

    Returns:
        str: the process's standard output decoded as UTF-8.

    Raises:
        ValueError: with a line taken from stderr when the process wrote
            nothing to stdout, or with the message ``'timeout'`` when the
            process did not finish in time.
    """
    command_line = 'java -Xmx1000M -cp /usr/share/java/weka.jar weka.classifiers.trees.J48 -t auto_imports_85_data.arff -c first -x 10'

    cmd_array = command_line.split()
    cmd_array += extract_config(config)

    proc = Popen(cmd_array, stderr=PIPE, stdout=PIPE)
    try:
        outs, errs = proc.communicate(timeout=30)
    except TimeoutExpired:
        # Identify the offending run by its option string.
        print('Process Timeout !', ' '.join(extract_config(config)))
        proc.kill()
        # Reap the killed child and drain its pipes so it does not linger.
        proc.communicate()
        raise ValueError('timeout')

    if len(outs) > 0:
        return outs.decode('utf8')

    # Nothing on stdout: report a line from stderr. The second line is
    # preferred (presumably the first is blank in the tool's error output
    # -- TODO confirm); otherwise use the first non-blank line.
    stderr_lines = errs.decode('utf8', errors='replace').split('\n')
    if len(stderr_lines) > 1 and stderr_lines[1].strip():
        error = stderr_lines[1]
    else:
        error = next(
            (line for line in stderr_lines if line.strip()),
            'process produced no output',
        )
    raise ValueError(error)


In [None]:
@lru_cache()
def combinations(a, b):
    """Pair every element of ``a`` with every element of ``b``.

    Results are memoised by ``lru_cache``, so both arguments must be
    hashable (tuples, not lists).

    Args:
        a: iterable of left-hand items, or ``None`` to start a chain.
        b: iterable of right-hand items.

    Returns:
        ``b`` itself when ``a`` is ``None``; otherwise a tuple of
        ``(item_a, item_b)`` pairs, ordered with ``a`` varying slowest.
    """
    if a is None:
        return b
    return tuple((left, right) for left in a for right in b)

@lru_cache()
def unwrap(elem):
    """Flatten a left-nested pair such as ``((a, b), c)`` into ``(a, b, c)``.

    Results are memoised by ``lru_cache``, so ``elem`` must be hashable.

    Args:
        elem: a 2-item sequence ``(head, last)`` where ``head`` may itself
            be a nested pair.

    Returns:
        A flat tuple, or ``None`` when ``head`` is not a tuple but ``last``
        is (that shape is not handled).
    """
    head, last = elem
    if type(head) is tuple:
        # Flatten the nested prefix first, then append the newest value.
        return unwrap(head) + (last,)
    if type(last) is not tuple:
        return (head, last)
    return None

def unwrap_all(elems):
    """Apply ``unwrap`` to each item of ``elems`` and return the results as a list."""
    return list(map(unwrap, elems))

In [None]:
class HashableConfig:
    """Wrapper that makes a configuration dict usable as a set member.

    Two wrappers are equal when ``extract_config`` yields the same argument
    list for both configurations, i.e. when they would launch the same
    command line, even if the underlying dicts differ.

    Attributes are plain instance fields:
      idx    -- caller-supplied index of the configuration.
      config -- the wrapped configuration dict.
    """

    def __init__(self, idx, config):
        self.idx = idx
        self.config = config

    def _command_string(self):
        """Return the space-joined arguments produced for this configuration."""
        return ' '.join(extract_config(self.config))

    def __hash__(self):
        return hash(self._command_string())

    def __repr__(self):
        return self._command_string()

    def __str__(self):
        return self.__repr__()

    def __eq__(self, other):
        # Compare the command strings themselves rather than their hashes:
        # equal hashes do not imply equal strings, and unrelated objects
        # must never compare equal to a configuration.
        if not isinstance(other, HashableConfig):
            return NotImplemented
        return self._command_string() == other._command_string()

In [None]:
def get_all_unique_configs():
    """Enumerate every combination in ``possible_configs`` and drop duplicates.

    Combinations are built with ``combinations`` and flattened with
    ``unwrap_all``, then turned into dicts keyed like ``possible_configs``.
    Duplicates are removed by placing ``HashableConfig`` wrappers in a set.

    Returns:
        list of dict: one configuration per distinct wrapper. The list
        follows set iteration order, so its ordering is not guaranteed.
    """
    nested = None
    for candidate_values in possible_configs.values():
        nested = combinations(nested, candidate_values)

    option_names = possible_configs.keys()
    all_configs = []
    for flat_values in unwrap_all(nested):
        all_configs.append(dict(zip(option_names, flat_values)))

    # The set keeps the first wrapper seen for each group of equal ones.
    distinct = set(
        HashableConfig(position, conf)
        for position, conf in enumerate(all_configs)
    )
    return [all_configs[wrapper.idx] for wrapper in distinct]

In [None]:
def run_all():
    """Run every unique configuration on 4 worker threads and save the outputs.

    Each successful experiment is written to a file under ``output/``
    whose name comes from ``filename_from_config``. A ``ValueError`` raised
    by an experiment is printed and the sweep continues. A progress widget
    is advanced once per finished configuration, successful or not.
    """
    # The configurations must exist before the progress bar is sized.
    all_configs = get_all_unique_configs()

    process_loading_bar = FloatProgress(min=0, max=len(all_configs))
    display(process_loading_bar)

    print('Running {} trees... Please wait'.format(len(all_configs)))

    # Create the result directory up front so an experiment is not run
    # only to fail when its output file is opened.
    os.makedirs('output', exist_ok=True)

    def subfunc(param):
        _idx, config = param
        try:
            experiment = run_one_experiment(config)
            with open(filename_from_config('output/results_', config, 'out'), 'w') as file:
                file.write(experiment)
        except ValueError as e:
            print('Error:', e)
        finally:
            # NOTE(review): updated from worker threads without a lock, so
            # the displayed count may lag slightly under contention.
            process_loading_bar.value += 1

    thread_pool = ThreadPool(4)
    try:
        thread_pool.map(subfunc, enumerate(all_configs))
    finally:
        # Release the worker threads even when a task raised.
        thread_pool.close()
        thread_pool.join()


In [None]:
# Launch the sweep over every unique configuration.
run_all()