Design idea:

A graph can represent the pipeline. 

1.  Build the pipeline naively
2.  Rearrange the order of the pipeline by the distance to `root` node

In [1]:
import numpy as np
import pandas as pd

from hinge import Hinge, error_on_split
from interaction import Interaction

from sklearn import datasets
from sklearn import preprocessing
from sklearn.pipeline import Pipeline
from sklearn import preprocessing

# using svm as per scikit-feature repo
from sklearn.decomposition import FactorAnalysis # we shall use factor analysis to fix the size of final modelling ds.
from sklearn.svm import LinearSVC
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# for sampling
import random # use random.choice? and random.sample for interactions

# use networkx to keep track of changes to our data, so we can recreate things...
import networkx as nx
import matplotlib.pyplot as plt
from graph_utils import *
import itertools
%matplotlib inline

In [2]:
from sklearn.datasets import make_classification

problem_setup = {
    'n_samples': 1000, 
    'n_features': 20, 
    'n_informative': 8, 
    'n_redundant': 8, 
    'n_classes': 3, 
    'random_state': 0    
}

X, y = make_classification(**problem_setup)
X = X.astype(float)
n_samples, n_features = X.shape    # number of samples and number of features

In [3]:
import pandas as pd # for making sure and exploring created datasets...
X_df = pd.DataFrame(X)
base_names = ["base_{}".format(x) for x in range(n_features)]
X_df.columns = base_names[:]
X_df.head()

Unnamed: 0,base_0,base_1,base_2,base_3,base_4,base_5,base_6,base_7,base_8,base_9,base_10,base_11,base_12,base_13,base_14,base_15,base_16,base_17,base_18,base_19
0,-1.561416,-0.302123,-1.626526,-0.808229,0.415516,1.60608,1.471883,-1.677559,5.249528,1.048403,-2.459287,-2.809152,-1.315362,-2.930697,0.120546,-0.759469,-2.690498,1.952268,6.930261,0.428036
1,-0.745867,-1.085664,-1.424702,1.785155,0.155847,-2.346231,-0.789581,2.918363,0.139034,-1.310294,0.02839,3.175972,-1.645095,1.452931,1.912759,-2.638561,-0.175355,1.094468,-3.710453,-1.011041
2,2.209442,-0.210728,0.237275,0.908409,0.03954,1.349943,2.685406,-3.007716,-3.675619,1.099558,-1.161589,-2.289721,2.003953,0.206058,-3.070732,1.392493,0.658975,-2.729099,1.860675,-1.611407
3,-0.390274,-1.501618,0.562233,-0.60103,-5.216131,-0.682441,-4.833625,10.759309,-2.81049,0.824669,4.931424,6.417655,-0.18994,1.523022,2.051892,-7.072514,2.589547,-2.276084,-10.504724,-0.750008
4,-1.523687,-2.82278,-0.941431,-3.189133,0.479385,1.655941,-0.655178,5.341404,4.316366,1.35317,0.343184,2.012068,0.96228,-1.108046,4.737343,-2.803268,1.014875,3.380321,1.554379,1.937787


In [4]:
def eval_pipeline(additional_feats=[], X=X_df, y=y, verbose=True):
    #print(additional_feats)
    
    pipeline = additional_feats[:]
    pipeline.append(('factor analysis', FactorAnalysis(20)))
    pipeline.append(('linear svm', LinearSVC()))
    model = Pipeline(pipeline[:])

    # split data into 10 folds
    kfold = KFold(n_splits=10, shuffle=True)
    results = cross_val_score(model, X, y, cv=kfold)
    if verbose:
        print("Accuracy: {}".format(results.mean()))
    return results.mean()



In [5]:
# baseline solution - vanilla pipeline
eval_pipeline(X=X_df)

Accuracy: 0.68


0.68000000000000005

Feature Search
=======

We have 4 possible situations:

1. Grow - split : assumption is we always grow 2
2. Grow - interaction
3. Remove - split : assumption is we always destroy both
4. Remove - interaction

Proposal Distributions
------------------------

Assume each action has equal probability and is uniform.

Then each proposal is simply:

$$P(x^*|x^{(t)}) = \frac{1}{\text{num possible actions}}$$


------

In this setting there is no need for RJMCMC (yet) as the transformations are parameter free.

What this means is we can do a straight MH

In [6]:
"""
Graph details...

hinge parents will have attribute name hinge_children with value equal to the node name of the children
interaction parents will have attribute name interaction_children with value equal to list of all nodes that are children
"""

# generate the base graph, where we have a the root node and all the base features...
G=nx.DiGraph()
G.add_node("root")

for col in base_names:
    # we can add node attributes, eg
    #G.add_node(col, attribute='here')
    G.add_node(col)
    G.add_edge("root", col)

# add attribute to node base_0
#G = set_graph_node_attributes(G, 'base_0', {'hinge_children': ['a', 'b']})




In [7]:
graph_to_dict(G)

{'edges': [['root', 'base_8', {}],
  ['root', 'base_10', {}],
  ['root', 'base_2', {}],
  ['root', 'base_18', {}],
  ['root', 'base_19', {}],
  ['root', 'base_6', {}],
  ['root', 'base_11', {}],
  ['root', 'base_4', {}],
  ['root', 'base_15', {}],
  ['root', 'base_5', {}],
  ['root', 'base_3', {}],
  ['root', 'base_14', {}],
  ['root', 'base_1', {}],
  ['root', 'base_9', {}],
  ['root', 'base_7', {}],
  ['root', 'base_17', {}],
  ['root', 'base_16', {}],
  ['root', 'base_0', {}],
  ['root', 'base_12', {}],
  ['root', 'base_13', {}]],
 'nodes': [['base_8', {}],
  ['root', {}],
  ['base_2', {}],
  ['base_18', {}],
  ['base_19', {}],
  ['base_9', {}],
  ['base_11', {}],
  ['base_1', {}],
  ['base_15', {}],
  ['base_14', {}],
  ['base_3', {}],
  ['base_6', {}],
  ['base_5', {}],
  ['base_7', {}],
  ['base_4', {}],
  ['base_16', {}],
  ['base_17', {}],
  ['base_0', {}],
  ['base_12', {}],
  ['base_13', {}],
  ['base_10', {}]]}

In [8]:
def get_all_nodes_by_attribute(G, attri_key):
    g_nodes_attr = graph_to_dict(G)['nodes']
    g_nodes = [node for node, attr in g_nodes_attr if attri_key in attr.keys()]
    return g_nodes

def get_graph_attributes_by_key(G, attri_key):
    g_nodes_attr = graph_to_dict(G)['nodes']
    attr_values = [attri.get(attri_key, None) for _, attri in g_nodes_attr]
    attr_values = [x for x in attr_values if x is not None]
    return attr_values

def get_all_node_attributes(G, node):
    return G[node]

In [9]:
def set_graph_node_attributes(G1, node, attri_dict):
    """
    G is a networkx graph...
    """
    G = G1.copy()
    
    #dict_items = attri_dict.items()    
    for attri_name, attri_value in attri_dict.items():
        nx.set_node_attributes(G, attri_name, {node: attri_value})
    return G.copy()

In [10]:
"""
Migrate this to another module in the future

Details
-------

### hinge

**parent node**

Will have details of the children in "hinge_children". 
That is...

```
Hinge(mask=parent)
```

A parent node will create two children

pos_name = "{}_poshinge".format(remove_node_name)
neg_name = "{}_neghinge".format(remove_node_name)

### interaction

**child node**

Will have details of the parent in "interaction_parent"
That is...

```
Interaction(['interaction1', 'interaction2'])
```

the node created will be the name

"{}_{}".format(inter1, inter2)

"""

def get_remove_hinge_candidates(G):
    # get all nodes with attribute "hinge_children"
    # where their totol children is 2 or less.    
    hinge_parents = get_all_nodes_by_attribute(G, 'hinge_children')
    
    # now check that all hinge parents have 2 or less children...
    hinge_parents = [parent for parent in hinge_parents if len(nx.descendants(G, parent)) <= 2]
    return hinge_parents

def get_grow_hinge_candidates(G):
    # these are all nodes who do not have a hinge children attribute. 
    hinge_parents = get_all_nodes_by_attribute(G, 'hinge_children')
    hinge_candidates = [node for node in G.nodes() if node not in hinge_parents and node != "root"]
    return hinge_candidates

def get_remove_interact_candidates(G):
    # get all nodes with attribute "interaction_parent"
    # where they have no children
    interaction_nodes = get_all_nodes_by_attribute(G, 'interaction_parent')
    # now check that all hinge parents have 2 or less children...
    nodes = [node for node in interaction_nodes if len(nx.descendants(G, node)) == 0]
    return nodes
    
def get_grow_interact_candidates(G):
    # get all pairwise nodes which are not currently a pair?
    all_nodes = G.nodes()
    all_nodes = [x for x in all_nodes if x != "root"]
    
    # generate all pairs...
    pairwise = itertools.combinations(all_nodes, 2)
    itself = zip(*[all_nodes, all_nodes])
    all_pairs = list(pairwise) + list(itself)
    
    seen_pairs = get_graph_attributes_by_key(G, 'hinge_children')
    seen_pairs = [set([x,y]) for x,y in seen_pairs]
    # remove "seen" pairs.
    filter_pairs = [set([x,y]) for x,y in all_pairs if set([x,y]) not in seen_pairs]
    return filter_pairs

In [11]:
# begin trying stuff
# start simulations!
def spawn_possible_actions(G):
    actions = []
    if get_remove_interact_candidates(G):
        actions.append(("remove", "interact"))
    if get_remove_hinge_candidates(G):
        actions.append(("remove", "hinge"))
    if get_grow_hinge_candidates(G):
        actions.append(("grow", "hinge"))
    if get_grow_interact_candidates(G):
        actions.append(("grow", "interact"))
    return actions

spawn_possible_actions(G)

[('grow', 'hinge'), ('grow', 'interact')]

In [12]:
def spawn_action_candidates(G, action, transform):
    if action == 'grow':
        if transform == 'hinge':
            return get_grow_hinge_candidates(G)
        elif transform == 'interact':
            return get_grow_interact_candidates(G)
        else:
            raise Exception("invalid transform function")
    if action == 'remove':
        if transform == 'hinge':
            return get_remove_hinge_candidates(G)
        elif transform == 'interact':
            return get_remove_interact_candidates(G)
        else:
            raise Exception("invalid transform function")
    else:
        raise Exception("invalid grow function")

In [13]:
def spawn_new_model(G):
    # to spawn a model, spawn possible actions:    
    all_actions = spawn_possible_actions(G)
    action, transform = random.choice(all_actions)
    # this just returns the function that is randomly chosen.
    candidates = spawn_action_candidates(G, action, transform)
    selected_node = random.choice(candidates)
    return {'action': action, 
            'transform': transform,
            'selected_node': selected_node}
    

In [14]:
def spawn_new_pipeline(G):
    """
    Take in a networkx graph as a dictionary object
    
    We will convert a graph to pipeline objects,
    and then reorder based on distance to root node. 
    
    Usage for shortest path: 
    print(nx.shortest_path_length(G,source=0,target=4))
    
    hopefully this works...
    """
    # just randomly add...pipeline transformations   
    g_nodes = graph_to_dict(G)['nodes']
    
    # add all hinge nodes...
    hinge_info = [("hinge_{}".format(node), Hinge(mask=node), node,
                  nx.shortest_path_length(G,source="root",target=node)) 
                  for node, attr in g_nodes if 'hinge_children' in attr.keys()]
    
    # add all interaction nodes...
    # length + 1 as interaction thing is at parent level? when we consider things
    # like removal
    interact_info = [("interact_{}".format('_'.join(attr['interaction_parent'])), 
                      Interaction(attr['interaction_parent']), 
                      node, 
                      nx.shortest_path_length(G,source="root",target=node)+1)
                     for node, attr in g_nodes if 'interaction_parent' in attr.keys()]
    
    
    # join all together
    pipeline = hinge_info + interact_info
    
    # sort the pipeline by location of node to root...
    pipeline.sort(key=lambda x: x[3])
    #print(pipeline)
    # remove last element in pipeline..
    pipeline = [(x, y) for x,y,_,_ in pipeline]
    return pipeline

In [15]:
### spawn one pass...
new_model = spawn_new_model(G)
print("Chosen action/transform: {} {}".format(new_model['action'], new_model['transform']))
print("selected node(s): {}".format(new_model['selected_node']))

Chosen action/transform: grow hinge
selected node(s): base_8


In [16]:
def _proposal(G, action, transform):
    """
    G is current graph of model.
    
    return proportional probabilities related to proposal distributions,
    both the current proposal and the "proposed" proposal
    
    x*|xt is proposal prime
    xt|x* is proposal cur
    
    available transforms:
    if get_remove_interact_candidates(G):
    if get_remove_hinge_candidates(G):
    if get_grow_hinge_candidates(G):
    if get_grow_interact_candidates(G):

    use this: simulate_new_graph

    """
    if action == "remove" and transform == "interact":
        all_pos_parents = get_remove_interact_candidates(G)
        proposal_prime = 1.0/len(all_pos_parents)
        
        # simulate removing a pair...
        all_interact_nodes = graph_to_dict(G.copy())['nodes']
        all_interact_nodes = [node for node, attr in all_interact_nodes if 'interaction_parent' in attr.keys()]
        
        G_prime = simulate_new_graph(G.copy(), {
            'action': action, 'transform': transform, 'selected_node': all_interact_nodes[0]
        })
        
        proposal_cur = 1.0/len(get_grow_interact_candidates(G_prime))
        
    if action == "remove" and transform == "hinge":
        all_pos_parents = get_remove_hinge_candidates(G)
        proposal_prime = 1.0/len(all_pos_parents)
        
        # simulate removing a pair
        G_prime = G.copy()
        remove_node_name = all_pos_parents[0]
        pos_name = "{}_poshinge".format(remove_node_name)
        neg_name = "{}_neghinge".format(remove_node_name)
        G_prime.remove_node(pos_name)
        G_prime.remove_node(neg_name)
        proposal_cur = 1.0/len(get_grow_hinge_candidates(G_prime))
        
                
    if action == "grow" and transform == "hinge":
        all_pos_candidates = get_grow_hinge_candidates(G)
        proposal_prime = 1.0/len(all_pos_candidates)
        
        # simulate adding a random proposal
        G_prime = G.copy()
        add_node_name = all_pos_candidates[0]
        pos_name = "{}_poshinge".format(add_node_name)
        neg_name = "{}_neghinge".format(add_node_name)
        
        # add...
        G_prime.add_node(pos_name, hinge='', hinge_parent=add_node_name)
        G_prime.add_node(neg_name, hinge='', hinge_parent=add_node_name)
        G_prime.add_edge(add_node_name, pos_name)
        G_prime.add_edge(add_node_name, neg_name)
        
        proposal_cur = 1.0/len(get_remove_hinge_candidates(G_prime))
        
    if action == "grow" and transform == "interact":
        all_pos_candidates = get_grow_interact_candidates(G)
        proposal_prime = 1.0/len(all_pos_candidates)

        # simulate adding a random proposal
        G_prime = G.copy()
        
        # interaction
        G_prime = G.copy()
        add_node_name = list(all_pos_candidates[0])
        if len(add_node_name) == 1:
            add_node_name = add_node_name[:] + add_node_name[:]
        interact_name = "{}_{}".format(add_node_name[0], add_node_name[1])
        
        # add...
        G_prime.add_node(interact_name, interaction=[add_node_name[0], add_node_name[1]])
        G_prime.add_edge(add_node_name[0], interact_name)
        G_prime.add_edge(add_node_name[1], interact_name)
        
        proposal_cur = 1.0/len(get_remove_interact_candidates(G_prime))
    
    # add fudge scaling factor
    proposal_cur = min(proposal_cur, 0.25)
    proposal_prime = min(proposal_prime, 0.25)
    
    return proposal_cur, proposal_prime
    
def proposal(G, action, transform):
    try: 
        proposal_cur, proposal_prime = _proposal(G, action, transform)
        return proposal_cur, proposal_prime
    except ZeroDivisionError as err:
        print("Warning: Move appears to be invalid")
        return 1.0, 1.0

In [17]:
def _acceptance(proba_cur, proba_prime, proposal_cur, proposal_prime):
    accept = (proba_prime/proba_cur) * (proposal_cur/proposal_prime)
    return min(1.0, accept)

def propose_iter(G_dict):
    """
    Takes in list of graphs as dict objects    
    """
    last_model = G_dict.copy()
    #print(last_model )
    curr_G = dict_to_graph(last_model)
    propose_model = spawn_new_model(dict_to_graph(last_model)) # provide the action/transform pair
    proposed_graph = simulate_new_graph(curr_G, propose_model)
    curr_pipeline = spawn_new_pipeline(curr_G) # maybe rename to spawn_pipeline
    propose_pipeline = spawn_new_pipeline(dict_to_graph(proposed_graph))
    
    #print(propose_pipeline)
    #print(curr_pipeline)
    proba_cur = eval_pipeline(additional_feats=curr_pipeline, X=X_df, verbose=False)
    #print(propose_pipeline)
    proba_prime = eval_pipeline(additional_feats=propose_pipeline, X=X_df, verbose=False)
    
    proposal_cur, proposal_prime = proposal(curr_G, propose_model['action'], propose_model['transform'])
    
    result = {'acceptance_proba': _acceptance(proba_cur, proba_prime, proposal_cur, proposal_prime)}
    
    result_new = result.copy()
    result_new.update(propose_model)
    result['proposed_graph'] = curr_G.copy
    return result_new

# if proposal is accepted...update G and transform list here...

In [18]:
def simulate_new_graph(G_new, proposed):
    """
    G_new is a graph
    proposed is a dict like this: {'action': 'grow', 'selected_node': 'base_12', 'transform': 'hinge'}
    
    return a graph as a dict obj
    
    """
    #G_new = dict_to_graph(G_list[-1].copy())    
    
    # now in G, we add or remove as needed...
    if proposed['action'] == 'grow':
        if proposed['transform'] == 'hinge':
            #print(proposed['selected_node'])
            pos_name = "{}_poshinge".format(proposed['selected_node'])
            neg_name = "{}_neghinge".format(proposed['selected_node'])
            G_new.add_node(pos_name)
            G_new.add_node(neg_name)
            G_new.add_edge(proposed['selected_node'], pos_name)
            G_new.add_edge(proposed['selected_node'], neg_name)
            
            # add attributes, which is to the parent node.
            attributes = get_all_node_attributes(G_new, proposed['selected_node']).copy()
            attributes['hinge_children'] = [pos_name, neg_name]
            G_new = set_graph_node_attributes(G_new, proposed['selected_node'], attributes)
            
        elif proposed['transform'] == 'interact':
            inter_nodes = list(proposed['selected_node'])
            #print(inter_nodes)
            if len(inter_nodes) == 1:
                inter_nodes = inter_nodes[:] + inter_nodes[:]
            interact_name = "{}_{}".format(inter_nodes[0], inter_nodes[1])
            G_new.add_node(interact_name)
            G_new.add_edge(inter_nodes[0], interact_name)
            G_new.add_edge(inter_nodes[1], interact_name)
            
            # add attributes, which is on the child node
            attributes = get_all_node_attributes(G_new, interact_name).copy()
            attributes['interaction_parent'] = [inter_nodes[0], inter_nodes[1]]
            G_new = set_graph_node_attributes(G_new, interact_name, attributes)
        else:
            raise Exception("action/transform pair: {} {} appears to be invalid".format(proposed['action'], proposed['transform']))
    elif proposed['action'] == 'remove':        
        if proposed['transform'] == 'hinge':
            #print("\tremove all child nodes of node {}".format(proposed['selected_node']))            
            all_children = nx.descendants(G, proposed['selected_node'])
            for node in all_children:
                G_new.remove_node(node)
        elif proposed['transform'] == 'interact':
            #print("\tremove interact node {}".format(proposed['selected_node']))
            G_new.remove_node(proposed['selected_node'])
        else:
            raise Exception("action/transform pair: {} {} appears to be invalid".format(proposed['action'], proposed['transform']))    
    else:
        raise Exception("action/transform pair: {} {} appears to be invalid".format(proposed['action'], proposed['transform']))
    return graph_to_dict(G_new)

In [19]:
def create_iter(G_list, proposed):
    G_last = dict_to_graph(G_list[-1].copy())    
    G_new_dict = simulate_new_graph(G_last, proposed)
    
    # add things...
    G_new_list = G_list[:]
    G_new_list.append(G_new_dict)
    return G_new_list[:]

In [20]:
def mh_iter(G_list):
    u = np.random.uniform()    
    proposed = propose_iter(G_list[-1])
    
    # make u =0 
    if u < proposed['acceptance_proba']:
        #print(proposed)
        return create_iter(G_list, proposed)
    else:
        print("repeat previous\n\t")
        #print(proposed)
        # repeat the previous one...
        return G_list[:]


In [21]:
# generate the base graph, where we have a the root node and all the base features...
G=nx.DiGraph()
G.add_node("root")

for col in base_names:
    # we can add node attributes, eg
    #G.add_node(col, attribute='here')
    G.add_node(col)
    G.add_edge("root", col)

proposed = spawn_new_model(G)
proposed_graph = simulate_new_graph(G, proposed)

In [None]:
# generate the base graph, where we have a the root node and all the base features...
G=nx.DiGraph()
G.add_node("root")

for col in base_names:
    # we can add node attributes, eg
    #G.add_node(col, attribute='here')
    G.add_node(col)
    G.add_edge("root", col)

In [None]:
iters = 1000

G_list = [graph_to_dict(G)]

for i in range(iters):
    print(i)
    try:
        G_list_temp = mh_iter(G_list[:])
        G_list = G_list_temp[:]
    except:
        # stuff still buggy but generally works enough of the time
        print("something wrong happened in mh_iter")
        pass
    
    if i % 10 == 0:
        pipeline = G_list[-1]
        check_pipeline = spawn_new_pipeline(dict_to_graph(pipeline))
        human_readable_pipeline = [x for x, y in check_pipeline]
        #print("Evaluating pipeline: {}".format(spawn_pipeline))
        perf = eval_pipeline(additional_feats=check_pipeline, X=X_df, verbose=False)
        print("Performance at pipeline: {} is \n\t{}\n".format(human_readable_pipeline, perf))

0
Performance at pipeline: ['hinge_base_9'] is 
	0.6759999999999999

1
2
3
4
5
6
7
8
something wrong happened in mh_iter
9
10
Performance at pipeline: ['hinge_base_6', 'hinge_base_17', 'hinge_base_10', 'hinge_base_9', 'hinge_base_17_poshinge'] is 
	0.682

11
repeat previous
	
12
13
14
something wrong happened in mh_iter
15
16
something wrong happened in mh_iter
17
18
something wrong happened in mh_iter
19
something wrong happened in mh_iter
20
Performance at pipeline: ['hinge_base_11', 'hinge_base_17', 'hinge_base_7', 'hinge_base_6', 'hinge_base_10', 'hinge_base_9', 'hinge_base_17_poshinge', 'interact_base_5_base_15', 'interact_base_0_base_11_poshinge'] is 
	0.71

21
repeat previous
	
22
23
repeat previous
	
24
something wrong happened in mh_iter
25
repeat previous
	
26
27
28
29
repeat previous
	
30
Performance at pipeline: ['hinge_base_10', 'hinge_base_7', 'hinge_base_11', 'hinge_base_6', 'hinge_base_17', 'hinge_base_9', 'hinge_base_15', 'hinge_base_5', 'hinge_base_9_neghinge', 'hinge

something wrong happened in mh_iter
92
93
94
something wrong happened in mh_iter
95
96
97
98
something wrong happened in mh_iter
99
100
something wrong happened in mh_iter
Performance at pipeline: ['hinge_base_10', 'hinge_base_14', 'hinge_base_9', 'hinge_base_16', 'hinge_base_2', 'hinge_base_0', 'hinge_base_13', 'hinge_base_17', 'hinge_base_7', 'hinge_base_4', 'hinge_base_3', 'hinge_base_11', 'hinge_base_6', 'hinge_base_15', 'hinge_base_1', 'hinge_base_5', 'hinge_base_10_poshinge', 'hinge_base_17_neghinge', 'hinge_base_9_poshinge', 'hinge_base_16_poshinge', 'hinge_base_15_neghinge', 'hinge_base_9_neghinge', 'hinge_base_6_poshinge', 'hinge_base_11_poshinge', 'hinge_base_1_neghinge', 'hinge_base_2_neghinge', 'hinge_base_17_poshinge', 'hinge_base_5_poshinge', 'hinge_base_16_poshinge_poshinge', 'interact_base_1_base_17_poshinge_neghinge', 'interact_base_2_base_2_poshinge', 'interact_base_5_base_15', 'interact_base_0_base_11_poshinge', 'interact_base_15_neghinge_poshinge_base_15', 'interact

132
something wrong happened in mh_iter
133
something wrong happened in mh_iter
134
something wrong happened in mh_iter
135
something wrong happened in mh_iter
136
something wrong happened in mh_iter
137
something wrong happened in mh_iter
138
something wrong happened in mh_iter
139
repeat previous
	
140
Performance at pipeline: ['hinge_base_10', 'hinge_base_14', 'hinge_base_18', 'hinge_base_2', 'hinge_base_16', 'hinge_base_0', 'hinge_base_13', 'hinge_base_17', 'hinge_base_7', 'hinge_base_4', 'hinge_base_3', 'hinge_base_6', 'hinge_base_9', 'hinge_base_15', 'hinge_base_1', 'hinge_base_5', 'hinge_base_11', 'hinge_base_17_neghinge', 'hinge_base_9_poshinge', 'hinge_base_16_poshinge', 'hinge_base_15_neghinge', 'hinge_base_18_poshinge', 'hinge_base_9_neghinge', 'hinge_base_6_poshinge', 'hinge_base_11_poshinge', 'hinge_base_4_poshinge', 'hinge_base_1_neghinge', 'hinge_base_13_neghinge', 'hinge_base_2_neghinge', 'hinge_base_10_poshinge', 'hinge_base_4_neghinge', 'hinge_base_17_poshinge', 'hing

something wrong happened in mh_iter
172
something wrong happened in mh_iter
173
something wrong happened in mh_iter
174
something wrong happened in mh_iter
175
something wrong happened in mh_iter
176
something wrong happened in mh_iter
177
something wrong happened in mh_iter
178
something wrong happened in mh_iter
179
something wrong happened in mh_iter
180
Performance at pipeline: ['hinge_base_10', 'hinge_base_14', 'hinge_base_9', 'hinge_base_18', 'hinge_base_2', 'hinge_base_16', 'hinge_base_0', 'hinge_base_13', 'hinge_base_17', 'hinge_base_7', 'hinge_base_4', 'hinge_base_3', 'hinge_base_6', 'hinge_base_15', 'hinge_base_1', 'hinge_base_5', 'hinge_base_11', 'hinge_base_4_poshinge', 'hinge_base_17_neghinge', 'hinge_base_9_poshinge', 'hinge_base_16_poshinge', 'hinge_base_15_neghinge', 'hinge_base_18_poshinge', 'hinge_base_9_neghinge', 'hinge_base_6_poshinge', 'hinge_base_11_poshinge', 'hinge_base_1_neghinge', 'hinge_base_13_neghinge', 'hinge_base_2_neghinge', 'hinge_base_10_poshinge', 'h

In [None]:
# evaluate performance so far...

for pipeline in G_list:   
    # spawn_new_pipeline
    check_pipeline = spawn_new_pipeline(dict_to_graph(pipeline))
    human_readable_pipeline = [x for x, y in check_pipeline]
    #print("Evaluating pipeline: {}".format(spawn_pipeline))
    perf = eval_pipeline(additional_feats=check_pipeline, X=X_df, verbose=False)
    print("Performance at pipeline: {} is \n\t{}\n".format(human_readable_pipeline, perf))