In [3]:
import sys
import os

# Make the project sources importable from this notebook.  The insert is
# guarded so that re-running the cell does not keep prepending duplicates.
# NOTE(review): this puts the *contents* of ../code on the path, while later
# cells import `code.<module>`.  The recorded run fails with
# "'code' is not a package", which suggests `code` resolves to a different,
# non-package module (the standard library ships one) — confirm the layout.
CODE_DIR = os.path.abspath('../code')
if CODE_DIR not in sys.path:
    sys.path.insert(0, CODE_DIR)
print(len(sys.path))
#from code.learners.EC.deap_extra import GP_predict, get_pset

12


In [2]:
# Code for MOGP 

from deap import gp
from deap import creator, base, tools
from deap.algorithms import varAnd
import numpy as np
import operator
import random
from code.metrics.classification_metrics import *
from code.learners.EC.deap_extra import GP_predict, get_pset
import pandas as pd 


def get_toolbox(pset, t_size, max_depth, X, y):
    """Assemble the DEAP toolbox used to evolve GP trees.

    Args:
        pset: primitive set used for tree generation, compilation and mutation.
        t_size: tournament size given to ``tools.selTournament``.
        max_depth: upper depth for generated trees and mutation subtrees, and
            the height cap applied to the results of ``mate`` and ``mutate``.
        X, y: data bound to the ``evaluate`` operator; forwarded unchanged to
            ``fitness_calculation``.

    Returns:
        A ``base.Toolbox`` with ``expr``, ``individual``, ``population``,
        ``compile``, ``evaluate``, ``select``, ``mate``, ``expr_mut`` and
        ``mutate`` registered.

    Note:
        ``creator.Individual`` has to exist before this function is called.
    """
    tb = base.Toolbox()

    # Tree construction: expression -> individual -> population.
    tb.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=max_depth)
    tb.register("individual", tools.initIterate, creator.Individual, tb.expr)
    tb.register("population", tools.initRepeat, list, tb.individual)

    # Compilation and scoring; evaluate needs the toolbox itself to compile.
    tb.register("compile", gp.compile, pset=pset)
    tb.register("evaluate", fitness_calculation, toolbox=tb, X=X, y=y)

    # Selection and variation operators.
    tb.register("select", tools.selTournament, tournsize=t_size)
    tb.register("mate", gp.cxOnePoint)
    tb.register("expr_mut", gp.genHalfAndHalf, min_=0, max_=max_depth)
    tb.register("mutate", gp.mutUniform, expr=tb.expr_mut, pset=pset)

    # Both variation operators get the same height limit.
    tree_height = operator.attrgetter("height")
    for op_name in ("mate", "mutate"):
        tb.decorate(op_name, gp.staticLimit(key=tree_height, max_value=max_depth))

    return tb

def fitness_calculation(individual, toolbox, X, y, w=0.5):
    """
    Fitness of one GP individual: accuracy of its predictions on (X, y).

    The tree is compiled with ``toolbox.compile``, run through ``GP_predict``
    together with the distinct labels found in ``y``, and scored with
    ``accuracy``.  ``w`` is accepted but not used here.

    Returns a one-element tuple, matching the single-weight fitness that the
    caller assigns to ``ind.fitness.values``.
    """
    compiled = toolbox.compile(expr=individual)
    labels = np.unique(y)
    predictions = GP_predict(compiled, X, labels)
    return (accuracy(y, predictions),)

def _evaluate_invalid(toolbox, individuals):
    """Evaluate every individual whose fitness is not valid; return how many were evaluated."""
    invalid_ind = [ind for ind in individuals if not ind.fitness.valid]
    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit
    return len(invalid_ind)


def gp_member_generation(X, y, params, seed):
    """Evolve a population of GP trees and return the final generation.

    Args:
        X: feature matrix; ``X.shape[1]`` sets the number of GP input arguments.
        y: target labels, forwarded to the fitness evaluation.
        params: dict with the keys ``max_depth``, ``pc`` and ``pm`` (the
            crossover and mutation probabilities handed to ``varAnd``),
            ``ngen``, ``p_size``, ``verbose`` and ``t_size``.  If a
            ``bagging`` key is present, ``fitness_function`` and
            ``current_ensemble`` must be present as well (KeyError otherwise).
        seed: value passed to ``random.seed`` before anything is generated.

    Returns:
        A 4-tuple:
          * the compiled callables of the final population,
          * a ``pandas.DataFrame`` built from the logbook (one row per
            generation ``1..ngen``),
          * the string form of each final individual,
          * ``pop[0]``, the first individual of the final population.
          NOTE(review): this is not necessarily the best one; the hall of
          fame is maintained but not returned — confirm the intent.
    """
    random.seed(seed)
    # default fitness function
    fitness_func = fitness_calculation
    # unpack parameters
    max_depth = params["max_depth"]
    pc = params["pc"]
    pm = params["pm"]
    ngen = params["ngen"]
    p_size = params['p_size']
    verbose = params["verbose"]
    t_size = params['t_size']

    if 'bagging' in params:
        # NOTE(review): both values are read but nothing below uses them — the
        # toolbox always evaluates with `fitness_calculation`.  Confirm how the
        # bagging variant is meant to be wired in.
        fitness_func = params['fitness_function']
        curr_ensemble = params['current_ensemble']

    # Initialise primitives
    pset = get_pset(num_args=X.shape[1])

    # Initialise GP settings.  The creator classes live in the `creator`
    # module, so only create them once; calling this function repeatedly
    # would otherwise re-create (and overwrite) them on every call.
    if not hasattr(creator, "FitnessMax"):
        creator.create("FitnessMax", base.Fitness, weights=(1.0,))  # maximise
    if not hasattr(creator, "Individual"):
        creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMax)

    # Initialise tool box
    toolbox = get_toolbox(pset, t_size, max_depth, X, y)

    # Initial population
    pop = toolbox.population(n=p_size)
    halloffame = tools.HallOfFame(1)

    # Score the initial population before the first tournament.  Without this
    # the first selection compares individuals that have no fitness yet, so
    # generation 1 is picked without any selection pressure.
    _evaluate_invalid(toolbox, pop)
    halloffame.update(pop)

    # Stats
    stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
    stats_size = tools.Statistics(len)
    mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
    mstats.register("avg", np.mean)
    mstats.register("std", np.std)
    mstats.register("min", np.min)
    mstats.register("max", np.max)
    logbook = tools.Logbook()
    logbook.header = ['gen', 'nevals'] + mstats.fields

    # Evolution process
    for gen in range(1, ngen + 1):
        # Select the next generation individuals
        offspring = toolbox.select(pop, len(pop))

        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, pc, pm)

        # Only individuals changed by crossover/mutation need re-evaluation
        nevals = _evaluate_invalid(toolbox, offspring)

        # Update the hall of fame with the generated individuals
        halloffame.update(offspring)

        # Replace the current population by the offspring
        pop[:] = offspring

        # Append the current generation statistics to the logbook
        record = mstats.compile(pop)
        logbook.record(gen=gen, nevals=nevals, **record)
        if verbose:
            print(logbook.stream)

    df = pd.DataFrame(logbook)
    return [toolbox.compile(ind) for ind in pop], df, [str(ind) for ind in pop], pop[0]

from code.data_processing import get_data

# Deliberately tiny run (one individual, two generations) to obtain a tree
# object that the following cells can inspect.
GP_params_1 = {
    "p_size": 1,
    "max_depth": 2,
    "pc": 0.6,
    "pm": 0.4,
    "ngen": 2,
    "verbose": False,
    "t_size": 7,
}
X, y = get_data('cleveland')
gpens, df, strs, gptree = gp_member_generation(X, y, params=GP_params_1, seed=0)

ModuleNotFoundError: No module named 'code.metrics'; 'code' is not a package

In [None]:
# Display the individual returned as the fourth item by gp_member_generation.
gptree

In [None]:
# String form of the tree (same text that gp_member_generation collects per individual).
str(gptree)

In [None]:
type(gptree) # creator.Individual, which gp_member_generation creates as a subclass of gp.PrimitiveTree

In [None]:
 gptree.searchSubtree(0) # NOTE(review): presumably the slice covering the subtree rooted at index 0 — confirm against the DEAP docs

In [None]:
# Inspect the `root` attribute of the tree.
gptree.root

In [None]:
# Arity reported by the root node.
gptree.root.arity

In [None]:
# NOTE(review): nothing else in this notebook uses an `end` attribute on the
# tree (only root, searchSubtree and from_string) — this may raise
# AttributeError; confirm it exists.
gptree.end

In [None]:
import copy
import math
import random
import re
import sys
import warnings

from collections import defaultdict, deque
from functools import partial, wraps
from inspect import isclass
from operator import eq, lt

def my_if(a, b, c):
    """Three-argument conditional: ``b`` when ``a`` is greater than zero, otherwise ``c``."""
    return b if a > 0 else c

def custom_get_pset(num_args):
    """Build a minimal primitive set: ``my_if`` plus the constants 3, 2 and 1.

    Args:
        num_args: number of input arguments of the evolved programs.

    Returns:
        A ``gp.PrimitiveSet`` named "MAIN" whose inputs are renamed from the
        default ``ARG0 .. ARG{n-1}`` to ``x0 .. x{n-1}``.
    """
    pset = gp.PrimitiveSet("MAIN", num_args)
    pset.addPrimitive(my_if, 3)
    # Rename every input in a single call.  The previous loop passed `ARG0=`
    # on each iteration, so only the first input could ever be renamed and
    # the remaining ones kept their ARGn names.
    pset.renameArguments(**{f'ARG{n}': f'x{n}' for n in range(num_args)})
    pset.addTerminal(3)
    pset.addTerminal(2)
    pset.addTerminal(1)
    return pset

# Module-level primitive set with three input arguments, used by the cells below.
pset = custom_get_pset(3)

def f_generate(pset, min_, max_, type_=None):
    """Generate a random expression as a flat, prefix-ordered list of nodes.

    A target height is drawn uniformly from ``[min_, max_]``.  A node becomes
    a terminal when its depth reaches that height, or — once the depth is at
    least ``min_`` — with probability ``pset.terminalRatio``; otherwise a
    primitive is chosen and its argument types are scheduled one level deeper.

    Args:
        pset: object exposing ``ret``, ``terminalRatio`` and the mappings
            ``terminals`` / ``primitives`` from a type to candidate nodes.
        min_: minimum height of the generated tree.
        max_: maximum height of the generated tree.
        type_: return type of the root; defaults to ``pset.ret``.

    Returns:
        List of primitives and terminals; terminals that are classes are
        instantiated before being stored.

    Raises:
        IndexError: when no terminal or primitive is available for a type.
    """
    root_type = pset.ret if type_ is None else type_

    def must_be_terminal(target_height, current_depth):
        """True when the node at ``current_depth`` has to be a terminal."""
        if current_depth == target_height:
            return True
        return current_depth >= min_ and random.random() < pset.terminalRatio

    nodes = []
    target_height = random.randint(min_, max_)
    pending = [(0, root_type)]
    while pending:
        depth, node_type = pending.pop()

        if must_be_terminal(target_height, depth):
            try:
                chosen = random.choice(pset.terminals[node_type])
            except IndexError:
                tb = sys.exc_info()[2]
                raise IndexError("The gp.generate function tried to add " \
                                  "a terminal of type '%s', but there is " \
                                  "none available." % (node_type,)).with_traceback(tb)
            nodes.append(chosen() if isclass(chosen) else chosen)
            continue

        try:
            chosen = random.choice(pset.primitives[node_type])
        except IndexError:
            tb = sys.exc_info()[2]
            raise IndexError("The gp.generate function tried to add " \
                              "a primitive of type '%s', but there is " \
                              "none available." % (node_type,)).with_traceback(tb)
        nodes.append(chosen)
        # Push arguments in reverse so the first argument is expanded first.
        pending.extend((depth + 1, arg_type) for arg_type in reversed(chosen.args))

    return nodes


    
# Generate one random expression of height 2..3 from the custom primitive set,
# then print the node list as a whole and one node per line.
z = f_generate(pset, 2, 3, None)
print(z)
for zz in z:
    print(zz)

In [None]:
# Display the primitive set built by custom_get_pset(3).
pset

In [None]:
# Build a tree from a prefix-notation string.
# NOTE(review): the `pset` in scope is the custom one (only `my_if` and the
# terminals 3, 2, 1), which registers no `sub` primitive — confirm this is the
# primitive set this call was meant to use.
gptree.from_string('sub 2 2 ', pset=pset)

In [None]:
# Class of the individual, read through the instance attribute.
gptree.__class__

In [None]:
# Same check as the earlier type(gptree) cell, via the built-in.
type(gptree)

In [None]:
# NOTE(review): probes from_string with neither an expression string nor a
# primitive set — presumably this raises; confirm whether the cell is still needed.
gptree.from_string(None, None)

In [None]:
# Dump the attributes of every node in the tree.
# `args` and `seq` are not defined on every node kind (DEAP terminals carry
# neither), so a missing attribute prints as None instead of aborting the walk
# with AttributeError at the first terminal.
for x in gptree:
    print(x.name)
    print(x.arity)
    print(getattr(x, "args", None))
    print(x.ret)
    print(getattr(x, "seq", None))


In [None]:
# Print each node's formatted form.
# NOTE(review): format() is called with no arguments; presumably primitives
# expect one argument per child, in which case this fails on the first
# non-terminal node — confirm.
for x in gptree:
    print(x.format())

In [None]:
# String form of the tree, via the built-in rather than the dunder method.
str(gptree)

In [None]:
from collections import deque

# `extendleft` has to be called on the instance; calling it on the class
# passes the list as `self` and raises TypeError.
d = deque()
d.extendleft([1, 2, 3])  # items are pushed to the front one by one -> deque([3, 2, 1])

In [None]:
# Leftover debugging cell: the original `breakpoint()k` was a SyntaxError
# (stray `k`) and would also stall a Restart & Run All.  Uncomment the line
# below to drop into the debugger while exploring interactively.
# breakpoint()

In [None]:
import pdb

In [None]:
# Text dump of a decision tree, one node per entry: the run of leading `|`
# characters grows with nesting depth, internal nodes carry a
# `feature_k <= t` / `feature_k > t` test and leaves a `class:` label.
# NOTE(review): looks like sklearn `export_text` output with the indentation
# spaces removed — confirm the source.  This rebinds `z`, which an earlier
# cell used for the f_generate result.
z = ['|--- feature_9 <= 0.70', '||--- feature_0 <= 58.50', '|||--- feature_10 <= 1.50', '||||--- feature_3 <= 115.00', '|||||--- feature_11 <= 0.50', '||||||--- class: 0.0', '|||||--- feature_11 > 0.50', '||||||--- class: 1.0', '||||--- feature_3 > 115.00', '|||||--- class: 0.0', '|||--- feature_10 > 1.50', '||||--- feature_11 <= 0.50', '|||||--- class: 0.0', '||||--- feature_11 > 0.50', '|||||--- feature_10 <= 2.50', '||||||--- class: 1.0', '|||||--- feature_10 > 2.50', '||||||--- class: 0.0', '||--- feature_0 > 58.50', '|||--- feature_2 <= 1.50', '||||--- class: 1.0', '|||--- feature_2 > 1.50', '||||--- feature_0 <= 65.50', '|||||--- feature_12 <= 6.50', '||||||--- class: 1.0', '|||||--- feature_12 > 6.50', '||||||--- class: 0.0', '||||--- feature_0 > 65.50', '|||||--- class: 0.0', '|--- feature_9 > 0.70', '||--- feature_2 <= 3.50', '|||--- feature_2 <= 1.50', '||||--- feature_10 <= 1.50', '|||||--- class: 0.0', '||||--- feature_10 > 1.50', '|||||--- class: 1.0', '|||--- feature_2 > 1.50', '||||--- feature_0 <= 58.00', '|||||--- class: 0.0', '||||--- feature_0 > 58.00', '|||||--- class: 1.0', '||--- feature_2 > 3.50', '|||--- feature_8 <= 0.50', '||||--- feature_1 <= 0.50', '|||||--- class: 1.0', '||||--- feature_1 > 0.50', '|||||--- feature_7 <= 170.50', '||||||--- class: 1.0', '|||||--- feature_7 > 170.50', '||||||--- class: 0.0', '|||--- feature_8 > 0.50', '||||--- class: 1.0']

In [None]:
# Print the tree dump one node per line.
for x in z:
    print(x)