In [1]:
import os
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = ".20"

In [2]:
import pandas as pd
from model import Brain
from submodels import factories
import matplotlib.pyplot as plt
import pandas as pd
from jf.db import DB
from lib.plot import plot_2d_positions_colors
from lib.sde.grn2 import GRNMain2
from lib.sde.mutate import mutate_grn2
from lib.ga.utils import weighted_selection
from lib.ga.objective import Objective
from jf.profiler import Profiler
from jf.autocompute.jf import O

In [3]:
from itertools import accumulate
import numpy as np
from collections import defaultdict

In [4]:
HISTORY = defaultdict(dict)
"""
Format 
generation : dict(
    transition
    solution
    fitness
)
"""

'\nFormat \ngeneration : dict(\n    transition\n    solution\n    fitness\n)\n'

In [5]:
HALL_OF_FAME = []

In [6]:
REF = pd.read_csv("output/results/setup_basic/export/ref_basic2.csv")  # ref is a mean

In [7]:
def individual_generator():
    return Solution(GRNMain2(5, 0, 5))

In [8]:
if False:
    grn = GRNMain2(5, 0, 5)
    ccls = factories["grn2"](grn=grn)

    bb = Brain(time_step=0.5, verbose=False, start_population=4, max_pop_size=1e3,
            cell_cls=ccls.generate, end_time=60, start_time=50, silent=False)
    bb.run()

# cell = bb.population[0]
# cell.cell_program.print_gene_info(2)

# cell.cell_program.grn.print_trees()

In [9]:
def r():
    return np.random.uniform(0, 1)

class Solution:
    def __init__(self, grn):
        self.grn = grn
        
    def copy(self):
        return Solution(self.grn.copy())
        
    def mutate(self):
        mutate_grn2(self.grn)

In [10]:
individual_generator().grn

>> G_0: init: 1.77; noise: 6.02; b: 3.48; m: 7.60; expr: 9.62; deg: 3.14; thr: 8.43; theta: 5.42; tree : 3
>> G_1: init: 2.32; noise: 6.11; b: 5.82; m: 4.50; expr: 7.62; deg: 4.45; thr: 5.98; theta: 5.01; tree : 1
>> G_2: init: 1.97; noise: 9.27; b: 4.27; m: 6.48; expr: 7.88; deg: 6.08; thr: 7.81; theta: 0.75; tree : ((2 OR (4 AND 0)) AND 3)
>> G_3: init: 0.97; noise: 6.27; b: 3.16; m: 5.74; expr: 7.20; deg: 7.76; thr: 9.64; theta: 6.07; tree : 3
>> G_4: init: 0.76; noise: 3.41; b: 6.66; m: 5.09; expr: 0.22; deg: 6.58; thr: 9.33; theta: 4.35; tree : 1

In [11]:
def shrink_and_align_stats(stats, ref, max_step=None, min_step=None):
    min_data_step = max(min(stats.time), min(ref.time))
    if min_step is None:
        min_step = min_data_step
    else:
        min_step = max(min_step, min_data_step)

    if max_step is None:
        max_step = max(ref.time)
    else:
        max_step = min(max_step, max(ref.time))
    
    new_stats = stats[(stats.time >= min_step) & (stats.time <= max_step)].set_index("time")
    new_ref = ref[(ref.time >= min_step) & (ref.time <= max_step)].set_index("time")
    
    return new_stats, new_ref

In [12]:
def preprocess_progenitor_size(stats, ref):
    colname_ref = "progenitor_pop_size"
    colname_stats = "progenitor_pop_size"
    
    min_step = min(ref.index)  # ref.index is the time
    stats[colname_stats] = stats[colname_stats] / stats[colname_stats].get(min_step)
    ref[colname_ref] = ref[colname_ref] / ref[colname_ref].get(min_step)
    x = [ref[colname_ref].get(t, 0) for t in ref.index]
    y = [stats[colname_stats].get(t, 0) for t in ref.index]
    return x, y

def preprocess_whole_size(stats, ref):
    colname_ref = "whole_pop_size"
    colname_stats = "whole_pop_size"
    
    min_step = min(ref.index)  # ref.index is the time
    stats[colname_stats] = stats[colname_stats] / stats[colname_stats].get(min_step)
    ref[colname_ref] = ref[colname_ref] / ref[colname_ref].get(min_step)
    x = [ref[colname_ref].get(t, 0) for t in ref.index]
    y = [stats[colname_stats].get(t, 0) for t in ref.index]
    return x, y

In [13]:
def score_coefficient_variation(x, y):
    return sum([(x_ - y_)**2 / x_ for x_, y_ in zip(x, y)])

In [14]:
def score_progenitor_size(stats, ref, max_step=None, min_step=None):
    stats, ref = shrink_and_align_stats(stats, ref, max_step, min_step)
    x, y = preprocess_progenitor_size(stats, ref)
    return score_coefficient_variation(x, y)

def score_whole_size(stats, ref, max_step=None, min_step=None):
    stats, ref = shrink_and_align_stats(stats, ref, max_step, min_step)
    x, y = preprocess_progenitor_size(stats, ref)
    return score_coefficient_variation(x, y)

def score_both_size(stats, ref, max_step=None, min_step=None):
    return score_progenitor_size(stats, ref, max_step, min_step) \
            + score_whole_size(stats, ref, max_step, min_step)

In [15]:
def run_grn(prun, grn):
    ccls = factories["grn2"](grn=grn)
    bb = Brain(time_step=0.5, verbose=False, start_population=4, max_pop_size=5e2,
            cell_cls=ccls, end_time=prun.end_time, start_time=50, silent=True)
    bb.run()
    return bb

In [16]:
def get_bb(prun, grn):
    ccls = factories["grn2"](grn=grn)
    bb = Brain(time_step=0.5, verbose=False, start_population=4, max_pop_size=5e2,
            cell_cls=ccls, end_time=prun.end_time, start_time=50, silent=True)
    return bb

In [17]:
def fitness_func(prun, grn, score_func):
    bb = run_grn(prun, grn)
    output = score_func(bb.stats, REF, max_step=prun.end_time)
    fitness = 1.0 / output
    return fitness

In [18]:
class ObjectiveStep(O):
    end_time = 0
    max_fitness = 4
    min_fitness = 1
    
example_steps = [
    ObjectiveStep(end_time=53),
    ObjectiveStep(end_time=56),
    ObjectiveStep(end_time=59),
    ObjectiveStep(end_time=62),
    ObjectiveStep(end_time=65),
    ObjectiveStep(end_time=68),
    ObjectiveStep(end_time=71)
]

In [19]:
def fitness_multistep(prun, grn, steps):
    total_fitness = 0
    stop = False
    previous_time = None
    bb = get_bb(prun, grn)
    # first step
    for step in steps:
        if not bb.run_until(step.end_time):
            stop = True
        score_step = score_both_size(bb.stats, prun.ref, max_step=step.end_time, min_step=previous_time)
        fitness_step = 1.0 / score_step
        fitness_step = min(fitness_step, step.max_fitness)
        total_fitness += fitness_step
        if fitness_step < step.min_fitness or stop:
            return total_fitness
        previous_time = step.end_time
        
    return total_fitness
# TODO make a no run version (i.e. just score)

In [20]:
def fitness_strategy(prun, grn):
    total_fitness = 0
    stop = False
    bb = get_bb(prun, grn)
    # first step
    step1_end_time = 53
    step1_max_fitness = 4.
    step1_min_fitness = 1.
    if not bb.run_until(step1_end_time):
        stop = True
    score1 = score_both_size(bb.stats, prun.ref, max_step=step1_end_time)
    fitness1 = 1.0 / score1
    fitness1 = min(fitness1, step1_max_fitness)
    total_fitness += fitness1
    if fitness1 < step1_min_fitness or stop:
        return total_fitness
    
    # second step
    step2_end_time = 56
    step2_max_fitness = 4.
    step2_min_fitness = 1.
    if not bb.run_until(step2_end_time):
        stop = True
    score2 = score_both_size(bb.stats, prun.ref, max_step=step2_end_time, min_step=step1_end_time)
    fitness2 = 1.0 / score2
    fitness2 = min(fitness2, step2_max_fitness)
    total_fitness += fitness2
    if fitness2 < step2_min_fitness or stop:
        return total_fitness
    
    # second step
    step3_end_time = 65
    step3_max_fitness = 4.
    step3_min_fitness = 1.
    if not bb.run_until(step3_end_time):
        stop = True
    score3 = score_both_size(bb.stats, prun.ref, max_step=step3_end_time, min_step=step2_end_time)
    fitness3 = 1.0 / score3
    fitness3 = min(fitness3, step3_max_fitness)
    total_fitness += fitness3
    
    return total_fitness

In [21]:
def mean_sd_fitness(prun, grn, run=3):
    fitnesses = [fitness_strategy(prun, grn) for i in range(run)]
    return np.mean(fitnesses), np.std(fitnesses)

In [22]:
def do_init_pop(prun):
    return [individual_generator() for i in range(prun.pop_size)]

def do_fitness(prun, pop):
    # fitness = [fitness_func(prun, sol.grn, score_func) for sol in pop]
    # fitness = [fitness_strategy(prun, sol.grn) for sol in pop]
    fitness = [fitness_multistep(prun, sol.grn, prun.steps) for sol in pop]
    return fitness

def do_selection(prun, pop_fit, pop):
    # print("Fit score : ", pop_fit)
    acc = list(accumulate(pop_fit))
    best = max(pop_fit)
    best_sol = pop[pop_fit.index(best)]
    
    print("Total fitness :", acc[-1])
    
    pop_sel, _ = weighted_selection(pop, pop_fit, individual_generator, new_fitness=0.3)
        
    return pop_sel, best, best_sol

def do_mutation(prun, pop_sel):
    [p.mutate() for p in pop_sel]
    return pop_sel

In [23]:
class ParamRun(O):
    pop_size = 30
    n_gen = 30
    current_gen = 0
    end_time = 71
    ref = REF

args = ParamRun()
args.steps = example_steps

def main(prun):
    bbest = 0
    ttbest = None
    pop = do_init_pop(prun)
    for generation in range(prun.n_gen):
        # args.generation = generation
        # objective.new_trial()
        fit = do_fitness(prun, pop)
        # objective.best_current(max(fit))
        
        # TODO get the stats associated with the best scores
        sel, best, best_sol = do_selection(prun, fit, pop)
        if best > bbest:
            print(f"++ Best {best}")
            bbest = best
            ttbest = best_sol.copy()
            HALL_OF_FAME.append(best_sol.copy())
        else:
            # print("--")
            print(f"-- Best {best}")
        pop = do_mutation(prun, sel)
        
    return ttbest

In [None]:
"""
objective = Objective([
    (score_progenitor_size, 10, 2),
    (score_both_size, 10, 4)
])
"""
sol = main(args)

Total fitness : 25.469397592816268
++ Best 22.390365212619137
Total fitness : 530.3256955938671
++ Best 24.982241236006878
Total fitness : 516.3659547562022
++ Best 25.35206063685618
Total fitness : 633.3693604191964
++ Best 25.834084936830426
Total fitness : 514.7827818421033
-- Best 24.561441376385513
Total fitness : 598.0937821713811
-- Best 25.003673455433407
Total fitness : 568.0613894691293
-- Best 25.197659720174144
Total fitness : 478.4875431156803
-- Best 25.1323746051846
Total fitness : 606.6564241338947
-- Best 25.42201327932948
Total fitness : 574.4616824603257
-- Best 24.757077744203478
Total fitness : 587.4048123663545
-- Best 25.16697254037605
Total fitness : 514.9239344541294
-- Best 24.681964855497295
Total fitness : 519.8742526173303
++ Best 26.842495996669363
Total fitness : 447.06259875536523
-- Best 25.10500367200521
Total fitness : 608.7644136372674
-- Best 25.94007491924851
Total fitness : 484.87071935177596
++ Best 26.874414050662217
Total fitness : 513.08585699

In [None]:
sol.grn.genes[4].tree

In [None]:
for one in HALL_OF_FAME:
    print(np.mean([fitness_strategy(args, one.grn) for i in range(5)]))

In [None]:
HALL_OF_FAME[-1].grn

In [None]:
bb = run_grn(args, HALL_OF_FAME[-1].grn)

In [None]:
def show_curve(stats, ref, max_step=None, show=True):
    """
    The evaluation function for progenitor population size.
    The lower is the better
    :param stats: the stats of the bb after running
    """
    stats, ref = shrink_and_align_stats(stats, ref, max_step=max_step)
    
    x, y = preprocess_progenitor_size(stats, ref)
    
    plt.plot(ref.index, x, label="Reference Prog")
    plt.plot(ref.index, y, label="Simulation Prog")
    
    x, y = preprocess_whole_size(stats, ref)
    
    plt.plot(ref.index, x, label="Reference Whole")
    plt.plot(ref.index, y, label="Simulation Whole")
    
    plt.legend()
    
    if show:
        plt.show()

In [None]:
show_curve(bb.stats, REF, max_step=args.end_time)

In [None]:
output = objective.get_objective_func()(bb.stats, REF, max_step=args.end_time)
fitness = 1.0 / output
fitness