In [1]:
import os
# XLA_PYTHON_CLIENT_MEM_FRACTION is the JAX/XLA knob for the fraction of GPU
# memory the client preallocates (".13" -> 13%).
# NOTE(review): presumably consumed by a JAX-based dependency imported in the
# next cell, so it has to be set before that import — confirm.
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = ".13"

In [2]:
import pandas as pd
from model import Brain
from submodels import factories
import matplotlib.pyplot as plt
import pandas as pd
from itertools import accumulate
import numpy as np
from collections import defaultdict
import re

from jf.db import DB
from lib.score import fate_corr
from lib.preprocess import get_fmetric_pairs
from lib.sde.grn3 import GRNMain3
from lib.sde.mutate import mutate_grn2
from lib.ga.utils import weighted_selection
from lib.ga.objective import Objective
from jf.profiler import Profiler
from jf.utils.export import Exporter
from jf.autocompute.jf import O

In [3]:
# Per-generation monitoring records, keyed by generation index (written by main()).
HISTORY = defaultdict(dict)
# Copies of the individuals that improved on the best fitness seen so far (appended by main()).
HALL_OF_FAME = []

In [4]:
# Reference data the simulations are scored against; the DataFrame lives under REF.stats.
# NOTE(review): the fmetric entry is commented out, so REF.fmetric (read by
# score_bb_fmetric) is not set by this cell.
REF = O(
    stats=pd.read_csv("output/results/setup_basic/export/ref_basic2.csv"),  # ref is a mean
    # fmetric=setup_ref_fmetric("output/results/setup_basic/export/ref_fmetric_tristate.csv"),
)

In [5]:
def individual_generator():
    """Create a brand-new individual: a ``Solution`` wrapping ``GRNMain3(5, 0, 0)``."""
    fresh_grn = GRNMain3(5, 0, 0)
    return Solution(fresh_grn)

In [6]:
class Solution:
    """One candidate of the genetic algorithm: a thin wrapper around a GRN."""

    def __init__(self, grn):
        self.grn = grn

    def copy(self):
        """Return a new ``Solution`` holding a copy of the wrapped GRN."""
        duplicated_grn = self.grn.copy()
        return Solution(duplicated_grn)

    def mutate(self):
        """Mutate the wrapped GRN in place, then pin two parameter rows and recompile."""
        network = self.grn
        for _ in range(1):  # number of mutation rounds applied per call
            mutate_grn2(network)
        # Force fixed values after the mutation: row 4 is set to 1 and row 5 to 0.
        # NOTE(review): presumably the "expr" and "deg" rows (the GRN displayed
        # after mutate() shows expr 1.00 / deg 0.00 for every gene) — confirm.
        network.set_mutable()
        network._params[4, :] = 1
        network._params[5, :] = 0
        network.compile()

In [7]:
# Smoke test: build one individual, mutate it once and display the resulting GRN.
sol = individual_generator()
sol.mutate()
sol.grn

>> G_0: init: 0.21; noise: 7.92; b: 8.23; m: 9.50; expr: 1.00; deg: 0.00; theta: 1.90; tree : 0
>> G_1: init: 0.21; noise: 2.04; b: 7.13; m: 2.86; expr: 1.00; deg: 0.00; theta: 3.64; tree : ((0 OR 4) OR 1)
>> G_2: init: 1.71; noise: 0.28; b: 4.13; m: 7.65; expr: 1.00; deg: 0.00; theta: 7.29; tree : (1 OR 3)
>> G_3: init: 0.03; noise: 3.76; b: 5.89; m: 7.23; expr: 1.00; deg: 0.00; theta: 4.68; tree : NOT 1
>> G_4: init: 2.09; noise: 5.38; b: 4.29; m: 5.87; expr: 1.00; deg: 0.00; theta: 2.63; tree : 3

In [8]:
def shrink_and_align_stats(stats, ref, max_step=None, min_step=None):
    """Restrict ``stats`` and ``ref`` to one common time window, indexed by time.

    :param stats: frame with a ``time`` column (simulation side)
    :param ref: frame with a ``time`` column (reference side)
    :param max_step: optional upper bound; never exceeds the last ``ref`` time
    :param min_step: optional lower bound; never precedes the first time
        present in both frames
    :return: ``(new_stats, new_ref)``, both filtered to the window (bounds
        included) and re-indexed on ``time``
    """
    first_common_time = max(min(stats.time), min(ref.time))
    last_ref_time = max(ref.time)

    lower = first_common_time if min_step is None else max(min_step, first_common_time)
    upper = last_ref_time if max_step is None else min(max_step, last_ref_time)

    def _window(frame):
        inside = (frame.time >= lower) & (frame.time <= upper)
        return frame[inside].set_index("time")

    return _window(stats), _window(ref)

In [9]:
def _normalized_size_curves(stats, ref, colname):
    """Normalise ``colname`` in both frames by its value at the first ref time.

    Both frames are expected to be indexed by time (as returned by
    ``shrink_and_align_stats``). The column is rescaled IN PLACE in ``stats``
    and ``ref``, exactly as the two public wrappers always did.

    :return: ``(x, y)`` where ``x`` is the reference curve and ``y`` the
        simulated curve, both sampled on ``ref.index``; a time missing from a
        frame contributes 0.
    """
    min_step = min(ref.index)  # ref.index is the time
    stats[colname] = stats[colname] / stats[colname].get(min_step)
    ref[colname] = ref[colname] / ref[colname].get(min_step)
    x = [ref[colname].get(t, 0) for t in ref.index]
    y = [stats[colname].get(t, 0) for t in ref.index]
    return x, y


def preprocess_progenitor_size(stats, ref):
    """Return the normalised (reference, simulation) progenitor population curves."""
    return _normalized_size_curves(stats, ref, "progenitor_pop_size")


def preprocess_whole_size(stats, ref):
    """Return the normalised (reference, simulation) whole population curves."""
    return _normalized_size_curves(stats, ref, "whole_pop_size")

In [10]:
def score_coefficient_variation(x, y):
    """Sum of squared differences between ``x`` and ``y``, each divided by ``x``.

    ``x`` is the reference curve and ``y`` the compared one; the lower the
    result, the closer the curves. Pairs are formed with ``zip``, so extra
    items of the longer sequence are ignored.
    """
    relative_squared_errors = (
        (expected - observed) ** 2 / expected
        for expected, observed in zip(x, y)
    )
    return sum(relative_squared_errors)

In [11]:
def score_progenitor_size(stats, ref, max_step=None, min_step=None):
    """Score the progenitor population curve of ``stats`` against ``ref`` (lower is better)."""
    aligned_stats, aligned_ref = shrink_and_align_stats(stats, ref, max_step, min_step)
    ref_curve, sim_curve = preprocess_progenitor_size(aligned_stats, aligned_ref)
    return score_coefficient_variation(ref_curve, sim_curve)

def score_whole_size(stats, ref, max_step=None, min_step=None):
    """Score the whole population curve of ``stats`` against ``ref`` (lower is better)."""
    aligned_stats, aligned_ref = shrink_and_align_stats(stats, ref, max_step, min_step)
    ref_curve, sim_curve = preprocess_whole_size(aligned_stats, aligned_ref)
    return score_coefficient_variation(ref_curve, sim_curve)

def score_both_size(stats, ref, max_step=None, min_step=None):
    """Sum of the progenitor-size and whole-size scores over the same time window."""
    progenitor_score = score_progenitor_size(stats, ref, max_step, min_step)
    whole_score = score_whole_size(stats, ref, max_step, min_step)
    return progenitor_score + whole_score

def score_bb_size(bb, ref, *args, **kwargs):
    """Adapter: score ``bb.stats`` against ``ref.stats`` with ``score_both_size``."""
    simulated_stats = bb.stats
    reference_stats = ref.stats
    return score_both_size(simulated_stats, reference_stats, *args, **kwargs)

In [12]:
def score_fmetric(population, ref, min_step=50, max_step=60):
    """Score the fate correlation of ``population`` against the reference (lower is better).

    The reference value is looked up as ``ref[float(min_step)][float(max_step)]``.
    """
    fate_pairs = get_fmetric_pairs(population, min_time=min_step, max_time=max_step)
    simulated_value = fate_corr(fate_pairs)
    expected_value = ref[float(min_step)][float(max_step)]
    # With the factor 25 and fitness = 1 / score:
    #   0.3 vs 0.4 (gap 0.1) -> score 0.25 -> fitness 4
    #   0.0 vs 0.4 (gap 0.4) -> score 4    -> fitness 0.25
    gap = simulated_value - expected_value
    return 25 * gap**2

def score_bb_fmetric(bb, ref, *args, **kwargs):
    """Adapter: score ``bb.population`` against ``ref.fmetric`` with ``score_fmetric``."""
    simulated_population = bb.population
    reference_fmetric = ref.fmetric
    return score_fmetric(simulated_population, reference_fmetric, *args, **kwargs)

In [13]:
def get_bb(prun, grn):
    """Build (without running) a ``Brain`` simulation whose cells are driven by ``grn``.

    The simulation starts at time 50 with 5 cells and ends at ``prun.end_time``.
    """
    cell_cls = factories["grn3"](grn=grn)
    return Brain(
        time_step=0.5,
        verbose=False,
        start_population=5,
        max_pop_size=1e3,
        cell_cls=cell_cls,
        end_time=prun.end_time,
        start_time=50,
        silent=True,
    )

In [14]:
def run_grn(prun, grn):
    """Build a simulation for ``grn``, run it to completion and return it.

    The freshly built simulation is bound to a local name; the previous code
    discarded the result of ``get_bb`` and then used a global ``bb``, which is
    either undefined or a stale simulation from another cell.
    """
    bb = get_bb(prun, grn)
    bb.run()
    return bb

In [15]:
def fitness_func(prun, grn, score_func):
    """Run ``grn`` once and return its fitness, i.e. the inverse of ``score_func``'s score.

    ``score_func`` is called as ``score_func(stats, REF, max_step=prun.end_time)``.
    """
    # NOTE(review): the module-level REF container is passed as the reference,
    # while the stats-based scorers in this notebook read ``ref.time`` — confirm
    # which object ``score_func`` expects here.
    simulation = run_grn(prun, grn)
    return 1.0 / score_func(simulation.stats, REF, max_step=prun.end_time)

In [16]:
def fitness_multistep(prun, grn, steps):
    """Simulate ``grn`` step by step and accumulate the fitness of each step.

    For every step the simulation is advanced to ``step.end_time`` and scored
    with ``step.score_func`` on the window ``[step.start_time, step.end_time]``.
    The step fitness is ``1 / score`` capped at ``step.max_fitness``. Evaluation
    stops after the first step whose fitness is below ``step.min_fitness`` or
    for which ``run_until`` returned a falsy value; that step's fitness is
    still counted.

    :param prun: run parameters (``prun.ref`` is handed to the score function)
    :param grn: the GRN to simulate
    :param steps: iterable of step objects (``start_time``, ``end_time``,
        ``score_func``, ``min_fitness``, ``max_fitness``, ``passed()``)
    :return: ``(total_fitness, bb.stats)``
    """
    total_fitness = 0
    bb = get_bb(prun, grn)
    for step in steps:
        keep_going = bb.run_until(step.end_time)
        score_step = step.score_func(bb, prun.ref, max_step=step.end_time, min_step=step.start_time)
        if score_step == 0:
            # A perfect score would divide by zero; treat it as the best possible fitness.
            fitness_step = step.max_fitness
        else:
            fitness_step = min(1.0 / score_step, step.max_fitness)
        total_fitness += fitness_step
        if fitness_step < step.min_fitness or not keep_going:
            break
        step.passed()

    return total_fitness, bb.stats

def score_multistep(prun, stats, steps):
    """Re-score already recorded ``stats`` against the steps, printing each step score.

    Mirrors ``fitness_multistep`` without running a simulation: each step is
    scored with ``step.score_func``, converted to ``1 / score`` capped at
    ``step.max_fitness``, and the loop stops after the first step whose
    fitness is below ``step.min_fitness``.

    The score functions receive a lightweight object exposing only ``.stats``
    (the ``stats`` argument). Previously the argument was ignored and a global
    ``bb`` was scored instead. Score functions that need other simulation
    attributes (e.g. ``.population``) cannot be replayed from stats alone.

    :param prun: run parameters (``prun.ref`` is handed to the score function)
    :param stats: the recorded statistics to score
    :param steps: iterable of step objects
    :return: the accumulated fitness
    """
    from types import SimpleNamespace

    recorded = SimpleNamespace(stats=stats)
    total_fitness = 0
    for i, step in enumerate(steps):
        score_step = step.score_func(recorded, prun.ref, max_step=step.end_time, min_step=step.start_time)
        print(f"Score for step {i} is {score_step}")
        if score_step == 0:
            # A perfect score would divide by zero; treat it as the best possible fitness.
            fitness_step = step.max_fitness
        else:
            fitness_step = min(1.0 / score_step, step.max_fitness)
        total_fitness += fitness_step
        if fitness_step < step.min_fitness:
            break

    return total_fitness

In [17]:
def mean_sd_fitness(prun, grn, run=3):
    """Evaluate ``grn`` ``run`` times and return ``(mean, std)`` of the total fitness.

    ``fitness_multistep`` needs the steps (taken from ``prun.steps``, as in
    ``do_fitness``) and returns ``(fitness, stats)``, so only element 0 is
    aggregated.
    """
    fitnesses = [fitness_multistep(prun, grn, prun.steps)[0] for _ in range(run)]
    return np.mean(fitnesses), np.std(fitnesses)

In [18]:
def do_init_pop(prun):
    """Create the initial population of ``prun.pop_size`` fresh individuals."""
    population = []
    for _ in range(prun.pop_size):
        population.append(individual_generator())
    return population

def do_fitness(prun, pop):
    """Evaluate every individual of ``pop`` with the multistep fitness.

    :return: two tuples aligned with ``pop``: the fitness values and the
        simulation stats.
    """
    evaluations = [fitness_multistep(prun, sol.grn, prun.steps) for sol in pop]
    fitness, stats = zip(*evaluations)
    return fitness, stats

def do_selection(prun, pop_fit, pop):
    """Print the total fitness, then draw the next population by weighted selection.

    :return: ``(pop_sel, history_sel, best_id)`` where ``best_id`` is the index
        of the first individual holding the maximum fitness.
    """
    running_totals = list(accumulate(pop_fit))
    best_id = pop_fit.index(max(pop_fit))

    print("Total fitness :", running_totals[-1])

    pop_sel, history_sel = weighted_selection(pop, pop_fit, individual_generator, new_fitness=0.3)
    return pop_sel, history_sel, best_id

def do_mutation(prun, pop_sel):
    """Mutate every individual of ``pop_sel`` in place and return the same list."""
    for individual in pop_sel:
        individual.mutate()
    return pop_sel

In [19]:
class ObjectiveStep(O):
    """One stage of the multistep objective: a time window plus its score function."""

    # Time window scored by this step.
    start_time = 0
    end_time = 0
    # The step fitness (1 / score) is capped at max_fitness; a fitness below
    # min_fitness stops the multistep evaluation.
    max_fitness = 4
    min_fitness = 1
    # Label printed the first time the step is passed.
    name = ""
    # Whether passed() has already announced this step.
    _passed = False

    def reset(self):
        """Forget that the step was passed so it can be announced again."""
        self._passed = False

    def passed(self):
        """Announce the step once; later calls are silent until ``reset()``."""
        if not self._passed:
            print(f"Step {self.name} passed !")
            self._passed = True
    
# Nine consecutive 3-time-unit windows covering 50 -> 77, named "1" to "9",
# all scored on the population sizes.
example_steps = [
    ObjectiveStep(
        name=str(number),
        start_time=50 + 3 * (number - 1),
        end_time=50 + 3 * number,
        score_func=score_bb_size,
    )
    for number in range(1, 10)
]

class ParamRun(O):
    # Parameters of one genetic-algorithm run.
    # Number of individuals created by do_init_pop().
    pop_size = 10
    # Number of generations iterated by main().
    n_gen = 30
    # NOTE(review): not read by any code visible in this notebook.
    current_gen = 0
    # End time given to the simulation built by get_bb().
    end_time = 83
    # Reference container handed to the step score functions.
    ref = REF

# Build the run parameters, attach the objective steps and clear their
# "passed" flags so each step is announced again.
args = ParamRun()
args.steps = example_steps
for step in args.steps:
    step.reset()

In [20]:
def main(prun):
    """Run the genetic algorithm for ``prun.n_gen`` generations.

    Each generation: evaluate the population, select, mutate, then record and
    export a monitoring dict (also stored in ``HISTORY[generation]``).
    Individuals that improve on the best fitness so far are copied into
    ``HALL_OF_FAME``.

    In the monitoring dict, ``solution[i]``, ``fitness[i]`` and ``stats[i]``
    all describe the same evaluated individual. Previously ``solution`` held
    the population after selection and mutation, so picking the solution at
    the index of the best fitness returned an unrelated, already mutated
    individual.

    :param prun: run parameters (``pop_size``, ``n_gen``, ``steps``, ...)
    :return: the best fitness value reached (a number, not a ``Solution``)
    """
    exporter = Exporter()
    best = 0
    pop = do_init_pop(prun)
    for generation in range(prun.n_gen):
        fit, stats = do_fitness(prun, pop)

        # Snapshot the evaluated individuals before selection/mutation can
        # alter them, so the record stays aligned with `fit` and `stats`.
        evaluated = [individual.copy() for individual in pop]

        sel, history_sel, best_id = do_selection(prun, fit, pop)
        if fit[best_id] > best:
            print(f"++ Best {fit[best_id]}")
            best = fit[best_id]
            HALL_OF_FAME.append(pop[best_id].copy())
        else:
            print(f"-- Best {best}")
        pop = do_mutation(prun, sel)

        # history
        monitor = dict(
            transition=history_sel,
            solution=evaluated,
            fitness=fit,
            stats=stats,
        )
        HISTORY[generation] = monitor
        exporter(monitor, f"generation_g{generation}")

    return best

In [26]:
# Reload a previously exported run and pick the individual at index 10 of the
# "solution" entry recorded for generation 24.
exporter = Exporter(name="result_m3_t0.5_n7_o2.0_i2")
res = exporter.load("generation_g24")["solution"][10]

Exporting at output/result_m3_t0.5_n7_o2.0_i2


In [28]:
# Evaluate the reloaded GRN once; [0] keeps the total fitness and drops the stats.
score = fitness_multistep(args, res.grn, args.steps)[0]
print(score)

Step 1 passed !
2.044739668376468


In [31]:
# Display the reloaded GRN.
res.grn

>> G_0: init: 2.62; noise: 1.58; b: 8.26; m: 2.42; expr: 6.69; deg: 3.05; theta: 5.78; tree : 1
>> G_1: init: 1.92; noise: 1.39; b: 7.88; m: 7.28; expr: 7.53; deg: 7.81; theta: 9.26; tree : (4 AND 6)
>> G_2: init: 2.70; noise: 7.15; b: 3.71; m: 5.35; expr: 7.45; deg: 0.03; theta: 5.58; tree : NOT 2
>> G_3: init: 1.19; noise: 4.21; b: 8.76; m: 9.63; expr: 4.24; deg: 0.63; theta: 0.86; tree : ((NOT 6 OR ((4 OR 3) OR 2)) AND (1 OR 0))
>> G_4: init: 2.39; noise: 7.72; b: 6.96; m: 3.94; expr: 0.48; deg: 1.72; theta: 9.58; tree : 3
>> G_5: init: 0.80; noise: 7.51; b: 3.82; m: 3.37; expr: 0.32; deg: 0.40; theta: 7.61; tree : (3 AND NOT (4 OR 6))
>> G_6: init: 2.18; noise: 3.39; b: 6.46; m: 9.45; expr: 8.62; deg: 2.30; theta: 0.86; tree : NOT 4

In [30]:
# Evaluate the same GRN ten times in a row to expose the run-to-run spread of the fitness.
for _trial in range(10):
    print(fitness_multistep(args, res.grn, args.steps)[0])

0.06928295447713885
0.6934611681455447
0.6412896135076359
Step 2 passed !
4.412262899006458
0.06940625045227553
Step 3 passed !
Step 4 passed !
6.624808714461611
1.9941938036063613
0.06927775479095162
0.4415428471789777
0.06937888081787842


In [None]:
# A bare `raise` with no active exception raises RuntimeError.
# NOTE(review): presumably a deliberate barrier so "Run All" stops before the long GA runs below.
raise

In [None]:
# Full GA run; main() returns the best fitness value, so `sol` is a number, not a Solution.
sol = main(args)

In [None]:
# Another full GA run; main() returns the best fitness value reached.
sol2 = main(args)

In [None]:
# Another full GA run; main() returns the best fitness value reached.
sol3 = main(args)

In [None]:
# Another full GA run; main() returns the best fitness value reached.
sol4 = main(args)

In [None]:
# Another full GA run; main() returns the best fitness value reached.
sol5 = main(args)

In [None]:
# HISTORY
# score_multistep(prun, stats, steps)
# score_multistep(args, stats, args.steps)

In [None]:
# Take the record of generation 27 and keep the stats of its best-fitness individual.
# HISTORY is a defaultdict: a generation that was never run yields an empty
# dict, so gen["fitness"] then raises KeyError.
gen = HISTORY[27]
idx = gen["fitness"].index(max(gen["fitness"]))
best_stats = gen["stats"][idx]

In [None]:
# Re-score the stats of the best individual step by step (prints each step score).
score_multistep(args, best_stats, args.steps)

In [None]:
# A bare `raise` with no active exception raises RuntimeError.
# NOTE(review): presumably a deliberate barrier so "Run All" stops before the scratch cells below.
raise

In [None]:
# GOOD_POP = HISTORY[2]["solution"]

In [None]:
# GOOD_POP = HISTORY[2]

In [None]:
# best = HISTORY[25]

In [None]:
# List the generations recorded so far.
HISTORY.keys()

In [None]:
# Inspect the record of generation 0 (despite the name `last`) and list its fields.
last = HISTORY[0]
last.keys()

In [None]:
# Locate the best fitness of the record and fetch the solution stored at the same index.
# NOTE(review): this assumes last["solution"] is index-aligned with
# last["fitness"] — verify against what main() stores under "solution".
fit = last['fitness']
index = fit.index(max(fit))
sol = last["solution"][index]
index, max(fit)

In [None]:
# Simulation stats recorded for the best-fitness individual of that record.
stats = last["stats"][index]

In [None]:
def show_curve(stats, ref, max_step=None, show=True):
    """
    Plot the normalised reference and simulation population curves.

    Both the progenitor and the whole population sizes are drawn on the
    current matplotlib figure, over the common time window.
    :param stats: the stats of the bb after running
    :param ref: the reference stats
    :param max_step: optional upper bound of the plotted time window
    :param show: call ``plt.show()`` once the curves are drawn
    """
    stats, ref = shrink_and_align_stats(stats, ref, max_step=max_step)

    curves = (
        (preprocess_progenitor_size, "Reference Prog", "Simulation Prog"),
        (preprocess_whole_size, "Reference Whole", "Simulation Whole"),
    )
    for preprocess, ref_label, sim_label in curves:
        ref_curve, sim_curve = preprocess(stats, ref)
        plt.plot(ref.index, ref_curve, label=ref_label)
        plt.plot(ref.index, sim_curve, label=sim_label)

    plt.legend()

    if show:
        plt.show()

In [None]:
def print_fmetrics(population, ref):
    """Print the fate correlation of ``population`` on the 50-60 window next to ``ref[50][60]``."""
    fmetric = fate_corr(get_fmetric_pairs(population, min_time=50, max_time=60))
    print(f"Population : {fmetric}, ref : {ref[50][60]}")

In [None]:
# show_curve() reads the `time` column of its reference, so it needs the
# DataFrame stored under REF.stats rather than the REF container itself.
show_curve(stats, REF.stats, max_step=args.end_time)

In [None]:
# Re-score the selected stats step by step (prints each step score).
score_multistep(args, stats, args.steps)

In [None]:
# Preview the reference table; the DataFrame lives under REF.stats (REF is only its container).
REF.stats.head(10)

In [None]:
def fill_df(stats, ls_val):
    """Write hand-made rows into ``stats`` in place, one row per time label.

    :param stats: the DataFrame to modify
    :param ls_val: iterable of ``(time, progenitor_pop_size, whole_pop_size)``
    """
    for row_time, progenitors, whole in ls_val:
        row = {"progenitor_pop_size": progenitors, "time": row_time, "whole_pop_size": whole}
        stats.loc[row_time] = row

In [None]:
# Hand-made rows for fill_df(): (time, progenitor_pop_size, whole_pop_size).
vals = [
    (50.5, 25, 27),
    (51., 26, 30),
    (51.5, 27, 40),
    (52., 30, 50),
    (52.5, 35, 60),
    (53., 40, 27),
    (53.5, 45, 27),
    (54., 50, 27),
    (54.5, 55, 27),
    (55., 60, 27),
]

In [None]:
# Overwrite/insert the hand-made rows into `stats` (modified in place) and display the result.
fill_df(stats, ls_val=vals)
stats

In [None]:
# Plot and score the hand-edited stats against the reference up to time 55.
# NOTE(review): `population` is not defined in any visible cell, and the
# fmetric entry of REF is commented out where REF is built — confirm both
# exist before running this cell.
show_curve(stats, REF.stats, max_step=55)
print_fmetrics(population, REF.fmetric)
score_multistep(args, stats, args.steps)

In [None]:
# maybe use sqrt rather than abs in the denominator
# then balance the fitness values by normalizing with the median, the mean, or something else (3rd quartile?)