# Goal: implement multiprocessing

In [1]:
import os
# Set before the project modules are imported. Per the JAX GPU-memory documentation this
# variable sets the fraction of GPU memory XLA preallocates (here 20 %).
# NOTE(review): presumably one of the project imports below uses JAX - confirm.
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = ".20"

In [2]:
import pandas as pd
from model import Brain
from submodels import factories
import matplotlib.pyplot as plt
import pandas as pd
from itertools import accumulate
import numpy as np
from collections import defaultdict
import re

from jf.db import DB
from lib.score import fate_corr
from lib.preprocess import get_fmetric_pairs
from lib.sde.grn2 import GRNMain2
from lib.sde.mutate import mutate_grn2
from lib.ga.utils import weighted_selection
from lib.ga.objective import Objective
from jf.profiler import Profiler
from jf.utils.export import Exporter
from jf.autocompute.jf import O

In [3]:
# Per-generation record filled by main(): generation index -> monitor dict
# (keys "transition", "solution", "fitness", "stats").
HISTORY = defaultdict(dict)
# Copies of the individuals that improved on the best fitness so far (appended by main()).
HALL_OF_FAME = []

In [39]:
def setup_ref_fmetric(fname):
    """Load the reference fate-correlation table as a nested dict.

    The CSV is read with its first column as index. Each index label must contain
    ``fate_corr_<begin>_<end>``; the value is taken from the column named ``"0"``.

    :param fname: path (or file-like object) accepted by ``pandas.read_csv``
    :return: ``defaultdict(dict)`` such that ``result[float(begin)][float(end)]`` is the value
    :raises IndexError: if an index label does not match the expected pattern
    """
    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    label_pattern = re.compile(r"fate_corr_(\d+)_(\d+)")
    dict_ref_fmetric = defaultdict(dict)
    df = pd.read_csv(fname, index_col=0)
    for label, value in df["0"].items():
        begin, end = label_pattern.findall(label)[0]
        dict_ref_fmetric[float(begin)][float(end)] = value
    return dict_ref_fmetric

In [40]:
# Display the parsed reference table as the cell output (the result is not stored).
setup_ref_fmetric("output/results/setup_basic/export/ref_fmetric_tristate.csv")

defaultdict(dict,
            {50.0: {60.0: 0.6570465461862538,
              70.0: 0.6064722012505317,
              80.0: 0.5574550980380129,
              90.0: 0.5531611479569236},
             60.0: {70.0: 0.5757604703955359},
             70.0: {80.0: 0.4450630556749089},
             80.0: {80.0: 0.4715540706129272}})

In [41]:
# Reference data bundle used by the scoring functions:
#   REF.stats   - DataFrame read from ref_basic2.csv (read by score_bb_size as ref.stats)
#   REF.fmetric - nested dict {begin: {end: value}} built by setup_ref_fmetric
REF = O(
    stats=pd.read_csv("output/results/setup_basic/export/ref_basic2.csv"),  # ref is a mean
    fmetric=setup_ref_fmetric("output/results/setup_basic/export/ref_fmetric_tristate.csv"),
)

In [7]:
def individual_generator():
    """Create a new Solution wrapping a freshly constructed ``GRNMain2(5, 0, 0)``."""
    fresh_grn = GRNMain2(5, 0, 0)
    return Solution(fresh_grn)

In [8]:
def r():
    """Return one draw from ``np.random.uniform`` with bounds 0 and 1 (global NumPy RNG)."""
    return np.random.uniform(low=0, high=1)

class Solution:
    """One individual of the genetic algorithm: a thin wrapper around a GRN object."""

    def __init__(self, grn):
        """Store ``grn``; it must provide ``copy()`` for :meth:`copy` to work."""
        self.grn = grn

    def copy(self):
        """Return a new Solution holding ``self.grn.copy()``."""
        return Solution(self.grn.copy())

    def mutate(self, n_mutations=5):
        """Apply ``mutate_grn2`` to the wrapped GRN ``n_mutations`` times.

        :param n_mutations: number of mutation passes; defaults to 5, the value that
            was previously hard-coded, so ``mutate()`` behaves as before.
        """
        for _ in range(n_mutations):
            mutate_grn2(self.grn)

In [9]:
# Display the GRN of a freshly generated individual (cell output only).
individual_generator().grn

>> G_0: init: 1.28; noise: 2.78; b: 6.66; m: 7.05; expr: 5.86; deg: 10.00; thr: 8.12; theta: 3.32; tree : 1
>> G_1: init: 2.34; noise: 5.90; b: 8.57; m: 6.33; expr: 3.09; deg: 4.68; thr: 4.18; theta: 5.51; tree : 3
>> G_2: init: 1.13; noise: 4.18; b: 8.22; m: 9.15; expr: 3.37; deg: 6.91; thr: 8.00; theta: 6.90; tree : ((0 AND 3) AND (2 OR (4 AND 1)))
>> G_3: init: 1.33; noise: 1.37; b: 9.86; m: 9.21; expr: 3.84; deg: 1.90; thr: 3.23; theta: 3.72; tree : 2
>> G_4: init: 2.51; noise: 2.18; b: 4.84; m: 4.77; expr: 7.07; deg: 5.18; thr: 7.17; theta: 6.87; tree : 1

In [10]:
def shrink_and_align_stats(stats, ref, max_step=None, min_step=None):
    """Restrict ``stats`` and ``ref`` to a common time window and index both by ``time``.

    The lower bound is the later of the two first time points, raised to ``min_step``
    when that is larger. The upper bound is the last time point of ``ref``, lowered to
    ``max_step`` when that is smaller. Both bounds are inclusive.

    :param stats: frame with a ``time`` column (simulation)
    :param ref: frame with a ``time`` column (reference)
    :return: ``(new_stats, new_ref)``, filtered copies indexed by ``time``
    """
    earliest_common = max(min(stats.time), min(ref.time))
    latest_ref = max(ref.time)

    lower = earliest_common if min_step is None else max(min_step, earliest_common)
    upper = latest_ref if max_step is None else min(max_step, latest_ref)

    def _window(frame):
        # Keep the rows inside [lower, upper] and make time the index.
        inside = (frame.time >= lower) & (frame.time <= upper)
        return frame[inside].set_index("time")

    return _window(stats), _window(ref)

In [11]:
def _normalised_size_curves(stats, ref, colname):
    """Return ``(x, y)``: column ``colname`` of ``ref`` and ``stats`` sampled on ``ref.index``.

    Each series is divided by its own value at the first time point of ``ref``.
    Time points of ``ref.index`` that are missing from a series contribute 0.
    The input frames are left unmodified.
    """
    first_step = min(ref.index)  # ref.index is the time
    ref_curve = ref[colname] / ref[colname].get(first_step)
    stats_curve = stats[colname] / stats[colname].get(first_step)
    x = [ref_curve.get(t, 0) for t in ref.index]
    y = [stats_curve.get(t, 0) for t in ref.index]
    return x, y


def preprocess_progenitor_size(stats, ref):
    """Normalised ``progenitor_pop_size`` curves of reference (x) and simulation (y).

    :param stats: simulation frame indexed by time
    :param ref: reference frame indexed by time
    :return: ``(x, y)`` lists aligned on ``ref.index``
    """
    return _normalised_size_curves(stats, ref, "progenitor_pop_size")


def preprocess_whole_size(stats, ref):
    """Normalised ``whole_pop_size`` curves of reference (x) and simulation (y).

    :param stats: simulation frame indexed by time
    :param ref: reference frame indexed by time
    :return: ``(x, y)`` lists aligned on ``ref.index``
    """
    return _normalised_size_curves(stats, ref, "whole_pop_size")

In [12]:
def score_coefficient_variation(x, y):
    """Sum of ``(x_i - y_i)**2 / x_i`` over the paired elements of ``x`` and ``y``.

    ``x`` acts as the reference (it is the denominator); lower is better, 0 is a
    perfect match. Pairs beyond the shorter sequence are ignored (``zip``).
    """
    def _term(expected, observed):
        return (expected - observed) ** 2 / expected

    return sum(_term(expected, observed) for expected, observed in zip(x, y))

In [13]:
def score_progenitor_size(stats, ref, max_step=None, min_step=None):
    """Score the progenitor population curve of ``stats`` against ``ref`` (lower is better).

    Both frames are first cut to the ``[min_step, max_step]`` window.
    """
    aligned_stats, aligned_ref = shrink_and_align_stats(stats, ref, max_step, min_step)
    ref_curve, sim_curve = preprocess_progenitor_size(aligned_stats, aligned_ref)
    return score_coefficient_variation(ref_curve, sim_curve)

def score_whole_size(stats, ref, max_step=None, min_step=None):
    """Score the whole population curve of ``stats`` against ``ref`` (lower is better).

    Both frames are first cut to the ``[min_step, max_step]`` window.
    """
    aligned_stats, aligned_ref = shrink_and_align_stats(stats, ref, max_step, min_step)
    ref_curve, sim_curve = preprocess_whole_size(aligned_stats, aligned_ref)
    return score_coefficient_variation(ref_curve, sim_curve)

def score_both_size(stats, ref, max_step=None, min_step=None):
    """Return the progenitor-size score plus the whole-size score for the same window."""
    progenitor_part = score_progenitor_size(stats, ref, max_step, min_step)
    whole_part = score_whole_size(stats, ref, max_step, min_step)
    return progenitor_part + whole_part

def score_bb_size(bb, ref, *args, **kwargs):
    """Adapter: score ``bb.stats`` against ``ref.stats`` with :func:`score_both_size`.

    Extra positional and keyword arguments are forwarded unchanged.
    """
    simulated_stats = bb.stats
    reference_stats = ref.stats
    return score_both_size(simulated_stats, reference_stats, *args, **kwargs)

In [14]:
# Scratch check of the fmetric scale: a gap of 0.4 gives a score of 25 * 0.4**2 = 4,
# whose inverse is 0.25. NOTE(review): appears to back the "0.0 vs 0.4 -> 0.25" note
# in score_fmetric - confirm.
1 / (0.16*25)

0.25

In [42]:
def score_fmetric(population, ref, min_step=50, max_step=60):
    """Score the fate correlation of ``population`` against the reference value.

    The result is ``25 * (fmetric - ref_value)**2`` (lower is better), where
    ``ref_value`` is ``ref[float(min_step)][float(max_step)]``.

    Worked examples, expressed as the inverse of the score (the fitness):
      * 0.3 vs 0.4 -> score 0.25 -> fitness 4
      * 0.0 vs 0.4 -> score 4    -> fitness 0.25
    """
    simulated = fate_corr(get_fmetric_pairs(population, min_time=min_step, max_time=max_step))
    expected = ref[float(min_step)][float(max_step)]
    gap = simulated - expected
    return 25 * gap**2

def score_bb_fmetric(bb, ref, *args, **kwargs):
    """Adapter: score ``bb.population`` against ``ref.fmetric`` with :func:`score_fmetric`.

    Extra positional and keyword arguments are forwarded unchanged.
    """
    simulated_population = bb.population
    reference_table = ref.fmetric
    return score_fmetric(simulated_population, reference_table, *args, **kwargs)

In [16]:
def run_grn(prun, grn):
    """Build a Brain for ``grn`` via ``get_bb``, run it with ``bb.run()`` and return it.

    The result of ``get_bb`` was previously discarded, so ``bb`` resolved to a global
    (or raised NameError); it is now bound locally.
    """
    bb = get_bb(prun, grn)
    bb.run()
    return bb

In [17]:
def get_bb(prun, grn):
    """Build (without running) a Brain whose cell class comes from the ``grn2_opti`` factory.

    The simulation settings are fixed here except ``end_time``, which comes from
    ``prun.end_time``.
    """
    cell_factory = factories["grn2_opti"]
    brain_settings = dict(
        time_step=0.125,
        verbose=False,
        start_population=5,
        max_pop_size=1e3,
        cell_cls=cell_factory(grn=grn),
        end_time=prun.end_time,
        start_time=50,
        silent=True,
    )
    return Brain(**brain_settings)

In [18]:
def fitness_func(prun, grn, score_func):
    """Run ``grn`` to completion and return ``1.0 / score``.

    ``score_func`` is called as ``score_func(bb.stats, REF, max_step=prun.end_time)``.
    NOTE(review): it receives the module-level REF container itself, not ``REF.stats``;
    confirm this matches what the chosen ``score_func`` expects.
    """
    simulated = run_grn(prun, grn)
    score = score_func(simulated.stats, REF, max_step=prun.end_time)
    return 1.0 / score

In [19]:
def fitness_multistep(prun, grn, steps):
    """Simulate ``grn`` step by step and accumulate the capped fitness of each step.

    For every step the Brain is advanced with ``bb.run_until(step.end_time)`` and then
    scored with ``step.score_func(bb, prun.ref, max_step=..., min_step=...)``. The step
    fitness is ``1 / score`` capped at ``step.max_fitness``. Evaluation stops after the
    first step whose fitness is below ``step.min_fitness`` or for which ``run_until``
    returned a falsy value; that step still counts towards the total.

    :param prun: run parameters; ``prun.ref`` is handed to the score functions
    :param grn: GRN to simulate
    :param steps: ordered iterable of step objects (see ObjectiveStep)
    :return: ``(total_fitness, bb.stats)``
    """
    total_fitness = 0
    bb = get_bb(prun, grn)
    for step in steps:
        completed = bb.run_until(step.end_time)
        score_step = step.score_func(bb, prun.ref, max_step=step.end_time, min_step=step.start_time)
        if score_step == 0:
            # Perfect match: the inverse is unbounded, so award the cap directly
            # instead of dividing by zero.
            fitness_step = step.max_fitness
        else:
            fitness_step = min(1.0 / score_step, step.max_fitness)
        total_fitness += fitness_step
        if fitness_step < step.min_fitness or not completed:
            break
        step.passed()

    return total_fitness, bb.stats

def score_multistep(prun, stats, steps, population=None):
    """Re-score already recorded simulation output against ``steps``.

    The step score functions take a Brain-like object and read ``.stats`` and/or
    ``.population`` from it. This function previously referenced an undefined ``bb``
    and ignored ``stats``; it now wraps its arguments in a small namespace exposing
    those two attributes.

    :param prun: run parameters; ``prun.ref`` is handed to the score functions
    :param stats: recorded stats exposed to the score functions as ``.stats``
    :param steps: ordered iterable of step objects (see ObjectiveStep)
    :param population: optional population exposed as ``.population``; needed only by
        steps whose score function reads it (e.g. score_bb_fmetric)
    :return: accumulated fitness, stopping after the first step below ``min_fitness``
    """
    from types import SimpleNamespace

    snapshot = SimpleNamespace(stats=stats, population=population)
    total_fitness = 0
    for step in steps:
        score_step = step.score_func(snapshot, prun.ref, max_step=step.end_time, min_step=step.start_time)
        if score_step == 0:
            # Perfect match: award the cap instead of dividing by zero.
            fitness_step = step.max_fitness
        else:
            fitness_step = min(1.0 / score_step, step.max_fitness)
        total_fitness += fitness_step
        if fitness_step < step.min_fitness:
            break

    return total_fitness

In [20]:
def mean_sd_fitness(prun, grn, run=3):
    """Evaluate ``grn`` ``run`` times and return ``(mean, std)`` of the total fitness.

    ``fitness_multistep`` needs the step list and returns ``(fitness, stats)``, so the
    steps are taken from ``prun.steps`` and only the fitness component is aggregated.
    """
    fitnesses = [fitness_multistep(prun, grn, prun.steps)[0] for _ in range(run)]
    return np.mean(fitnesses), np.std(fitnesses)

In [21]:
def do_init_pop(prun):
    """Return a list of ``prun.pop_size`` individuals from ``individual_generator``."""
    population = []
    for _ in range(prun.pop_size):
        population.append(individual_generator())
    return population

def do_fitness(prun, pop):
    """Evaluate every individual of ``pop`` with ``fitness_multistep`` on ``prun.steps``.

    :return: ``(fitness, stats)``, two tuples aligned with ``pop``; both are empty
        tuples when ``pop`` is empty (previously the unpacking raised ValueError).
    """
    results = [fitness_multistep(prun, sol.grn, prun.steps) for sol in pop]
    if not results:
        return (), ()
    fitness, stats = zip(*results)
    return fitness, stats

def do_selection(prun, pop_fit, pop):
    """Print the summed fitness, run ``weighted_selection`` and locate the best individual.

    :param prun: run parameters (unused here)
    :param pop_fit: fitness values aligned with ``pop``
    :param pop: current population
    :return: ``(pop_sel, history_sel, best_id)`` where the first two come from
        ``weighted_selection`` and ``best_id`` is the index of the first maximum of
        ``pop_fit``
    """
    running_totals = list(accumulate(pop_fit))
    best_id = pop_fit.index(max(pop_fit))

    print("Total fitness :", running_totals[-1])

    selection = weighted_selection(pop, pop_fit, individual_generator, new_fitness=0.3)
    pop_sel, history_sel = selection
    return pop_sel, history_sel, best_id

def do_mutation(prun, pop_sel):
    """Call ``mutate()`` on every individual of ``pop_sel`` in place and return ``pop_sel``."""
    for individual in pop_sel:
        individual.mutate()
    return pop_sel

In [43]:
# One stage of the multi-step objective: a time window, a score function (supplied as
# a keyword at construction) and the fitness bounds used by fitness_multistep.
class ObjectiveStep(O):
    start_time = 0
    end_time = 0
    max_fitness = 6
    min_fitness = 1
    name = ""
    _passed = False

    def reset(self):
        """Forget that the step was passed so the next success is announced again."""
        self._passed = False

    def passed(self):
        """Announce the first time this step is passed; later calls do nothing."""
        if not self._passed:
            print(f"Step {self.name} passed !")
            self._passed = True
    
# Ordered objectives evaluated by fitness_multistep: the simulation is advanced to each
# step's end_time and scored on [start_time, end_time] with that step's score_func.
example_steps = [
    ObjectiveStep(name="1", start_time=50, end_time=53, score_func=score_bb_size),
    ObjectiveStep(name="2", start_time=53, end_time=56, score_func=score_bb_size),
    ObjectiveStep(name="3", start_time=56, end_time=59, score_func=score_bb_size),
    ObjectiveStep(name="4", start_time=59, end_time=62, score_func=score_bb_size),
    ObjectiveStep(name="5 - pre fmetric", start_time=62, end_time=65, score_func=score_bb_size),
    
    # NOTE(review): this step's end_time (60) is earlier than the previous step's (65),
    # so run_until(60) is called on a simulation already advanced to 65 - confirm that
    # Brain.run_until handles this as intended.
    ObjectiveStep(name="FMetric", start_time=50, end_time=60, score_func=score_bb_fmetric),
    
    ObjectiveStep(name="6", start_time=65, end_time=68, score_func=score_bb_size),
]

# Run configuration consumed by main() and the do_* helpers.
class ParamRun(O):
    pop_size = 10  # number of individuals created by do_init_pop
    n_gen = 30  # number of generations iterated by main()
    current_gen = 0  # not read by any of the visible cells
    end_time = 83  # forwarded to Brain(end_time=...) in get_bb
    ref = REF  # reference data handed to every step's score_func

# Instantiate the run parameters and attach the objective steps.
args = ParamRun()
args.steps = example_steps
# The step objects are shared module-level instances: clear their "passed" flag so
# that re-running this cell re-enables the "Step ... passed !" messages.
for step in args.steps:
    step.reset()

In [33]:
def main(prun):
    """Run the genetic algorithm for ``prun.n_gen`` generations.

    Each generation: evaluate the population, select, record a new best in
    HALL_OF_FAME, mutate the selection, then store and export a monitor dict.

    :param prun: run parameters (``n_gen``, ``pop_size``, ``steps``, ``ref``, ...)
    :return: the best fitness value seen (a number, not a Solution)
    """
    exporter = Exporter()
    best = 0
    # Warm start: reuse GOOD_POP when an earlier cell defined it. Otherwise fall back to
    # a fresh random population, so the function also works on a fresh kernel instead of
    # raising NameError.
    pop = globals().get("GOOD_POP")
    if pop is None:
        pop = do_init_pop(prun)
    for generation in range(prun.n_gen):
        fit, stats = do_fitness(prun, pop)

        # TODO get the stats associated with the best scores
        sel, history_sel, best_id = do_selection(prun, fit, pop)
        if fit[best_id] > best:
            print(f"++ Best {fit[best_id]}")
            best = fit[best_id]
            # Copy before mutation so the hall of fame keeps the evaluated individual.
            HALL_OF_FAME.append(pop[best_id].copy())
        else:
            print(f"-- Best {best}")
        pop = do_mutation(prun, sel)

        # history
        # NOTE(review): "solution" holds the selected-and-mutated population (the input
        # of the next generation) while "fitness"/"stats" describe the population
        # evaluated in this generation; indices may not line up - confirm against
        # weighted_selection.
        monitor = dict(
            transition=history_sel,
            solution=pop,
            fitness=fit,
            stats=stats,
        )
        HISTORY[generation] = monitor
        exporter(monitor, f"generation_{generation}")

    return best

In [None]:
# Launch the optimisation. Despite the name, `sol` receives the best fitness value
# returned by main(), not a Solution object.
sol = main(args)

Exporting at output/2022-01-13/16:51:44.359567_483932
Step 1 passed !
Step 2 passed !
Step 3 passed !
Step 4 passed !
Step 5 - pre fmetric passed !
Total fitness : 15.664581799675537
++ Best 12.214489259659103
Total fitness : 13.359414851986369
-- Best 12.214489259659103
Total fitness : 7.112087084499377
-- Best 12.214489259659103
Total fitness : 8.649096240372783
-- Best 12.214489259659103
Total fitness : 13.948441900744362
-- Best 12.214489259659103
Total fitness : 7.050071705284252
-- Best 12.214489259659103
Total fitness : 6.005520944128158
-- Best 12.214489259659103
Total fitness : 4.1573640345549485
-- Best 12.214489259659103
Total fitness : 14.901585912946347
-- Best 12.214489259659103
Total fitness : 12.79289746785239
-- Best 12.214489259659103
Total fitness : 8.670452536414826
-- Best 12.214489259659103
Total fitness : 32.11636467541853
++ Best 17.10565264945934
Total fitness : 51.350410963347876
++ Best 24.403810747841234
Total fitness : 82.2139972522453
-- Best 24.4038107478

In [25]:
# A bare `raise` outside an except block fails with RuntimeError.
# NOTE(review): looks like a deliberate barrier to stop "Run All" here - confirm.
raise

RuntimeError: No active exception to reraise

In [32]:
# GOOD_POP = HISTORY[2]["solution"]

In [28]:
# GOOD_POP = HISTORY[2]

In [26]:
# best = HISTORY[25]

In [None]:
# List the generation indices recorded so far.
HISTORY.keys()

In [None]:
# Inspect one recorded generation (index 0, i.e. the first one despite the name `last`).
last = HISTORY[0]
last.keys()

In [None]:
# Pick the entry with the highest fitness in the inspected generation.
# NOTE(review): main() stores the selected-and-mutated population under "solution",
# so `index` (computed from the fitness of the evaluated population) may not point at
# the individual that earned max(fit) - verify.
fit = last['fitness']
index = fit.index(max(fit))
sol = last["solution"][index]
index, max(fit)

In [None]:
# Stats recorded for the best-scoring evaluation of the inspected generation.
stats = last["stats"][index]

In [None]:
def show_curve(stats, ref, max_step=None, show=True):
    """
    Plot the normalised progenitor and whole population curves of the
    simulation against the reference.

    :param stats: the stats of the bb after running (frame with a ``time`` column)
    :param ref: the reference stats (frame with a ``time`` column)
    :param max_step: optional upper time bound passed to shrink_and_align_stats
    :param show: call ``plt.show()`` once the curves and the legend are drawn
    """
    stats, ref = shrink_and_align_stats(stats, ref, max_step=max_step)

    curve_specs = (
        (preprocess_progenitor_size, "Reference Prog", "Simulation Prog"),
        (preprocess_whole_size, "Reference Whole", "Simulation Whole"),
    )
    for preprocess, ref_label, sim_label in curve_specs:
        ref_curve, sim_curve = preprocess(stats, ref)
        plt.plot(ref.index, ref_curve, label=ref_label)
        plt.plot(ref.index, sim_curve, label=sim_label)

    plt.legend()

    if show:
        plt.show()

In [None]:
def print_fmetrics(population, ref, min_step=50, max_step=60):
    """Print the fate correlation of ``population`` next to its reference value.

    :param population: population handed to ``get_fmetric_pairs``
    :param ref: nested mapping ``ref[begin][end]`` of reference values
    :param min_step: start of the window (default 50, the previously hard-coded value)
    :param max_step: end of the window (default 60, the previously hard-coded value)
    """
    pairs = get_fmetric_pairs(population, min_time=min_step, max_time=max_step)
    fmetric = fate_corr(pairs)
    print(f"Population : {fmetric}, ref : {ref[min_step][max_step]}")

In [None]:
# show_curve filters `ref` on its `time` column, so it needs the reference stats
# frame (REF.stats), not the REF container itself.
show_curve(stats, REF.stats, max_step=args.end_time)

In [None]:
# Re-score the selected stats against the objective steps (value shown as cell output).
score_multistep(args, stats, args.steps)

In [None]:
# REF is the O(stats=..., fmetric=...) container; the DataFrame to preview is REF.stats.
REF.stats.head(10)

In [None]:
def fill_df(stats, ls_val):
    """Write one row per ``(time, prog_pop, whole_pop)`` tuple into ``stats``, in place.

    Each row is stored under the index label ``time`` and sets the columns
    ``progenitor_pop_size``, ``time`` and ``whole_pop_size``.
    """
    for entry in ls_val:
        time, prog_pop, whole_pop = entry
        row = {"progenitor_pop_size": prog_pop, "time": time, "whole_pop_size": whole_pop}
        stats.loc[time] = row

In [None]:
# Hand-written test rows for fill_df: (time, progenitor_pop_size, whole_pop_size).
vals = [
    (50.5, 25, 27),
    (51., 26, 30),
    (51.5, 27, 40),
    (52., 30, 50),
    (52.5, 35, 60),
    (53., 40, 27),
    (53.5, 45, 27),
    (54., 50, 27),
    (54.5, 55, 27),
    (55., 60, 27),
]

In [None]:
# Overwrite/add the hand-written rows in `stats` (modified in place), then display it.
fill_df(stats, ls_val=vals)
stats

In [None]:
# Plot and score the hand-edited stats against the reference.
show_curve(stats, REF.stats, max_step=55)
# NOTE(review): `population` is not defined in any visible cell - it must come from
# earlier kernel state; confirm before running on a fresh kernel.
print_fmetrics(population, REF.fmetric)
score_multistep(args, stats, args.steps)

In [None]:
# Idea: maybe use sqrt rather than abs in the denominator.
# Then balance the fitness values by normalising with the median, the mean, or something else (3rd quartile?).