In [1]:
import os
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = ".13"

In [2]:
import pandas as pd
from brain import BrainModel
from submodels import factories
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict
from random import shuffle

from lib.score import (
    fate_corr, score_both_size_norm, shrink_and_align_stats, score_stats_norm
)
from lib.preprocess import *
from lib.callback import (
    cell_number_callback, progenitor_number_callback, neuron_number_callback,
    TargetPopulation, TagNumberCallback,
)
from lib.sde.grn.grn4 import GRNMain4 as GRNMain
from lib.sde.mutate import mutate_grn4 as mutate_grn

from lib.ga.utils import weighted_selection_one, normalize_fitness_values
from jf.utils.export import Exporter
from jf.autocompute.jf import O, L
from itertools import product
import jf.models.stringmodel as sm
from lib.analyser import show_curve, show_curve_progenitor
import random
from lib.sde.mutate import mutate_gene4, mutate_tree

In [3]:
HISTORY = defaultdict(dict)
HALL_OF_FAME = []

In [4]:
_count = -1
def provide_id():
    global _count
    _count += 1
    return _count

In [5]:
REF = O(
    stats=pd.read_csv("reference/ref_tristate2.csv"),  # ref is a mean
)

In [6]:
def individual_generator(id_=-1, cb_init=None, nb_genes=4):
    return Solution(GRNMain(nb_genes, 0, 1, generate_funcs=cb_init), id_=id_)

In [7]:
_MUTATE_FUNC = mutate_grn

In [8]:
class Solution:
    def __init__(self, grn, id_=0, parent=-1):
        self.id = id_
        self.grn = grn
        self.parent = parent
        self.fit = -1
        self.stats = None
        
    def copy(self, id_=0):
        return Solution(self.grn.copy(), id_=id_, parent=self.id)
        
    def mutate(self):
        global _MUTATE_FUNC
        _MUTATE_FUNC(self.grn)

In [9]:
def score_bb_size(bb, ref, *args, **kwargs):
    s_prog = score_stats_norm(bb.stats, ref.stats, col_stats=f"progenitor_pop_size",
                     col_ref="progenitor_pop_size", norm=2.0, *args, **kwargs)
    s_all = score_stats_norm(bb.stats, ref.stats, col_stats=f"whole_pop_size",
                     col_ref="whole_pop_size", norm=2.0, *args, **kwargs)
    s_neuron = score_stats_norm(bb.stats, ref.stats, col_stats=f"neuron_pop_size",
                     col_ref="neuron_pop_size", col_norm_data="progenitor_pop_size",
                       col_norm_ref="progenitor_pop_size", norm=2.0, *args, **kwargs)

    return np.mean(s_prog + s_all + s_neuron)

In [10]:
def setup_tag(cp):
    indexes = list(cp.base_population.keys())
    shuffle(indexes)
    splits = np.array_split(indexes, 3)
    for i, ls in enumerate(splits):
        for idx in ls:
            cp.base_population[idx].tag["subbrain"] = i

In [11]:
def get_bb(prun, grn):
    ccls = factories["grn4"](grn=grn)
    callbacks = dict(
        progenitor_pop_size=progenitor_number_callback,
        whole_pop_size=cell_number_callback,
        neuron_pop_size=neuron_number_callback,
    )
    bb = BrainModel(time_step=0.5, verbose=False, start_population=prun.size, max_pop_size=3e2,
            cell_cls=ccls, end_time=prun.end_time, start_time=56, silent=True, opti=True,
              run_tissue=True, monitor_callbacks=callbacks, tag_func=setup_tag)
    return bb

In [12]:
def run_grn(prun, grn):
    get_bb(prun, grn)
    bb.run()
    return bb

In [13]:
def fitness_multistep(prun, grn, steps):
    total_fitness = 0
    stop = False
    previous_time = None
    bb = get_bb(prun, grn)
    # first step
    for step in steps:
        if not bb.run_until(step.end_time):
            stop = True
        # score_step = score_both_size(bb.stats, prun.ref, max_step=step.end_time, min_step=previous_time)
        score_step = step.score_func(bb, prun.ref, max_step=step.end_time, min_step=step.start_time)
        fitness_step = 1.0 / score_step
        fitness_step = min(fitness_step, step.max_fitness)
        total_fitness += fitness_step
        if fitness_step < step.min_fitness or stop:
            return total_fitness, bb.stats
        else:
            previous_time = step.end_time
            step.passed()
        
    return total_fitness, bb.stats

In [14]:
def mean_sd_fitness(prun, grn, run=3):
    fitnesses = [fitness_multistep(prun, grn) for i in range(run)]
    return np.mean(fitnesses), np.std(fitnesses)

In [15]:
def multi_fitness(*args):
    fitnesses = [fitness_multistep(*args) for i in range(3)]
    scores = [x[0] for x in fitnesses]
    return fitnesses[scores.index(min(scores))]

In [16]:
def do_init(prun):
    return individual_generator(provide_id(), prun.cb_init)

def do_fitness(prun, sol):
    fitness, stats = fitness_multistep(prun, sol.grn, prun.steps)
    return fitness, stats

def do_selection(prun, pop_fit, pop):
    if len(pop) < prun.min_pop:
        return individual_generator(provide_id(), prun.cb_init)
    
    pop_fit = normalize_fitness_values(pop_fit)
    
    return weighted_selection_one(pop, pop_fit, lambda x: individual_generator(x, prun.cb_init),
                                  new_fitness=0.5, id_=provide_id())[0]

def do_mutation(prun, sol):
    sol.mutate()
    return sol

In [17]:
class ObjectiveStep(O):
    start_time = 0
    end_time = 0
    max_fitness = 3
    min_fitness = 1
    name = ""
    _passed = False
    
    def reset(self):
        self._passed = False
    
    def passed(self):
        if self._passed:
            return
        print(f"Step {self.name} passed !")
        self._passed = True
    
example_steps = [
    ObjectiveStep(name="The ONE", start_time=56, end_time=86, score_func=score_bb_size, min_fitness=0.2),
]

class ParamRun(O):
    pop_size = 10
    n_gen = 30
    current_gen = 0
    end_time = 86
    ref = REF
    min_pop = 25
    max_pop = 50

def get_prun(size=5, exponent=1):
    prun = ParamRun()
    prun.cb_init = dict()
    prun.size = size
    prun.exponent = exponent
    prun.steps = example_steps
    return prun

In [18]:
def main(prun):
    prun.history = defaultdict(dict)
    # exporter = Exporter()
    best = 0
    sol = do_init(prun)
    pop = [sol]
    for generation in range(prun.n_gen * prun.pop_size):
        # args.generation = generation
        # objective.new_trial()
        fit, stats = do_fitness(prun, sol)
        sol.fit = fit
        sol.stats = stats
        
        # history
        # print(f"Fitness = {fit}", end="\t\t")
        if generation % 100 == 0:
            print(f"Step {generation}")
        if fit > best:
            print(f"++ Best {fit} for generation {generation}")
            best = fit
            
        monitor = sol
        prun.history[generation] = monitor
        # exporter(monitor, f"generation_g{generation}")
        
        # TODO get the stats associated with the best scores
        sub_pop = pop[-prun.max_pop:]
        sol = do_selection(prun, [s.fit for s in sub_pop], sub_pop)
            
        sol = do_mutation(prun, sol)
        pop.append(sol)
        
    print("OVER")
    return best

In [19]:
prun = get_prun()
# res = main(prun)

In [20]:
def get_score_parametrized(cb_init, name=None):
    args = get_prun()
    args.cb_init = cb_init
    args.name = name
    main(args)
    p1 = L(args.history.values()).fit
    idx = p1.index(max(p1))
    sol = args.history[idx]
    scores = [fitness_multistep(args, sol.grn, args.steps)[0] for i in range(10)]
    print("Final score", np.mean(scores), "+-", np.std(scores))
    return np.mean(scores), np.std(scores), max(p1)

In [21]:
def mutate_grn_ctrl(grn):
    grn.set_mutable()
    one_gene = random.choice(grn.genes)
    mutate_gene4(one_gene)
    grn.compile()
    
def mutate_grn_sparse(grn, temperature=0.1):
    grn.set_mutable()
    shape = grn._params.shape
    r = random.random()
    param_prob = 0.8
    if r < param_prob:
        mask = (np.random.uniform(0, 1, shape) > 0.8)
        coeff = np.random.normal(0, temperature, shape)
        true_coeff = mask * coeff + 1
        grn._params *= true_coeff
    else:
        one_gene = random.choice(grn.genes)
        one_gene.tree = mutate_tree(one_gene.tree, one_gene.get_labels_not_in_tree())
    grn.compile()
    
def mutate_grn_verysparse(grn, temperature=0.1):
    grn.set_mutable()
    shape = grn._params.shape
    r = random.random()
    param_prob = 0.8
    if r < param_prob:
        mask = (np.random.uniform(0, 1, shape) > 0.9)
        coeff = np.random.normal(0, temperature, shape)
        true_coeff = mask * coeff + 1
        grn._params *= true_coeff
    else:
        one_gene = random.choice(grn.genes)
        one_gene.tree = mutate_tree(one_gene.tree, one_gene.get_labels_not_in_tree())
    grn.compile()
    
def mutate_grn_full(grn, temperature=0.1):
    grn.set_mutable()
    shape = grn._params.shape
    r = random.random()
    param_prob = 0.8
    if r < param_prob:
        grn._params *= np.random.normal(1, temperature, shape)
    else:
        one_gene = random.choice(grn.genes)
        one_gene.tree = mutate_tree(one_gene.tree, one_gene.get_labels_not_in_tree())
    grn.compile()

In [22]:
model = sm.StringModel("expfit_func{mutfunc}_i{i}")
mut_func = dict(
    ctrl=mutate_grn_ctrl,
    sparse=mutate_grn_sparse,
    verysparse=mutate_grn_verysparse,
    full=mutate_grn_full,
)

In [23]:
callback_init = dict(
    init=lambda: np.random.beta(1.5, 3) * 3,
    b=lambda: np.random.beta(1.5, 3) * 5,
    expr=lambda: 1,
    deg=lambda: 0.1,
)

In [24]:
# main loop
exporter = Exporter(name="exp_mutfunc4_230222")
for (funcname, func), i in product(mut_func.items(), range(10)):
    name = model.fill(mutfunc=funcname, i=i)
    print(name)
    if name in exporter.list():
        continue
    _MUTATE_FUNC = func
    res = get_score_parametrized(callback_init)
    exporter(res, name)

Exporting at output/exp_mutfunc4_230222
expfit_funcctrl_i0
expfit_funcctrl_i1
expfit_funcctrl_i2
expfit_funcctrl_i3
expfit_funcctrl_i4
expfit_funcctrl_i5
expfit_funcctrl_i6
expfit_funcctrl_i7
expfit_funcctrl_i8
expfit_funcctrl_i9
expfit_funcsparse_i0
expfit_funcsparse_i1
expfit_funcsparse_i2
expfit_funcsparse_i3
expfit_funcsparse_i4
expfit_funcsparse_i5
expfit_funcsparse_i6
expfit_funcsparse_i7
expfit_funcsparse_i8
expfit_funcsparse_i9
expfit_funcverysparse_i0
expfit_funcverysparse_i1
expfit_funcverysparse_i2
expfit_funcverysparse_i3
expfit_funcverysparse_i4
expfit_funcverysparse_i5
expfit_funcverysparse_i6
expfit_funcverysparse_i7
expfit_funcverysparse_i8
expfit_funcverysparse_i9
expfit_funcfull_i0
expfit_funcfull_i1
expfit_funcfull_i2
expfit_funcfull_i3
expfit_funcfull_i4
expfit_funcfull_i5
expfit_funcfull_i6
Step 0
++ Best 0.005700229783625332 for generation 0
++ Best 0.010222566327396274 for generation 1
++ Best 0.012683904517547643 for generation 52
Step 100
++ Best 0.015177086058

In [25]:
show_curve(sol.stats, REF.stats)

NameError: name 'sol' is not defined

In [26]:
dicts = [dict(**model.extract(x), **dict(zip(("mean", "sd", "max"), exporter.load(x)))) for x in exporter.list() if model.match(x)]

In [27]:
df = pd.DataFrame(dicts)

In [28]:
df.head()

Unnamed: 0,mutfunc,i,mean,sd,max
0,verysparse,3,0.01032,0.000836,0.011604
1,ctrl,5,0.029004,0.018015,0.079249
2,ctrl,4,0.03535,0.023815,0.055724
3,sparse,4,0.007565,0.00357,0.018477
4,sparse,2,0.030606,0.00855,0.025946


In [29]:
new_df = df.groupby(["mutfunc"]).mean()
new_df

Unnamed: 0_level_0,mean,sd,max
mutfunc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ctrl,0.020019,0.010299,0.040134
full,0.01638,0.005474,0.025236
sparse,0.015001,0.007552,0.029295
verysparse,0.021006,0.008715,0.036409


In [33]:
new_df["min_b"] = new_df["mean"] - new_df.sd / np.sqrt(10)
new_df["max_b"] = new_df["mean"] + new_df.sd / np.sqrt(10)

In [34]:
new_df

Unnamed: 0_level_0,mean,sd,max,min_b,max_b
mutfunc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ctrl,0.020019,0.010299,0.040134,0.016762,0.023276
full,0.01638,0.005474,0.025236,0.014649,0.018111
sparse,0.015001,0.007552,0.029295,0.012613,0.017389
verysparse,0.021006,0.008715,0.036409,0.01825,0.023762


In [30]:
for i, row in new_df.iterrows():
    print(row.name, row["mean"], row["mean"] - row.sd / np.sqrt(10), row["mean"] + row.sd / np.sqrt(10))

ctrl 0.0200190417567745 0.016762332534174626 0.02327575097937437
full 0.01638029520244837 0.014649358028289662 0.01811123237660708
sparse 0.01500115803583385 0.012613081363649907 0.017389234708017794
verysparse 0.021005735370942916 0.018249940130105537 0.023761530611780296


In [48]:
n = 10
row1, row2 = "verysparse", "sparse"
x1 = new_df.loc[row1, "mean"]
x2 = new_df.loc[row2, "mean"]
sd1 = new_df.loc[row1, "sd"]
sd2 = new_df.loc[row2, "sd"]
sp = np.sqrt((sd1**2 + sd2**2) / 2)
ts = (x1 - x2) / (sp * np.sqrt(2 / n))
ts

1.646646675643709

In [49]:
from scipy.stats import t

In [50]:
1 - t.cdf(ts, 19)

0.05803555220344847

In [None]:
row.name