# Goal : implementing multiprocessing

In [1]:
import os
# Limit the share of GPU memory that XLA (the JAX backend) pre-allocates to 20%.
# NOTE(review): this only takes effect if it is set before JAX is first imported,
# which is presumably why it sits in its own cell ahead of the other imports — confirm.
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = ".20"

In [2]:
import pandas as pd
from model import Brain
from submodels import factories
import matplotlib.pyplot as plt
import pandas as pd
from itertools import accumulate
import numpy as np
from collections import defaultdict

from jf.db import DB
from lib.plot import plot_2d_positions_colors
from lib.sde.grn2 import GRNMain2
from lib.sde.mutate import mutate_grn2
from lib.ga.utils import weighted_selection
from lib.ga.objective import Objective
from jf.profiler import Profiler
from jf.utils.export import Exporter
from jf.autocompute.jf import O

In [3]:
# Module-level containers, both created empty.
# NOTE(review): neither is read or written anywhere else in the visible cells —
# presumably leftovers from an earlier version; confirm before relying on them.
HISTORY = defaultdict(dict)
HALL_OF_FAME = []

In [4]:
# Reference data that simulated runs are scored against (used directly by fitness_func
# and through ParamRun.ref). The scoring helpers read its `time`,
# `progenitor_pop_size` and `whole_pop_size` columns.
REF = pd.read_csv("output/results/setup_basic/export/ref_basic2.csv")  # ref is a mean

In [5]:
def indiv_gen_provider(n=1):
    """Build a zero-argument factory that produces fresh ``Solution`` individuals.

    The mutation function is created once, from ``mutation_func_provider(n)``,
    and shared by every individual the factory returns. Each individual wraps
    a newly constructed ``GRNMain2(5, 0, 5)``.

    Parameters
    ----------
    n : int, optional
        Value forwarded to ``mutation_func_provider`` (default 1).

    Returns
    -------
    callable
        ``individual_generator()`` returning a new ``Solution``.
    """
    shared_mutator = mutation_func_provider(n)

    def individual_generator():
        fresh_grn = GRNMain2(5, 0, 5)
        return Solution(fresh_grn, shared_mutator)

    return individual_generator

In [6]:
def mutation_func_provider(n=1):
    """Return a function that calls ``mutate_grn2`` on a GRN ``n`` times.

    Parameters
    ----------
    n : int, optional
        Number of successive ``mutate_grn2`` calls per invocation (default 1).

    Returns
    -------
    callable
        ``mutate_grn(grn)``; it returns ``None``.
    """
    def mutate_grn(grn):
        # Repeat the elementary mutation n times on the same GRN object.
        for _ in range(n):
            mutate_grn2(grn)

    return mutate_grn

In [7]:
def r():
    """Draw one sample from ``np.random.uniform`` on the interval [0, 1)."""
    return np.random.uniform(low=0, high=1)

class Solution:
    """An individual of the genetic algorithm: a GRN plus the function that mutates it."""

    def __init__(self, grn, mutation_func):
        self.grn, self.mutation_func = grn, mutation_func

    def copy(self):
        """Return a new ``Solution`` holding ``grn.copy()`` and the same mutation function."""
        cloned_grn = self.grn.copy()
        return Solution(cloned_grn, self.mutation_func)

    def mutate(self):
        """Call the mutation function on this solution's GRN; its return value is discarded."""
        mutator = self.mutation_func
        mutator(self.grn)

In [8]:
def shrink_and_align_stats(stats, ref, max_step=None, min_step=None):
    """Cut ``stats`` and ``ref`` down to one time window and index both by ``time``.

    The lower bound is the later of the two frames' first ``time`` values,
    raised further to ``min_step`` when that is given and larger. The upper
    bound is the last ``time`` of ``ref`` only, lowered to ``max_step`` when
    that is given and smaller. Both bounds are inclusive.

    Parameters
    ----------
    stats, ref : pandas.DataFrame
        Frames with a ``time`` column.
    max_step, min_step : optional
        Requested bounds of the window; ``None`` means "use the data bound".

    Returns
    -------
    (new_stats, new_ref)
        Filtered copies of the two frames with ``time`` as their index.
    """
    earliest_common = max(min(stats.time), min(ref.time))
    latest_ref = max(ref.time)

    lower = earliest_common if min_step is None else max(min_step, earliest_common)
    upper = latest_ref if max_step is None else min(max_step, latest_ref)

    def _window(frame):
        in_window = (frame.time >= lower) & (frame.time <= upper)
        return frame[in_window].set_index("time")

    return _window(stats), _window(ref)

In [9]:
def preprocess_progenitor_size(stats, ref):
    """Normalise the progenitor population size of both frames and sample them on ref's times.

    Each ``progenitor_pop_size`` column is divided by its own value at the
    first time step of ``ref``, so both curves start at 1 at that step.
    The normalisation is done on local Series: unlike the previous version,
    the caller's ``stats`` and ``ref`` frames are left untouched.

    Parameters
    ----------
    stats, ref : pandas.DataFrame
        Frames indexed by time, each with a ``progenitor_pop_size`` column.
        NOTE(review): assumes ``stats`` has a row at ref's first time step —
        confirm for runs whose time grid differs from the reference.

    Returns
    -------
    (x, y) : tuple of lists
        ``x`` holds the normalised reference values and ``y`` the normalised
        simulated values, one entry per time in ``ref.index``. A time missing
        from ``stats`` contributes 0 to ``y``.
    """
    colname_ref = "progenitor_pop_size"
    colname_stats = "progenitor_pop_size"

    min_step = min(ref.index)  # ref.index is the time
    stats_norm = stats[colname_stats] / stats[colname_stats].get(min_step)
    ref_norm = ref[colname_ref] / ref[colname_ref].get(min_step)
    x = [ref_norm.get(t, 0) for t in ref.index]
    y = [stats_norm.get(t, 0) for t in ref.index]
    return x, y

def preprocess_whole_size(stats, ref):
    """Normalise the whole population size of both frames and sample them on ref's times.

    Each ``whole_pop_size`` column is divided by its own value at the first
    time step of ``ref``, so both curves start at 1 at that step. The
    normalisation is done on local Series: unlike the previous version, the
    caller's ``stats`` and ``ref`` frames are left untouched.

    Parameters
    ----------
    stats, ref : pandas.DataFrame
        Frames indexed by time, each with a ``whole_pop_size`` column.
        NOTE(review): assumes ``stats`` has a row at ref's first time step —
        confirm for runs whose time grid differs from the reference.

    Returns
    -------
    (x, y) : tuple of lists
        ``x`` holds the normalised reference values and ``y`` the normalised
        simulated values, one entry per time in ``ref.index``. A time missing
        from ``stats`` contributes 0 to ``y``.
    """
    colname_ref = "whole_pop_size"
    colname_stats = "whole_pop_size"

    min_step = min(ref.index)  # ref.index is the time
    stats_norm = stats[colname_stats] / stats[colname_stats].get(min_step)
    ref_norm = ref[colname_ref] / ref[colname_ref].get(min_step)
    x = [ref_norm.get(t, 0) for t in ref.index]
    y = [stats_norm.get(t, 0) for t in ref.index]
    return x, y

In [10]:
def score_coefficient_variation(x, y):
    """Sum of squared differences between ``x`` and ``y``, each divided by the ``x`` value.

    Pairs are formed with ``zip``, so extra elements of the longer sequence
    are ignored. Lower is better; 0 means the paired values are identical.
    """
    total = 0
    for expected, observed in zip(x, y):
        total += (expected - observed) ** 2 / expected
    return total

In [11]:
def score_progenitor_size(stats, ref, max_step=None, min_step=None):
    """Score the simulated progenitor population size against the reference.

    The frames are windowed with ``shrink_and_align_stats``, normalised with
    ``preprocess_progenitor_size`` and compared with
    ``score_coefficient_variation``.
    """
    aligned_stats, aligned_ref = shrink_and_align_stats(stats, ref, max_step, min_step)
    expected, observed = preprocess_progenitor_size(aligned_stats, aligned_ref)
    return score_coefficient_variation(expected, observed)

def score_whole_size(stats, ref, max_step=None, min_step=None):
    """Score the simulated whole population size against the reference.

    The frames are windowed with ``shrink_and_align_stats``, normalised with
    ``preprocess_whole_size`` and compared with
    ``score_coefficient_variation``.
    """
    aligned_stats, aligned_ref = shrink_and_align_stats(stats, ref, max_step, min_step)
    expected, observed = preprocess_whole_size(aligned_stats, aligned_ref)
    return score_coefficient_variation(expected, observed)

def score_both_size(stats, ref, max_step=None, min_step=None):
    """Return the progenitor-size score plus the whole-size score over the same window."""
    progenitor_score = score_progenitor_size(stats, ref, max_step, min_step)
    whole_score = score_whole_size(stats, ref, max_step, min_step)
    return progenitor_score + whole_score

In [12]:
def run_grn(prun, grn):
    """Build a model for ``grn`` with ``get_bb``, run it, and return it.

    The previous version discarded the object returned by ``get_bb`` and then
    used an unbound name ``bb``. That raised ``NameError`` on a fresh kernel,
    or silently ran a stale global ``bb`` left over from another cell.

    Parameters
    ----------
    prun : run parameters, forwarded to ``get_bb``.
    grn : the gene regulatory network to simulate.

    Returns
    -------
    The object built by ``get_bb``, after its ``run()`` method has been called.
    """
    bb = get_bb(prun, grn)
    bb.run()
    return bb

In [13]:
def get_bb(prun, grn):
    """Create (without running) a ``Brain`` whose cells are driven by ``grn``.

    The cell class comes from ``factories["grn2_opti"]``. The ``Brain`` starts
    at time 50 and takes its end time from ``prun.end_time``; every other
    setting is fixed here.
    """
    cell_class = factories["grn2_opti"](grn=grn)
    return Brain(
        time_step=0.125,
        verbose=False,
        start_population=5,
        max_pop_size=1e3,
        cell_cls=cell_class,
        end_time=prun.end_time,
        start_time=50,
        silent=True,
    )

In [14]:
def fitness_func(prun, grn, score_func):
    """Run one simulation of ``grn`` and return the inverse of its score.

    ``score_func`` is called as ``score_func(stats, REF, max_step=prun.end_time)``
    on the simulation's ``stats``; the fitness is ``1.0 / score``, so a lower
    score gives a higher fitness.
    """
    simulated = run_grn(prun, grn)
    score = score_func(simulated.stats, REF, max_step=prun.end_time)
    return 1.0 / score

In [15]:
def fitness_multistep(prun, grn, steps):
    """Simulate ``grn`` stage by stage and accumulate a capped fitness per stage.

    For each step the model is advanced with ``run_until(step.end_time)`` and
    scored with ``score_both_size`` on the window from the previous step's end
    time to this step's end time. The step fitness is ``1 / score``, capped at
    ``step.max_fitness``, and added to the total. Evaluation stops after the
    current step when its fitness is below ``step.min_fitness`` or when
    ``run_until`` returned a falsy value.

    Returns
    -------
    (total_fitness, stats)
        The accumulated fitness and the ``stats`` attribute of the model.
    """
    brain = get_bb(prun, grn)
    cumulative = 0
    window_start = None

    for step in steps:
        reached = brain.run_until(step.end_time)
        raw_score = score_both_size(brain.stats, prun.ref,
                                    max_step=step.end_time, min_step=window_start)
        capped = min(1.0 / raw_score, step.max_fitness)
        cumulative += capped
        # The step is always scored and counted before deciding to stop.
        if capped < step.min_fitness or not reached:
            break
        window_start = step.end_time

    return cumulative, brain.stats

def score_multistep(prun, stats, steps):
    """Accumulate the staged fitness of already-computed ``stats``.

    Each step scores ``stats`` against ``prun.ref`` with ``score_both_size``
    on the window from the previous step's end time to this step's end time.
    The step fitness is ``1 / score``, capped at ``step.max_fitness``, and
    added to the total. Evaluation stops after the first step whose fitness is
    below ``step.min_fitness``.

    Returns
    -------
    The accumulated fitness.
    """
    cumulative = 0
    window_start = None

    for step in steps:
        raw_score = score_both_size(stats, prun.ref,
                                    max_step=step.end_time, min_step=window_start)
        capped = min(1.0 / raw_score, step.max_fitness)
        cumulative += capped
        if capped < step.min_fitness:
            break
        window_start = step.end_time

    return cumulative

In [16]:
def mean_sd_fitness(prun, grn, run=3):
    """Evaluate ``grn`` several times and return the mean and standard deviation of its fitness.

    ``fitness_multistep`` requires a ``steps`` argument and returns a
    ``(fitness, stats)`` pair. The previous version omitted ``steps``
    (``TypeError``) and would have averaged the pairs themselves. The steps
    are now taken from ``prun.steps``, as ``do_fitness`` does, and only the
    fitness component is kept.

    Parameters
    ----------
    prun : run parameters; must provide ``steps``.
    grn : the gene regulatory network to evaluate.
    run : int, optional
        Number of independent evaluations (default 3).

    Returns
    -------
    (mean, sd)
        ``np.mean`` and ``np.std`` of the collected fitness values.
    """
    fitnesses = [fitness_multistep(prun, grn, prun.steps)[0] for _ in range(run)]
    return np.mean(fitnesses), np.std(fitnesses)

In [17]:
def do_init_pop(prun):
    """Create the initial population: ``prun.pop_size`` calls to ``prun.individual_generator``."""
    population = []
    for _ in range(prun.pop_size):
        population.append(prun.individual_generator())
    return population

def do_fitness(prun, pop):
    """Evaluate every individual of ``pop`` with ``fitness_multistep`` on ``prun.steps``.

    Returns
    -------
    (fitness, stats)
        Two tuples aligned with ``pop``: the fitness of each individual and
        the stats produced while evaluating it.
    """
    evaluations = [fitness_multistep(prun, sol.grn, prun.steps) for sol in pop]
    # Transpose [(fitness, stats), ...] into (fitness, ...), (stats, ...).
    fitness, stats = zip(*evaluations)
    return fitness, stats

def do_selection(prun, pop_fit, pop):
    """Select the next population and report which individual was the fittest.

    The unused running sum of the fitness values and the commented-out debug
    prints of the previous version have been removed; the results are
    unchanged.

    Parameters
    ----------
    prun : run parameters; ``prun.individual_generator`` is handed to
        ``weighted_selection``.
    pop_fit : sequence of fitness values aligned with ``pop``.
    pop : the current population.

    Returns
    -------
    (pop_sel, history_sel, best_id)
        The two values returned by ``weighted_selection`` and the index of the
        first individual holding the maximum fitness in ``pop_fit``.
    """
    best_id = pop_fit.index(max(pop_fit))

    # NOTE(review): new_fitness=0.3 is presumably the fitness given to freshly
    # generated individuals — confirm in lib.ga.utils.weighted_selection.
    pop_sel, history_sel = weighted_selection(pop, pop_fit, prun.individual_generator, new_fitness=0.3)

    return pop_sel, history_sel, best_id

def do_mutation(prun, pop_sel):
    """Call ``mutate()`` on every individual of ``pop_sel`` and return the same list.

    ``prun`` is accepted for signature symmetry with the other ``do_*`` steps
    and is not used.
    """
    for individual in pop_sel:
        individual.mutate()
    return pop_sel

In [18]:
# One stage of the staged evaluation performed by fitness_multistep / score_multistep.
# The values below are class-level defaults; instances override them via keyword
# arguments (e.g. ObjectiveStep(end_time=53)).
class ObjectiveStep(O):
    end_time = 0  # the simulation is advanced to this time, which also closes the scoring window
    max_fitness = 4  # cap applied to this step's fitness (1 / score)
    min_fitness = 1  # a step fitness below this value ends the evaluation early
    
# Twelve scoring stages whose end times run from 53 to 86 in increments of 3;
# each one keeps the ObjectiveStep defaults for max_fitness / min_fitness.
example_steps = [ObjectiveStep(end_time=t) for t in range(53, 87, 3)]

# Parameters of one genetic-algorithm run, passed around as `prun`.
class ParamRun(O):
    pop_size = 10  # number of individuals created by do_init_pop
    n_gen = 30  # maximum number of generations iterated by main
    current_gen = 0  # NOTE(review): not read anywhere in the visible cells
    # End time handed to Brain in get_bb.
    # NOTE(review): example_steps goes up to end_time=86, beyond this value — confirm intended.
    end_time = 83
    ref = REF  # reference data the staged scoring compares against (prun.ref)

# Shared run configuration; the experiment cells below replace
# args.individual_generator before each batch of runs.
args = ParamRun()
args.steps = example_steps

In [19]:
def main(prun):
    """Run the genetic algorithm once and report the generation at which it stopped.

    Each generation evaluates the population, selects the next one, mutates
    it, and tracks the best fitness seen so far. The run stops early as soon
    as that best fitness exceeds 10, otherwise after ``prun.n_gen``
    generations.

    Parameters
    ----------
    prun : run parameters; must provide ``n_gen`` plus whatever the ``do_*``
        steps read (``pop_size``, ``individual_generator``, ``steps``, ...).

    Returns
    -------
    int
        0-based index of the last generation executed. ``-1`` when
        ``prun.n_gen`` is 0; the previous version raised ``UnboundLocalError``
        in that case because the loop variable was never assigned.
    """
    best = 0
    generation = -1  # no generation evaluated yet; keeps print/return valid when n_gen == 0
    pop = do_init_pop(prun)
    for generation in range(prun.n_gen):
        fit, _stats = do_fitness(prun, pop)

        # TODO get the stats associated with the best scores
        sel, _history_sel, best_id = do_selection(prun, fit, pop)
        if fit[best_id] > best:
            best = fit[best_id]

        pop = do_mutation(prun, sel)

        # Convergence threshold on the best fitness observed so far.
        if best > 10:
            break

    print(generation)
    return generation

In [20]:
# Experiment: 10 independent GA runs with 1 mutation applied per individual per generation.
# Each entry of res_1 is the generation index returned by main (also printed by it).
args.individual_generator = indiv_gen_provider(1)
res_1 = [main(args) for i in range(10)]

23
4
9
17
10
22
29
2
10
29


In [21]:
# Experiment: 10 independent GA runs with 2 mutations applied per individual per generation.
# Each entry of res_2 is the generation index returned by main (also printed by it).
args.individual_generator = indiv_gen_provider(2)
res_2 = [main(args) for i in range(10)]

10
9
6
1
5
27
6
19
2
29


In [22]:
# Experiment: 10 independent GA runs with 3 mutations applied per individual per generation.
# Each entry of res_3 is the generation index returned by main (also printed by it).
args.individual_generator = indiv_gen_provider(3)
res_3 = [main(args) for i in range(10)]

12
14
11
21
22
27
5
15
13
1


In [23]:
# Experiment: 10 independent GA runs with 5 mutations applied per individual per generation.
# Each entry of res_5 is the generation index returned by main (also printed by it).
args.individual_generator = indiv_gen_provider(5)
res_5 = [main(args) for i in range(10)]

19
4
5
1
9
7
4
7
1
9


In [26]:
# Stopping generations of every run, one row per mutation setting (1, 2, 3 and 5 mutations).
all_res = [res_1, res_2, res_3, res_5]
all_res

[[23, 4, 9, 17, 10, 22, 29, 2, 10, 29],
 [10, 9, 6, 1, 5, 27, 6, 19, 2, 29],
 [12, 14, 11, 21, 22, 27, 5, 15, 13, 1],
 [19, 4, 5, 1, 9, 7, 4, 7, 1, 9]]

In [27]:
# Mean and standard deviation (np.std, i.e. population SD by default) of the
# stopping generation, one output line per mutation setting.
for res in all_res:
    print(np.mean(res), "\t", np.std(res))

15.5 	 9.394147114027968
11.4 	 9.562426470305537
14.1 	 7.395268757793729
6.6 	 4.94368283772331


Conclusion: 1 mutation per step appears to be the least efficient setting, while 5 appears to be the most efficient. However, the results for 2 and 3 mutations seem inconsistent with the idea that (up to 5) the more mutations there are, the quicker the search converges.
But we have to keep in mind that the population was small here, and so was the number of genes.
That's why I suggest using a variable mutation rate, drawn from a Poisson distribution whose parameter scales with the number of genes.

In [31]:
import sys
from pathlib import Path as P

In [36]:
def is_local_module(module):
    """Tell whether ``module`` was loaded from a file located under the project root.

    The root is the directory of the current file, or the current working
    directory when ``__file__`` is not defined (as in a notebook).

    The previous version compared raw strings with ``startswith``, so a
    sibling directory sharing the same prefix (``/x/proj2`` versus
    ``/x/proj``) was wrongly reported as local. The test is now made on path
    components.

    Parameters
    ----------
    module : a module object, typically taken from ``sys.modules``.

    Returns
    -------
    bool
        ``True`` when the module's ``__file__`` lies inside the root
        directory; ``False`` otherwise, including for modules without a
        ``__file__`` or whose ``__file__`` is ``None``.
    """
    try:
        root = P(os.path.realpath(__file__)).parent
    except NameError:
        root = P(os.getcwd())

    module_file = getattr(module, "__file__", None)
    if module_file is None:
        return False

    # Membership in .parents compares whole path components, not string prefixes.
    return root in P(str(module_file)).parents


In [37]:
# Modules currently loaded in this kernel that is_local_module classifies as local.
local_modules = [module for module in sys.modules.values() if is_local_module(module)]

In [39]:
# Display the first module that was detected as local.
local_modules[0]

<module 'lib' from '/home/nathan/other/thesis_nathan/EmbryonicCortexModelling/grn/lib/__init__.py'>

In [40]:
# Keep a handle on the first local module for the inspection below.
mod = local_modules[0]

In [44]:
import inspect
# Path of the Python source file in which the module is defined.
inspect.getsourcefile(mod)

'/home/nathan/other/thesis_nathan/EmbryonicCortexModelling/grn/lib/__init__.py'