In [1]:
# https://msprime.readthedocs.io/en/stable/tutorial.html#demography

In [2]:
from math import (exp, log)
import numpy as np
import msprime

In [3]:
RHO_HUMAN = 1.6*10e-9
MU_HUMAN = 1.25*10e-8
RHO_LIMIT = (log(RHO_HUMAN)-100, log(RHO_HUMAN)+100)
MU_LIMIT = (log(MU_HUMAN)-100, log(MU_HUMAN)+100)

LENGTH_NORMALIZE_CONST = 4
ZIPPED = False
NUMBER_OF_EVENTS_LIMITS = (1, 20)
MAX_T_LIMITS = (0.01, 30)
LAMBDA_EXP = 1.0
POPULATION_LIMITS = (250, 100000)
POPULATION = 5000

IS_SEMMETRIC = True

N = 20

RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [48]:
population_configurations = [
    msprime.PopulationConfiguration(
        sample_size=1, initial_size=100),
    msprime.PopulationConfiguration(
        sample_size=1, initial_size=200, growth_rate=-0.01),
    msprime.PopulationConfiguration(
        sample_size=1, initial_size=100),
]
# What's wrong with growth_rate? Why population does growth only if it's negative?

In [62]:
def create_simple_N_populations(number_of_populations: int, is_equale = True, sample_size=1) -> list:
    low, high = POPULATION_LIMITS
    if is_equale:
        pop_num = np.random.randint(low=low, high=high)
        pop = [msprime.PopulationConfiguration(sample_size=sample_size,initial_size=pop_num) 
               for i in range(number_of_populations)]
    else:
        pop = [msprime.PopulationConfiguration(sample_size=sample_size,
                                               initial_size=np.random.randint(low=low, high=high))
              for i in range(number_of_populations)]
    return pop

In [5]:
def create_N_population(populations_parametrs: dict) -> list:
    """
    population_parametrs: {population_name:int : [sample_size:int, initial_size:float, growth_rate:float]}
    """
    populations_configuration = []
    for population_name, parametrs in populations_parametrs.items():
        if len(parametrs)  == 3:
            sample_size, initial_size, growth_rate = parametrs
        elif len(parametrs)  == 2:
            sample_size, initial_size = parametrs
        else:
            raise f"Number ov parametrs error: expected 2 or 3, but got {len(parametrs)}"
        populations_configuration.append(
            msprime.PopulationConfiguration(
                sample_size=sample_size, 
                initial_size=initial_size, 
                growth_rate=growth_rate)
        )
    return populations_configuration

In [6]:
def create_random_migration_matrix(N: int, max_migration_rate, is_semmetric = IS_SEMMETRIC):
    migration_matrix = np.zeros(shape=(N,N))
    for i in range(N):
        for j in range(i + 1,N):
            migration_matrix[i][j] = np.random.random()*max_migration_rate
            if is_semmetric:
                migration_matrix[j][i] = migration_matrix[i][j]
            else:
                migration_matrix[j][i] = np.random.random()*max_migration_rate
    return migration_matrix

In [49]:
MAX_MIGRATION_RATE = 10
IS_SEMMETRIC = False
def create_MassMigration(time: float, number_of_populations: int, populations: list) -> list:
    if number_of_populations == 1:
        raise "Only one population"
    source, destination = 0,0
    while source == destination:
        source, destination = np.random.randint(number_of_populations,size=2)
    proportion = min(np.random.rand(), 1.0) # learn max of np.random.rand()
    return [msprime.MassMigration(time=time,source=source,destination=destination, proportion=proportion)]

def create_MigrationRateChange(time: float, number_of_populations: int, populations: list) -> list:
    rate = np.random.random()*MAX_MIGRATION_RATE
    i,j = np.random.randint(low=number_of_populations,size=(2))
    while i == j:
        i,j = np.random.randint(low=number_of_populations,size=(2))
    migration = [msprime.MigrationRateChange(time=time,rate=rate,matrix_index=(i,j))]
    if IS_SEMMETRIC:
        migration.append(msprime.MigrationRateChange(time=time,rate=rate,matrix_index=(j,i)))
    return migration
    
def create_PopulationParametersChange(time: float, number_of_populations: int, populations: list) -> list:
    population_id = np.random.randint(number_of_populations)
    size = int(np.random.beta(a=2, b=5) * populations[population_id])
    while size <= 0:
        size = int(np.random.beta(a=2, b=5) * populations[population_id])
    return [msprime.PopulationParametersChange(time=time, initial_size=size, population_id=population_id)]

events = {
    "MassMigration": create_MassMigration,
    "MigrationRateChange": create_MigrationRateChange,
    "PopulationParametersChange": create_PopulationParametersChange,
}

events = [create_MassMigration,create_MigrationRateChange,create_PopulationParametersChange]

def create_demographic_events(number_of_populations: int, populations: list) -> list:
    """
    """
    low, high = NUMBER_OF_EVENTS_LIMITS
    number_of_events = np.random.randint(low=low, high=high)
    times = sorted(np.random.exponential(LAMBDA_EXP, size=number_of_events))
    low, high = MAX_T_LIMITS
    max_t = np.random.uniform(low=low, high=high)
    
    alpha = 1.0
    beta = np.log(max_t + 1)/times[-1]

    def to_exp_time(time: float) -> float:
        # time -> exponentional time
        return alpha*(np.exp(beta*time) - 1)
    
    exp_times = [to_exp_time(t) for t in times]
    
    demographic_events = []
    for time in exp_times:
        event = np.random.choice(events)(time, number_of_populations, populations)
        for ev in event:
            demographic_events.append(ev)
    return demographic_events

In [50]:
np.random.randint(low=4,size=(2))

array([0, 3])

In [51]:
create_demographic_events(4,[10,10,10,10])

[{'type': 'population_parameters_change', 'time': 0.01956859662892252, 'growth_rate': None, 'initial_size': 3, 'population': 2},
 {'type': 'migration_rate_change', 'time': 0.022988039024166795, 'rate': 8.849983565232812, 'matrix_index': (2, 3)},
 {'type': 'migration_rate_change', 'time': 0.05395594590070085, 'rate': 7.247667715287615, 'matrix_index': (3, 2)},
 {'type': 'population_parameters_change', 'time': 0.05664579892530064, 'growth_rate': None, 'initial_size': 4, 'population': 3},
 {'type': 'mass_migration', 'time': 0.09612919038762624, 'source': 1, 'dest': 3, 'proportion': 0.23295947475363377},
 {'type': 'population_parameters_change', 'time': 0.1746582467285307, 'growth_rate': None, 'initial_size': 3, 'population': 2},
 {'type': 'population_parameters_change', 'time': 0.26492347044265974, 'growth_rate': None, 'initial_size': 3, 'population': 1},
 {'type': 'mass_migration', 'time': 0.31651754316587843, 'source': 3, 'dest': 2, 'proportion': 0.39828738397563557},
 {'type': 'migrati

In [52]:
[list(i) for i in create_random_migration_matrix(4,1)]

[[0.0, 0.7433528228936241, 0.06498640948289403, 0.40158023642548746],
 [0.7433528228936241, 0.0, 0.8378257352832454, 0.230569195805019],
 [0.06498640948289403, 0.8378257352832454, 0.0, 0.8306425177506246],
 [0.40158023642548746, 0.230569195805019, 0.8306425177506246, 0.0]]

In [63]:
create_simple_N_populations(NB)

[<msprime.simulations.PopulationConfiguration at 0x7fe849ff06d8>,
 <msprime.simulations.PopulationConfiguration at 0x7fe849ff04e0>,
 <msprime.simulations.PopulationConfiguration at 0x7fe849ff01d0>,
 <msprime.simulations.PopulationConfiguration at 0x7fe849ff00b8>]

In [67]:
NB = 10
dd = msprime.DemographyDebugger(
    population_configurations=create_simple_N_populations(NB),
    migration_matrix=create_random_migration_matrix(NB,1),
    demographic_events=create_demographic_events(NB,[10 for i in range(NB)])
)
dd.print_history()

Model =  hudson(reference_size=1)
Epoch: 0 -- 0.0009210617686823408 generations
     start     end      growth_rate |     0        1        2        3        4        5        6        7        8        9    
   -------- --------       -------- | -------- -------- -------- -------- -------- -------- -------- -------- -------- -------- 
0 | 1.8e+04  1.8e+04              0 |     0      0.136    0.695    0.404    0.428    0.718    0.692    0.991    0.128    0.104  
1 | 1.8e+04  1.8e+04              0 |   0.136      0      0.724    0.578    0.274   0.0794   0.0857    0.894    0.192    0.323  
2 | 1.8e+04  1.8e+04              0 |   0.695    0.724      0      0.227    0.355   0.0694    0.519   0.0676     0.8     0.234  
3 | 1.8e+04  1.8e+04              0 |   0.404    0.578    0.227      0      0.54     0.88     0.651    0.533    0.324    0.333  
4 | 1.8e+04  1.8e+04              0 |   0.428    0.274    0.355    0.54       0      0.669    0.994    0.662    0.558    0.731  
5 | 1.8e+04  1.8e

     start     end      growth_rate |     0        1        2        3        4        5        6        7        8        9    
   -------- --------       -------- | -------- -------- -------- -------- -------- -------- -------- -------- -------- -------- 
0 | 1.8e+04  1.8e+04              0 |     0      0.136    0.695    0.404    0.428    0.718    0.692    0.991    0.128    0.104  
1 |    4        4                 0 |   0.136      0      0.724    0.578    0.274    5.06    0.0857    0.894    0.192    0.323  
2 |    2        2                 0 |   0.695    0.724      0      0.227    0.355   0.0694    0.519   0.0676     0.8     0.234  
3 |    2        2                 0 |   1.59     0.578    0.227      0      0.54     0.88     0.651    0.533    0.324    0.333  
4 | 1.8e+04  1.8e+04              0 |   0.428    0.274    0.355    0.54       0      0.669    0.994    0.662    0.558    0.731  
5 |    1        1                 0 |   0.718   0.0794    8.68     1.39     0.669      0      0.4

In [47]:
generator = msprime.simulate(
        #sample_size=2,
        recombination_rate=RHO_HUMAN,
        mutation_rate=0.1,
        random_seed=42,
        model="hudson",
        length=20,
        num_replicates=1,
        population_configurations=population_configurations,
        migration_matrix=create_random_migration_matrix(3,0.001),
        demographic_events=create_demographic_events(3,[10,10,10])
)

ValueError: migration matrix must be a N x N square matrix encoded as a list-of-lists, where N is the number of populations defined in the population_configurations. The diagonal elements of this matrix must be zero. For example, a valid matrix for a 3 population system is [[0, 1, 1], [1, 0, 1], [1, 1, 0]]

In [None]:
tr = []
for i in generator:
    tr.append(i)

In [None]:
for variant in tr[0].variants():
    print(
        variant.site.id, variant.site.position,
        variant.alleles, variant.genotypes, sep="\t")

In [None]:
i,j = np.random.randint(low=2,size=(2))

In [None]:
i,j

In [None]:
a = []
b = [1,2]
a.append(*b)