In [1]:
# https://msprime.readthedocs.io/en/stable/tutorial.html#demography

In [1]:
from math import (exp, log)
import numpy as np
import msprime

In [2]:
RHO_HUMAN = 1.6*10e-9
MU_HUMAN = 1.25*10e-8
RHO_LIMIT = (log(RHO_HUMAN)-100, log(RHO_HUMAN)+100)
MU_LIMIT = (log(MU_HUMAN)-100, log(MU_HUMAN)+100)

LENGTH_NORMALIZE_CONST = 4
ZIPPED = False
NUMBER_OF_EVENTS_LIMITS = (1, 20)
MAX_T_LIMITS = (0.01, 30)
LAMBDA_EXP = 1.0
POPULATION_LIMITS = (250, 100000)
POPULATION = 5000

IS_SEMMETRIC = True

N = 20

RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [3]:
population_configurations = [
    msprime.PopulationConfiguration(
        sample_size=1, initial_size=100),
    msprime.PopulationConfiguration(
        sample_size=1, initial_size=200, growth_rate=-0.01),
    msprime.PopulationConfiguration(
        sample_size=1, initial_size=100),
]
# What's wrong with growth_rate? Why population does growth only if it's negative?

In [4]:
def create_simple_N_populations(number_of_populations: int, is_equale = True, sample_size=1) -> list:
    low, high = POPULATION_LIMITS
    if is_equale:
        pop_num = np.random.randint(low=low, high=high)
        pop = [msprime.PopulationConfiguration(sample_size=sample_size,initial_size=pop_num) 
               for i in range(number_of_populations)]
    else:
        pop = [msprime.PopulationConfiguration(sample_size=sample_size,
                                               initial_size=np.random.randint(low=low, high=high))
              for i in range(number_of_populations)]
    return pop

In [5]:
def create_N_population(populations_parametrs: dict) -> list:
    """
    population_parametrs: {population_name:int : [sample_size:int, initial_size:float, growth_rate:float]}
    """
    populations_configuration = []
    for population_name, parametrs in populations_parametrs.items():
        if len(parametrs)  == 3:
            sample_size, initial_size, growth_rate = parametrs
        elif len(parametrs)  == 2:
            sample_size, initial_size = parametrs
        else:
            raise f"Number ov parametrs error: expected 2 or 3, but got {len(parametrs)}"
        populations_configuration.append(
            msprime.PopulationConfiguration(
                sample_size=sample_size, 
                initial_size=initial_size, 
                growth_rate=growth_rate)
        )
    return populations_configuration

In [6]:
def create_random_migration_matrix(N: int, max_migration_rate, is_semmetric = IS_SEMMETRIC):
    migration_matrix = np.zeros(shape=(N,N))
    for i in range(N):
        for j in range(i + 1,N):
            migration_matrix[i][j] = np.random.random()*max_migration_rate
            if is_semmetric:
                migration_matrix[j][i] = migration_matrix[i][j]
            else:
                migration_matrix[j][i] = np.random.random()*max_migration_rate
    return migration_matrix

In [7]:
MAX_MIGRATION_RATE = 10
IS_SEMMETRIC = False
def create_MassMigration(time: float, number_of_populations: int, populations: list) -> list:
    if number_of_populations == 1:
        raise "Only one population"
    source, destination = 0,0
    while source == destination:
        source, destination = np.random.randint(number_of_populations,size=2)
    proportion = min(np.random.rand(), 1.0) # learn max of np.random.rand()
    return [msprime.MassMigration(time=time,source=source,destination=destination, proportion=proportion)]

def create_MigrationRateChange(time: float, number_of_populations: int, populations: list) -> list:
    rate = np.random.random()*MAX_MIGRATION_RATE
    i,j = np.random.randint(low=number_of_populations,size=(2))
    while i == j:
        i,j = np.random.randint(low=number_of_populations,size=(2))
    migration = [msprime.MigrationRateChange(time=time,rate=rate,matrix_index=(i,j))]
    if IS_SEMMETRIC:
        migration.append(msprime.MigrationRateChange(time=time,rate=rate,matrix_index=(j,i)))
    return migration
    
def create_PopulationParametersChange(time: float, number_of_populations: int, populations: list) -> list:
    population_id = np.random.randint(number_of_populations)
    size = int(np.random.beta(a=2, b=5) * populations[population_id])
    while size <= 0:
        size = int(np.random.beta(a=2, b=5) * populations[population_id])
    return [msprime.PopulationParametersChange(time=time, initial_size=size, population_id=population_id)]

events = {
    "MassMigration": create_MassMigration,
    "MigrationRateChange": create_MigrationRateChange,
    "PopulationParametersChange": create_PopulationParametersChange,
}

events = [create_MassMigration,create_MigrationRateChange,create_PopulationParametersChange]

def create_demographic_events(number_of_populations: int, populations: list) -> list:
    """
    """
    low, high = NUMBER_OF_EVENTS_LIMITS
    number_of_events = np.random.randint(low=low, high=high)
    times = sorted(np.random.exponential(LAMBDA_EXP, size=number_of_events))
    low, high = MAX_T_LIMITS
    max_t = np.random.uniform(low=low, high=high)
    
    alpha = 1.0
    beta = np.log(max_t + 1)/times[-1]

    def to_exp_time(time: float) -> float:
        # time -> exponentional time
        return alpha*(np.exp(beta*time) - 1)
    
    exp_times = [to_exp_time(t) for t in times]
    
    demographic_events = []
    for time in exp_times:
        event = np.random.choice(events)(time, number_of_populations, populations)
        for ev in event:
            demographic_events.append(ev)
    return demographic_events

In [8]:
np.random.randint(low=4,size=(2))

array([2, 3])

In [9]:
create_demographic_events(4,[10,10,10,10])

[{'type': 'mass_migration', 'time': 0.01687161377800961, 'source': 0, 'dest': 2, 'proportion': 0.6118528947223795},
 {'type': 'migration_rate_change', 'time': 0.04931128917693961, 'rate': 0.46665663213615427, 'matrix_index': (2, 1)},
 {'type': 'migration_rate_change', 'time': 0.14616603215110335, 'rate': 7.851759613930136, 'matrix_index': (2, 3)},
 {'type': 'population_parameters_change', 'time': 0.14619238103918386, 'growth_rate': None, 'initial_size': 1, 'population': 3},
 {'type': 'population_parameters_change', 'time': 0.17518463071828227, 'growth_rate': None, 'initial_size': 1, 'population': 1},
 {'type': 'population_parameters_change', 'time': 0.17701280666056074, 'growth_rate': None, 'initial_size': 3, 'population': 3},
 {'type': 'migration_rate_change', 'time': 0.21166959909811922, 'rate': 6.62522284353982, 'matrix_index': (1, 3)},
 {'type': 'migration_rate_change', 'time': 0.33883181466079626, 'rate': 0.31313292455558583, 'matrix_index': (3, 1)},
 {'type': 'migration_rate_chan

In [10]:
[list(i) for i in create_random_migration_matrix(4,1)]

[[0.0, 0.8631034258755935, 0.6232981268275579, 0.3308980248526492],
 [0.8631034258755935, 0.0, 0.06355835028602363, 0.3109823217156622],
 [0.6232981268275579, 0.06355835028602363, 0.0, 0.32518332202674705],
 [0.3308980248526492, 0.3109823217156622, 0.32518332202674705, 0.0]]

In [11]:
create_simple_N_populations(NB)

NameError: name 'NB' is not defined

In [12]:
NB = 10
dd = msprime.DemographyDebugger(
    population_configurations=create_simple_N_populations(NB),
    migration_matrix=create_random_migration_matrix(NB,1),
    demographic_events=create_demographic_events(NB,[10 for i in range(NB)])
)
dd.print_history()

Model =  hudson(reference_size=1)
Epoch: 0 -- 10.1350789903948 generations
     start     end      growth_rate |     0        1        2        3        4        5        6        7        8        9    
   -------- --------       -------- | -------- -------- -------- -------- -------- -------- -------- -------- -------- -------- 
0 |9.66e+04 9.66e+04              0 |     0      0.591    0.275    0.561    0.383    0.972    0.849    0.722    0.236    0.256  
1 |9.66e+04 9.66e+04              0 |   0.591      0     0.0404    0.711    0.111    0.439    0.202    0.896    0.475    0.563  
2 |9.66e+04 9.66e+04              0 |   0.275   0.0404      0      0.696    0.139    0.604    0.54     0.203    0.943    0.599  
3 |9.66e+04 9.66e+04              0 |   0.561    0.711    0.696      0      0.695    0.88     0.624    0.296    0.105    0.457  
4 |9.66e+04 9.66e+04              0 |   0.383    0.111    0.139    0.695      0      0.218    0.417    0.883    0.324    0.122  
5 |9.66e+04 9.66e+04  

In [43]:
dem = create_demographic_events(2,[1000,1000])
dem

[{'type': 'mass_migration', 'time': 0.03514325061615642, 'source': 0, 'dest': 1, 'proportion': 0.7896181427945539},
 {'type': 'mass_migration', 'time': 0.05616072165193242, 'source': 0, 'dest': 1, 'proportion': 0.9742482085344102},
 {'type': 'migration_rate_change', 'time': 0.06230201216744047, 'rate': 5.495288823237355, 'matrix_index': (1, 0)},
 {'type': 'mass_migration', 'time': 0.06843301296487625, 'source': 1, 'dest': 0, 'proportion': 0.10112267612279024},
 {'type': 'population_parameters_change', 'time': 0.24856982826748175, 'growth_rate': None, 'initial_size': 185, 'population': 1},
 {'type': 'mass_migration', 'time': 0.31711506408833134, 'source': 1, 'dest': 0, 'proportion': 0.5813054172267823},
 {'type': 'mass_migration', 'time': 0.3636972258143254, 'source': 0, 'dest': 1, 'proportion': 0.8127995672575026},
 {'type': 'population_parameters_change', 'time': 0.512782485733168, 'growth_rate': None, 'initial_size': 175, 'population': 0},
 {'type': 'population_parameters_change', 't

In [44]:
NB = 2
L = 100
generator = msprime.simulate(
        #sample_size=2,
        recombination_rate=RHO_HUMAN,
        mutation_rate=MU_HUMAN,
        random_seed=42,
        model="hudson",
        length=L,
        num_replicates=1,
        population_configurations=create_simple_N_populations(2),
        migration_matrix=create_random_migration_matrix(2,0.001),
        demographic_events=dem
)

In [45]:
try:
    replica = next(generator)
except StopIteration:
    raise StopIteration

# TODO Защита от записи в один и тот же участок генома 
haplotype = [0] * L

for mutation in replica.mutations():
    point = round(mutation.position)
    if point < L:
        haplotype[point] = 1
    else:
        haplotype[point - 1] = 1

recombination_points = []
coal_times = []
for tree in replica.trees():
    point = round(tree.get_interval()[0])
    if point not in recombination_points:
        recombination_points.append(point)
        coal_times.append(tree.total_branch_length /
                          LENGTH_NORMALIZE_CONST)

# haplotype = "".join([str(h) for h in haplotype])
times = [.0] * len(haplotype)
j_point = 0
j_time = -1
time = None
for i, _ in enumerate(times):
    if j_point < len(recombination_points):
        if i == recombination_points[j_point]:
            j_point += 1
            j_time += 1
            time = coal_times[j_time]
    times[i] = time

min_t = min(times)
max_t = max(times)

a = (-np.log(max_t) + N*np.log(min_t))/(N-1)
B = (-np.log(min_t) + np.log(max_t))/(N-1)

def to_T(time):
    return round((np.log(time)-a)/B)

step_of_discratization = max(times)/N
def discretization(t):
    return min(int(t/step_of_discratization) + 1, N)

#d_times = [discretization(t) for t in times]
d_times = [to_T(t) for t in times]



In [46]:
np.array(haplotype)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [47]:
d_times

[inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf]

In [None]:
tr = []
for i in generator:
    tr.append(i)

In [None]:
for variant in tr[0].variants():
    print(
        variant.site.id, variant.site.position,
        variant.alleles, variant.genotypes, sep="\t")

In [None]:
i,j = np.random.randint(low=2,size=(2))

In [None]:
i,j

In [None]:
a = []
b = [1,2]
a.append(*b)

In [76]:
def give_two_populations(
    initial_size_1: int, initial_size_2: int,
    sample_size_1 = 1, sample_size_2 = 1,
    growth_rate_1 = None, growth_rate_2=None):
    
    return [
    msprime.PopulationConfiguration(
        sample_size=sample_size_1, 
        initial_size=initial_size_1,
        growth_rate=growth_rate_1
    ),
    msprime.PopulationConfiguration(
        sample_size=sample_size_2, 
        initial_size=initial_size_2,
        growth_rate=growth_rate_2
    ),
    ]

In [78]:
def give_initial_migration_matrix(a_b: float, b_a=None):
    if b_a is None:
        b_a = a_b
    return [[0, a_b],
           [b_a, 0]]

In [81]:
def give_mass_migration(time, source, destination, proportion):
    return msprime.MassMigration(
            time=time, source=source, destination=destination, proportion=proportion)

def give_migration_change_rate(time, rate, index=None):
    return msprime.MigrationRateChange(time=time, rete=rate, matrix_index=index)

def give_population_parametrs_change(time, initial_size=None,growth_rate=None,population_id=None):
    return  msprime.PopulationParametersChange(
        time=time, initial_size=initial_size, 
        growth_rate=growth_rate, population_id=population_id)

In [77]:
give_two_populations(100, None)

[<msprime.simulations.PopulationConfiguration at 0x7fe84a2d6eb8>,
 <msprime.simulations.PopulationConfiguration at 0x7fe84a2d6940>]

In [80]:
give_initial_migration_matrix(10,9)

[[0, 10], [9, 0]]