In [1]:
import msprime
import numpy as np
import statistics
import math
import allel
import pandas as pd
import statsmodels.api as sm
from scipy import (stats,ndimage)

In [3]:
print("loading data files")

#number of present-day demes and number of extra ancestral populations.
#declared once, up front, so every shape below is derived from them instead of
#repeating the literals 35 / 38 / 3
d=35
n_ancient=3

#load adjacency matrix
#this will be the base for our migration matrix
adj_mat=np.genfromtxt("../../../ukmap/uk_nuts2_adj.txt",delimiter=",")
if adj_mat.shape!=(d,d):
    raise ValueError("expected a {0}x{0} adjacency matrix, got shape {1}".format(d,adj_mat.shape))

#load column names and rownames for matrix (one deme id per line, no header)
mig_mat_id=pd.read_csv("../../../ukmap/uk_nuts2_adj_ids.txt",header=None,names=["deme_id"],sep="\t")

#multiply with scaling factor - the migration rate
mig_mat=0.08*adj_mat

#add rows and columns of zeros for the ancient populations, giving a
#(d+n_ancient) x (d+n_ancient) matrix
mig_mat=np.pad(mig_mat,((0,n_ancient),(0,n_ancient)),mode="constant",constant_values=0)

bplace_summary=pd.read_csv("../../../gwas/ukb/popfiles/ukb_ss250_d35.weighted.pop",sep="\t")

#count number of individuals (rows) in each deme
bplace_summary=bplace_summary.groupby("deme").size().reset_index(name="ninds")

#merge migmat_ids with bplace_summary to preserve the same order as the migration matrix
#(an inner merge keeps the order of the left-hand keys)
bplace_summary2=mig_mat_id.merge(bplace_summary,left_on="deme_id",right_on="deme")

#the inner merge silently drops any deme id that has no individuals, which would
#shift the per-deme sample sizes relative to the migration matrix rows.
#fail loudly instead: the merged table must have exactly one row per deme
if len(bplace_summary2)!=d:
    raise ValueError("expected {0} demes after merging, got {1}".format(d,len(bplace_summary2)))


loading data files


In [4]:
# Per-deme sample counts, taken from the 'ninds' column of bplace_summary2.
sample_sizes = bplace_summary2['ninds']

# One sampled population per entry of sample_sizes, in the same order.
population_configurations = [
    msprime.PopulationConfiguration(sample_size=k)
    for k in sample_sizes
]

# Three further populations with no samples (the notebook also appends three
# zero rows/columns to mig_mat for them).
# Each one is constructed separately: `[obj] * 3` would place the *same*
# PopulationConfiguration instance in the list three times, so changing one
# entry would change all three.
population_configurations.extend(
    msprime.PopulationConfiguration(sample_size=0) for _ in range(3)
)

In [6]:
# Total number of samples summed over all demes.
sample_sizes.sum()

17514

In [7]:

############ set up the demography
#
# Population indices are 0-based, as passed to msprime: 0-34 are the sampled
# demes and 35-37 are the three sample-less populations appended afterwards.
# Times run backwards from the present. For a MassMigration, `proportion` is
# the fraction of lineages in `source` moved to `destination` (see the msprime
# documentation for the exact semantics).
# Events are listed in non-decreasing order of time; keep them that way when
# editing.

demog_list=[
    #switch off all migration from generation 100 backwards
    [msprime.MigrationRateChange(time=100,rate=0)],

    #create a N-S gradient: at t=100 send a band-specific fraction of each
    #deme's lineages to deme 0 (north) ...
    [msprime.MassMigration(time=100, source=i, destination=0, proportion=1.0) for i in range(1,6)],
    [msprime.MassMigration(time=100, source=i, destination=0, proportion=0.8) for i in range(6,12)],
    [msprime.MassMigration(time=100, source=i, destination=0, proportion=0.6) for i in range(12,18)],
    [msprime.MassMigration(time=100, source=i, destination=0, proportion=0.4) for i in range(18,24)],
    [msprime.MassMigration(time=100, source=i, destination=0, proportion=0.2) for i in range(24,30)],
    #... then, just afterwards (t=100.1), send everything still left in demes
    #6-33 to deme 34 (south). Demes 30-33 go entirely to deme 34.
    [msprime.MassMigration(time=100.1, source=i, destination=34, proportion=1.0) for i in range(6,12)],
    [msprime.MassMigration(time=100.1, source=i, destination=34, proportion=1.0) for i in range(12,18)],
    [msprime.MassMigration(time=100.1, source=i, destination=34, proportion=1.0) for i in range(18,24)],
    [msprime.MassMigration(time=100.1, source=i, destination=34, proportion=1.0) for i in range(24,30)],
    [msprime.MassMigration(time=100.1, source=i, destination=34, proportion=1.0) for i in range(30,34)],

    #ancient history
    #labels (WHG, Steppe, EF, ...) are carried over from the author's notes;
    #the population behind index 36 is not labelled consistently there
    #TODO confirm the label of population 36

    #t 4500/4501: 34 > 35 (WHG-south to Steppe, 20%) and 0 > 35 (WHG-north to Steppe, 50%)
    [msprime.MassMigration(time=4500, source=34, destination=35, proportion=0.2)],
    [msprime.MassMigration(time=4501, source=0, destination=35, proportion=0.5)],

    #t 7510/7511: 34 > 37 (WHG-south to EF, 75%) and 0 > 37 (WHG-north to EF, 40%)
    [msprime.MassMigration(time=7510, source=34, destination=37, proportion=0.75)],
    [msprime.MassMigration(time=7511, source=0, destination=37, proportion=0.4)],

    #t 9000: 35 > 36 (50% of Steppe to population 36)
    #t 9001: 34 > 0 (the two WHG populations merge)
    [msprime.MassMigration(time=9000, source=35, destination=36, proportion=0.5)],
    [msprime.MassMigration(time=9001, source=34, destination=0, proportion=1)],

    #t 25k: 35 > 37, Steppe & EF merge
    [msprime.MassMigration(time=25000, source=35, destination=37, proportion=1)],

    #t 30k: 0 > 36, HG and basal Eurasians merge
    [msprime.MassMigration(time=30000, source=0, destination=36, proportion=1)],

    #t 45k: 36 > 37, last remaining merge (all lineages end up in population 37)
    [msprime.MassMigration(time=45000, source=36, destination=37, proportion=1)]]

#flatten the per-step sublists into the single, time-ordered event list
#that msprime expects
demog = [item for sublist in demog_list for item in sublist]



In [8]:
# Fixed seed so that re-running the notebook reproduces the same tree
# sequence; change the value to draw a different replicate.
sim_seed = 1

# Simulate one 100 kb region under the spatial + ancient demography:
# population_configurations, mig_mat and demog are built in the cells above.
ts = msprime.simulate(
    Ne=10000,
    population_configurations=population_configurations,
    migration_matrix=mig_mat,
    mutation_rate=1e-08,
    recombination_rate=1e-08,
    length=100000,
    demographic_events=demog,
    random_seed=sim_seed,
)

In [9]:
# Sanity check: number of samples in the simulated tree sequence. The recorded
# output equals the total of sample_sizes displayed earlier in the notebook.
ts.num_samples

17514

In [12]:
# pandas is already imported as `pd` in the notebook's import cell, so it is
# not imported again here.
# Re-read the population file. NOTE: this rebinds `bplace_summary`, which the
# data-loading cell had reduced to per-deme counts, back to the raw table.
bplace_summary=pd.read_csv("../../../gwas/ukb/popfiles/ukb_ss250_d35.weighted.pop",sep="\t")

In [13]:
# Count the rows belonging to each deme and show the result as a table with
# the columns "deme" and "ninds".
(
    bplace_summary
    .groupby("deme")
    .size()
    .reset_index(name="ninds")
)

Unnamed: 0,deme,ninds
0,UKC1,956
1,UKC2,1460
2,UKD1,100
3,UKD3,1516
4,UKD4,356
5,UKD6,264
6,UKD7,1248
7,UKE1,150
8,UKE2,226
9,UKE3,1004
