## Setup

In [1]:
# Execute the shared setup notebook in this kernel. Names used below without a
# visible import (allel, np, mpl, plt, sns, collections, bisect, SeqFeature,
# phase1_ar3, phase1_ar31) are presumably defined there - TODO confirm.
%run setup.ipynb
# use the interactive "notebook" matplotlib backend for the figures below
%matplotlib notebook

In [2]:
# setup callset - needed for variant annotations
callset_pass = phase1_ar31.callset_pass
# variant positions on 2L, wrapped for the intersection lookup done further down
pos_pass = allel.SortedIndex(callset_pass['2L/variants/POS'])
# read the whole ANN dataset ([:]) and keep only the two fields used in this notebook
ann_pass = callset_pass['2L/variants/ANN'][:][['Annotation', 'HGVS_p']]

In [3]:
# setup haplotypes
callset_phased = phase1_ar31.callset_phased
# dask-backed wrapper: the genotypes are sliced first and only computed later
genotypes_phased = allel.GenotypeDaskArray(callset_phased['2L/calldata/genotype'])
pos_phased = allel.SortedIndex(callset_phased['2L/variants/POS'])
# REF/ALT alleles; presumably row-aligned with pos_phased (same callset) - TODO confirm
ref_phased = callset_phased['2L/variants/REF']
alt_phased = callset_phased['2L/variants/ALT']


In [4]:
# positions on 2L of the two kdr variants; going by the missense labels listed
# further down these are presumably Leu995Ser (s) and Leu995Phe (f) - TODO confirm
pos_kdr_s = 2422651
pos_kdr_f = 2422652

In [5]:
# define region for EHH analysis
# locate the phased variants with positions between 0 and 4,000,000
loc_region = pos_phased.locate_range(0, 4000000)
# apply the same selection to positions and alleles so that they stay row-aligned
pos_phased_region = pos_phased[loc_region]
ref_phased_region = ref_phased[loc_region]
alt_phased_region = alt_phased[loc_region]
pos_phased_region

0,1,2,3,4,...,163958,163959,163960,163961,163962
44688,44691,44732,44736,44756,...,3997372,3997373,3997378,3997381,3997386


In [6]:
# locate the kdr variants within the haplotypes callset
# N.B., these are row indices relative to the region selected above, not to the
# whole chromosome arm
idx_kdr_s = pos_phased_region.locate_key(pos_kdr_s)
idx_kdr_f = pos_phased_region.locate_key(pos_kdr_f)
idx_kdr_s, idx_kdr_f

(26000, 26001)

In [7]:
# locate the intersection - needed to tie in annotations
# loc1 selects the rows of pos_pass that are also present in pos_phased_region;
# the second return value is not needed here
loc1, _ = pos_pass.locate_intersection(pos_phased_region)
# sanity check: this count has to equal the number of variants in the region,
# otherwise the annotations selected with loc1 will not line up with the haplotypes
np.count_nonzero(loc1)

163963

In [8]:
# annotations for the variants in the region, selected via the intersection above
ann_phased_region = ann_pass[loc1]
ann_phased_region

array([(b'intergenic_region', b'.'), (b'intergenic_region', b'.'),
       (b'intergenic_region', b'.'), ...,
       (b'downstream_gene_variant', b'.'),
       (b'downstream_gene_variant', b'.'),
       (b'downstream_gene_variant', b'.')], 
      dtype=[('Annotation', 'S34'), ('HGVS_p', 'S14')])

In [9]:
# tally variants by annotation; N.B., the field is fixed-width ('S34'), so longer
# annotation names show up truncated in the keys
collections.Counter(ann_phased_region['Annotation'])

Counter({b'3_prime_UTR_variant': 2941,
         b'5_prime_UTR_premature_start_codon_': 306,
         b'5_prime_UTR_variant': 1677,
         b'downstream_gene_variant': 18539,
         b'initiator_codon_variant': 3,
         b'intergenic_region': 54849,
         b'intragenic_variant': 48,
         b'intron_variant': 32362,
         b'missense_variant': 5805,
         b'missense_variant&splice_region_var': 70,
         b'splice_acceptor_variant&intron_var': 24,
         b'splice_donor_variant&intron_varian': 27,
         b'splice_region_variant': 36,
         b'splice_region_variant&intron_varia': 649,
         b'splice_region_variant&stop_retaine': 5,
         b'splice_region_variant&synonymous_v': 87,
         b'start_lost': 9,
         b'stop_gained': 37,
         b'stop_lost&splice_region_variant': 4,
         b'stop_retained_variant': 5,
         b'synonymous_variant': 8636,
         b'upstream_gene_variant': 37844})

In [10]:
# exclude cross parents
# select the region, view genotypes as haplotypes, drop the last 16 haplotype
# columns, then load into memory; assumes the cross parents are the final 16
# haplotypes - TODO confirm
haps_phased_region = genotypes_phased[loc_region].to_haplotypes()[:, :-16].compute()
haps_phased_region

Unnamed: 0,0,1,2,3,4,...,1525,1526,1527,1528,1529,Unnamed: 12
0,0,0,0,0,0,...,0,0,0,0,0,
1,0,0,0,0,0,...,0,0,0,0,0,
2,0,0,0,0,0,...,0,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
163960,0,0,0,0,0,...,0,0,0,0,0,
163961,0,0,0,0,0,...,0,0,0,0,0,
163962,0,0,0,0,0,...,0,0,0,0,0,


In [11]:
# boolean mask over haplotypes: True where the haplotype carries allele 1 at kdr_s
loc_haps_kdr_s = haps_phased_region[idx_kdr_s] == 1
np.count_nonzero(loc_haps_kdr_s)

430

In [12]:
# boolean mask over haplotypes: True where the haplotype carries allele 1 at kdr_f
loc_haps_kdr_f = haps_phased_region[idx_kdr_f] == 1
np.count_nonzero(loc_haps_kdr_f)

775

In [13]:
# haplotypes carrying neither kdr allele (presumably the wild-type Leu995 - TODO confirm)
loc_haps_kdr_l = ~(loc_haps_kdr_s | loc_haps_kdr_f)
np.count_nonzero(loc_haps_kdr_l)

325

In [14]:
# allele counts per variant over the retained haplotypes; max_allele=1 gives
# exactly two columns (allele 0 and allele 1)
ac_phased_region = haps_phased_region.count_alleles(max_allele=1)
ac_phased_region

Unnamed: 0,0,1,Unnamed: 3
0,1529,1,
1,1528,2,
2,1528,2,
...,...,...,...
163960,1527,3,
163961,1529,1,
163962,1527,3,


In [15]:
# genomic interval labelled 'Vgsc' on 2L; the coordinates are hard-coded here
region_vgsc = SeqFeature('2L', 2358158, 2431617, label='Vgsc')
region_vgsc

<SeqFeature 'Vgsc' 2L:2358158-2431617>

In [16]:
# slice of rows (relative to the region arrays) falling within the Vgsc interval
loc_vgsc = pos_phased_region.locate_range(region_vgsc.start, region_vgsc.end)
loc_vgsc

slice(24471, 26181, None)

In [17]:
# haplotypes restricted to the variants within Vgsc
haps_vgsc = haps_phased_region[loc_vgsc]
haps_vgsc

Unnamed: 0,0,1,2,3,4,...,1525,1526,1527,1528,1529,Unnamed: 12
0,0,0,0,0,0,...,0,0,0,0,0,
1,0,0,0,0,0,...,0,0,0,0,0,
2,0,0,0,0,0,...,0,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
1707,0,0,0,0,0,...,0,0,0,0,0,
1708,0,0,0,0,0,...,0,0,0,0,0,
1709,0,0,0,0,0,...,0,0,0,0,0,


In [18]:
# allele counts for the Vgsc variants (columns: allele 0, allele 1)
ac_vgsc = haps_vgsc.count_alleles(max_allele=1)
ac_vgsc

Unnamed: 0,0,1,Unnamed: 3
0,1529,1,
1,1457,73,
2,1528,2,
...,...,...,...
1707,1513,17,
1708,1525,5,
1709,1519,11,


In [19]:
# annotations for the Vgsc variants, row-aligned with haps_vgsc and ac_vgsc
ann_vgsc = ann_phased_region[loc_vgsc]

In [20]:
# Vgsc variants annotated exactly as 'missense_variant' and with allele 1 observed
# on more than 7 haplotypes; the threshold of 7 is hard-coded here
loc_vgsc_missense = (ann_vgsc['Annotation'] == b'missense_variant') & (ac_vgsc[:, 1] > 7)
np.count_nonzero(loc_vgsc_missense)

16

In [21]:
# haplotypes at the selected missense variants only (one row per variant)
haps_vgsc_missense = haps_vgsc[loc_vgsc_missense]
haps_vgsc_missense

Unnamed: 0,0,1,2,3,4,...,1525,1526,1527,1528,1529,Unnamed: 12
0,0,0,0,0,0,...,0,0,0,0,0,
1,0,0,0,0,0,...,0,0,0,0,0,
2,0,0,0,0,0,...,0,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
13,0,0,0,0,0,...,0,0,0,0,0,
14,0,0,0,1,0,...,0,0,0,0,0,
15,0,0,0,0,0,...,0,0,0,0,0,


In [22]:
# labels for the missense variants: decode the HGVS protein strings and drop the
# first two characters of each (presumably the 'p.' prefix - TODO confirm)
lbl_vgsc_missense = [
    hgvs_p[2:]
    for hgvs_p in ann_vgsc[loc_vgsc_missense]['HGVS_p'].astype('U')
]
lbl_vgsc_missense

['Arg254Lys',
 'Asp466His',
 'Thr791Met',
 'Leu995Ser',
 'Leu995Phe',
 'Ala1125Val',
 'Ile1527Thr',
 'Glu1597Gly',
 'Ala1746Ser',
 'Val1853Ile',
 'Ile1868Thr',
 'Pro1874Ser',
 'Pro1874Leu',
 'Phe1920Ser',
 'Ala1934Val',
 'Ile1940Thr']

In [23]:
def plot_haplotypes(ax, h, mut_labels):
    """Draw a haplotype array as a black-and-white grid on the given axes.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    h : array_like, 2-dimensional
        Allele calls with one row per variant and one column per haplotype.
        Values of 0 are drawn white and values of 1 black.
    mut_labels : sequence of str
        One label per row of `h`, used for the y tick labels.

    """
    n_variants, n_haplotypes = h.shape
    # three-step colour map, i.e., ('w', 'k') cycled to a length of 3
    cmap = mpl.colors.ListedColormap(['w', 'k', 'w'], name='mymap')
    # rows are flipped so that the first variant ends up at the top of the axes
    flipped = np.asarray(h[::-1])
    ax.pcolormesh(flipped, cmap=cmap, vmin=0, vmax=2)
    # one tick per row, centred on the row, labelled in the same flipped order
    ax.set_yticks(np.arange(n_variants) + .5)
    ax.set_yticklabels(mut_labels[::-1], family='monospace')
    # thin separator lines between rows
    ax.hlines(np.arange(n_variants + 1), 0, n_haplotypes, color='k', lw=.5)
    ax.set_xlim(0, n_haplotypes)
    ax.set_ylim(0, n_variants)


In [24]:
# overview of the Vgsc missense alleles on every haplotype, in the original column order
fig, ax = plt.subplots()
plot_haplotypes(ax, haps_vgsc_missense, lbl_vgsc_missense)

<IPython.core.display.Javascript object>

In [25]:
# define types of variants to include in EHH analysis - should be mostly neutral
# N.B., annotations are matched exactly, so compound annotations (e.g., those
# combined with a splice region term) are not included
neutral_annotations = (
    b'intergenic_region',
    b'intron_variant',
    b'downstream_gene_variant',
    b'upstream_gene_variant',
    b'synonymous_variant',
    b'3_prime_UTR_variant',
    b'5_prime_UTR_variant',
)
# True where the variant's annotation equals any of the above
loc_type_neutral = np.logical_or.reduce(
    [ann_phased_region['Annotation'] == ann for ann in neutral_annotations]
)
np.count_nonzero(loc_type_neutral), loc_type_neutral.shape

(156848, (163963,))

In [26]:
# locate singletons - will exclude from EHH analysis
# a singleton is a variant where the rarer of the two alleles is seen exactly once
loc_sgl = ac_phased_region.min(axis=1) == 1
# rarer allele seen more than once; N.B., variants where the rarer allele count is
# zero (not segregating in these haplotypes) fall into neither mask
loc_nosgl = ac_phased_region.min(axis=1) > 1
np.count_nonzero(loc_sgl), np.count_nonzero(loc_nosgl), loc_nosgl.shape

(52221, 111611, (163963,))

In [27]:
# these are the variants to use for EHH: neutral annotation and not a singleton
# (the & creates a new array, so neither input mask is modified below)
loc_ehh = loc_type_neutral & loc_nosgl
# patch back in kdr - both variants are forced in with a single assignment
loc_ehh[[idx_kdr_s, idx_kdr_f]] = True
np.count_nonzero(loc_ehh), loc_ehh.shape

(107533, (163963,))

In [28]:
# these are the variants to use for mutational distance
# N.B., take a copy - a plain assignment would make loc_mut another name for
# loc_sgl, and patching kdr in below would then silently mark the kdr variants
# as singletons in loc_sgl too
loc_mut = np.array(loc_sgl)
# alternative, not currently used: also include non-neutral mutations
# loc_mut = loc_sgl | ~loc_type_neutral
# patch back in kdr
loc_mut[idx_kdr_s] = True
loc_mut[idx_kdr_f] = True
np.count_nonzero(loc_mut), loc_mut.shape

(52223, (163963,))

In [29]:
# haplotypes at the variants used for counting mutations (singletons plus kdr);
# columns remain in the original haplotype order
haps_mut = haps_phased_region[loc_mut]
haps_mut

Unnamed: 0,0,1,2,3,4,...,1525,1526,1527,1528,1529,Unnamed: 12
0,0,0,0,0,0,...,0,0,0,0,0,
1,0,0,0,0,0,...,0,0,0,0,0,
2,0,0,0,0,0,...,0,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
52220,0,0,0,0,0,...,0,0,0,0,0,
52221,0,0,0,0,0,...,0,0,0,0,0,
52222,0,0,0,0,0,...,0,0,0,0,0,


In [30]:
# haplotypes at the variants used for haplotype sharing (neutral non-singletons plus kdr)
haps_ehh = haps_phased_region[loc_ehh]
haps_ehh

Unnamed: 0,0,1,2,3,4,...,1525,1526,1527,1528,1529,Unnamed: 12
0,0,0,0,0,0,...,0,0,0,0,0,
1,0,0,0,0,0,...,0,0,0,0,0,
2,1,1,1,1,1,...,1,1,1,1,1,
...,...,...,...,...,...,...,...,...,...,...,...,...
107530,0,0,0,0,0,...,0,0,0,0,0,
107531,0,0,0,0,0,...,0,0,0,0,0,
107532,0,0,0,0,0,...,0,0,0,0,0,


In [32]:
# positions and alleles for the EHH variants, row-aligned with haps_ehh
pos_ehh = pos_phased_region[loc_ehh]
ref_ehh = ref_phased_region[loc_ehh]
alt_ehh = alt_phased_region[loc_ehh]
pos_ehh

0,1,2,3,4,...,107528,107529,107530,107531,107532
44691,44732,44756,44767,44872,...,3997366,3997367,3997373,3997378,3997386


In [33]:
# positions and alleles for the mutation variants, row-aligned with haps_mut
pos_mut = pos_phased_region[loc_mut]
ref_mut = ref_phased_region[loc_mut]
alt_mut = alt_phased_region[loc_mut]
pos_mut

0,1,2,3,4,...,52218,52219,52220,52221,52222
44688,44736,44887,44904,44910,...,3997357,3997362,3997371,3997372,3997381


In [34]:
# row indices of the kdr variants within the EHH subset (they differ from
# idx_kdr_s / idx_kdr_f because rows have been filtered out)
idx_kdr_s_ehh = pos_ehh.locate_key(pos_kdr_s)
idx_kdr_f_ehh = pos_ehh.locate_key(pos_kdr_f)
idx_kdr_s_ehh, idx_kdr_f_ehh

(15754, 15755)

In [35]:
# row indices of the kdr variants within the mutations subset
idx_kdr_s_mut = pos_mut.locate_key(pos_kdr_s)
idx_kdr_f_mut = pos_mut.locate_key(pos_kdr_f)
idx_kdr_s_mut, idx_kdr_f_mut

(9840, 9841)

In [36]:
# read in haplotype metadata to get population
df_haplotypes = phase1_ar31.df_haplotypes
# drop the 'colony' rows; assumes the remaining rows correspond one-to-one, and in
# the same order, to the haplotype columns kept above - TODO confirm
df_haplotypes = df_haplotypes[df_haplotypes.population != 'colony']
df_haplotypes.head()

Unnamed: 0_level_0,label,ox_code,population,label_aug,country,region,sex,m_s,kt_2la,kt_2rb
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,AB0085-Ca,AB0085-C,BFS,"AB0085-Ca [Burkina Faso, Pala, S, F]",Burkina Faso,Pala,F,S,2.0,2.0
1,AB0085-Cb,AB0085-C,BFS,"AB0085-Cb [Burkina Faso, Pala, S, F]",Burkina Faso,Pala,F,S,2.0,2.0
2,AB0087-Ca,AB0087-C,BFM,"AB0087-Ca [Burkina Faso, Bana, M, F]",Burkina Faso,Bana,F,M,2.0,1.0
3,AB0087-Cb,AB0087-C,BFM,"AB0087-Cb [Burkina Faso, Bana, M, F]",Burkina Faso,Bana,F,M,2.0,1.0
4,AB0088-Ca,AB0088-C,BFM,"AB0088-Ca [Burkina Faso, Bana, M, F]",Burkina Faso,Bana,F,M,2.0,0.0


In [37]:
# don't include KDR in EHH data
# left flank: every EHH variant before kdr_s
loc_ehh_left = slice(0, idx_kdr_s_ehh)
# right flank: every EHH variant after kdr_f
loc_ehh_right = slice(idx_kdr_f_ehh+1, None)
loc_ehh_left, loc_ehh_right

(slice(0, 15754, None), slice(15756, None, None))

In [38]:
# include KDR in mutations data
# left flank: up to and including kdr_s
loc_mut_left = slice(0, idx_kdr_s_mut+1)
# right flank: from kdr_f (inclusive) onwards
loc_mut_right = slice(idx_kdr_f_mut, None)
loc_mut_left, loc_mut_right

(slice(0, 9841, None), slice(9841, None, None))

In [39]:
# split the EHH haplotypes and positions into the flanks on either side of kdr
haps_ehh_left = haps_ehh[loc_ehh_left]
pos_ehh_left = pos_ehh[loc_ehh_left]
haps_ehh_right = haps_ehh[loc_ehh_right]
pos_ehh_right = pos_ehh[loc_ehh_right]

In [40]:
# split the mutation haplotypes and positions into the two flanks; N.B., columns
# are still in the original haplotype order, not in any sorted order
haps_mut_left = haps_mut[loc_mut_left]
pos_mut_left = pos_mut[loc_mut_left]
haps_mut_right = haps_mut[loc_mut_right]
pos_mut_right = pos_mut[loc_mut_right]

In [41]:
# sanity check: all four arrays must have the same number of columns (haplotypes)
haps_ehh_left.shape, haps_ehh_right.shape, haps_mut_left.shape, haps_mut_right.shape

((15754, 1530), (91777, 1530), (9841, 1530), (42382, 1530))

## Shared haplotype length

In [42]:
# column order that sorts the downstream haplotypes by prefix, i.e., starting from
# the variant nearest to kdr and extending away from it
idx_right_sorted = haps_ehh_right.prefix_argsort()
# reorder columns; column i here is original haplotype idx_right_sorted[i]
haps_ehh_right_sorted = haps_ehh_right[:, idx_right_sorted]
haps_ehh_right_sorted

Unnamed: 0,0,1,2,3,4,...,1525,1526,1527,1528,1529,Unnamed: 12
0,0,0,0,0,0,...,1,1,1,1,1,
1,0,0,0,0,0,...,0,0,0,0,0,
2,0,0,0,0,0,...,0,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
91774,0,0,0,0,0,...,0,0,0,0,0,
91775,0,0,0,0,0,...,0,0,0,0,0,
91776,0,0,0,0,0,...,0,0,0,0,0,


In [43]:
# one value per pair of adjacent sorted columns; going by the function name this
# is the number of leading variants over which the pair is identical - TODO confirm
nspl_right = allel.opt.stats.neighbour_shared_prefix_lengths_int8(np.asarray(haps_ehh_right_sorted))
nspl_right

array([  99,  130,   40, ...,  680,    7, 1026], dtype=int32)

In [44]:
# convert shared prefix lengths (numbers of variants) into physical distances (bp),
# measured from the first variant of the flank to the variant at index nspl
# NOTE(review): a value of nspl equal to the number of variants in the flank (a
# pair identical across the whole flank) would index out of bounds - confirm this
# cannot happen
nspd_right = np.asarray(pos_ehh_right)[nspl_right] - pos_ehh_right[0]
nspd_right

array([  9105,  10166,   3537, ...,  97977,    851, 116145], dtype=int32)

In [45]:
# For each pair of neighbouring haplotypes in the prefix-sorted order, count the
# mutations at which the pair differs within the span over which they are shared.
# N.B., nspl_right describes neighbours in *sorted* order, whereas the columns of
# haps_mut_right are in the original order, so each neighbour has to be mapped
# back to its original column via idx_right_sorted before comparing.
n_muts_right = np.zeros_like(nspl_right)
for i in range(len(nspl_right)):
    # neighbour shared prefix length
    nspl = nspl_right[i]
    # position where haplotypes diverge
    pos_div = pos_ehh_right[nspl]
    # index into mutations array where haplotypes diverge
    idx_mut_div = bisect.bisect_right(pos_mut_right, pos_div)
    # TODO check if nspl is at or beyond point of divergence
    # original columns of the two neighbouring haplotypes
    hap_a = idx_right_sorted[i]
    hap_b = idx_right_sorted[i + 1]
    # number of mutations
    n_muts = np.count_nonzero(haps_mut_right[:idx_mut_div, hap_a] != haps_mut_right[:idx_mut_div, hap_b])
    n_muts_right[i] = n_muts

In [47]:
# 1 cM/Mb convert to M/bp
# (100 cM per Morgan, 1e6 bp per Mb)
1 / (1e2 * 1e6)

1e-08

In [48]:
# assume constant recombination rate
# value is in Morgans per bp, i.e., the 1 cM/Mb worked out above
rr = 1e-8
rr

1e-08

In [49]:
# mutation rate used in the t_hat estimates below; presumably per bp per
# generation - TODO confirm units and cite the source of this value
mu = 3.5e-9
mu

3.5e-09

In [51]:
# Downstream flank summary figure. Four stacked panels share one x axis - the
# haplotypes in prefix-sorted order:
#   1. distance over which each haplotype is shared with its neighbour
#   2. t_hat = (1 + n_muts) / (2 * (L * rr + L * mu)) for each neighbouring pair
#   3. population of each haplotype
#   4. Vgsc missense alleles carried by each haplotype
fig = plt.figure(figsize=(8, 5))
gs = mpl.gridspec.GridSpec(nrows=4, ncols=1, height_ratios=(6, 3, .5, 6))
n_haps = len(df_haplotypes)

ax = fig.add_subplot(gs[0])
ax.set_title('Downstream flank')
sns.despine(ax=ax, offset=5, bottom=True)
# N.B., values are distances shared between haplotypes, so there is one fewer
# value than there are haplotypes
x = np.arange(0, n_haps)[:-1] + .5
y = nspd_right
ax.plot(x, y, color='k', lw=.5)
ax.set_yscale('log')
ax.set_ylim(1e3, 2e6)
ax.set_xlim(0, n_haps)
ax.set_xticks([])
ax.set_ylabel('Shared haplotype length (bp)')

ax = fig.add_subplot(gs[1])
sns.despine(ax=ax, offset=5, bottom=True)
# a shared length of zero gives a zero denominator; the result (inf) is kept as
# before, only the RuntimeWarning is silenced
with np.errstate(divide='ignore'):
    t_hat = (1 + n_muts_right) / (2 * (nspd_right * rr + nspd_right * mu))
ax.bar(x, t_hat, width=1, align='edge', color='k')
ax.set_xlim(0, n_haps)
ax.set_xticks([])
ax.set_yscale('log')
ax.set_ylim(2e1, 2e5)
# raw string - '\h' is not a valid escape sequence in an ordinary string literal
ax.set_ylabel(r'$\hat{t}$', rotation=0, ha='right', va='center')

ax = fig.add_subplot(gs[2])
sns.despine(ax=ax, left=True, bottom=True)
# idx_right_sorted holds column positions, so select rows by position (.iloc)
# rather than by index label
pops_right = df_haplotypes.population.iloc[idx_right_sorted]
# NOTE(review): the colours come from phase1_ar3 whereas everything else in this
# notebook uses phase1_ar31 - confirm this is intended
pop_colors_right = [phase1_ar3.pop_colors[p] for p in pops_right]
ax.broken_barh([(i, 1) for i in range(n_haps)], yrange=(0, 1), color=pop_colors_right)
ax.set_xlim(0, n_haps)
ax.set_xticks([])
ax.set_yticks([])
ax.set_ylabel('Population', rotation=0, ha='right', va='center')

ax = fig.add_subplot(gs[3])
plot_haplotypes(ax, haps_vgsc_missense[:, idx_right_sorted], lbl_vgsc_missense)
ax.set_xlim(0, n_haps)
ax.set_xlabel('Haplotypes (sorted by prefix)')

fig.tight_layout(h_pad=0);

<IPython.core.display.Javascript object>



In [52]:
# the upstream flank ends just before kdr, so reverse the variant order ([::-1])
# to make the "prefix" start at the variant nearest to kdr and extend away from it
idx_left_sorted = haps_ehh_left[::-1].prefix_argsort()
# reversed variants, sorted columns; column i is original haplotype idx_left_sorted[i]
haps_ehh_left_sorted = haps_ehh_left[::-1, idx_left_sorted]
haps_ehh_left_sorted

Unnamed: 0,0,1,2,3,4,...,1525,1526,1527,1528,1529,Unnamed: 12
0,0,0,0,0,0,...,0,0,0,1,1,
1,0,0,0,0,0,...,0,1,1,0,0,
2,0,0,0,0,0,...,1,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
15751,0,0,0,0,0,...,1,1,1,0,0,
15752,0,0,0,0,0,...,0,0,0,0,0,
15753,0,0,0,0,0,...,0,0,0,0,0,


In [53]:
# one value per pair of adjacent sorted columns, computed on the reversed flank;
# going by the function name this is the number of variants, counting away from
# kdr, over which the pair is identical - TODO confirm
nspl_left = allel.opt.stats.neighbour_shared_prefix_lengths_int8(np.asarray(haps_ehh_left_sorted))
nspl_left

array([14900, 10152,  8213, ...,  5231,     0,   212], dtype=int32)

In [54]:
# convert shared lengths (numbers of variants) into physical distances (bp),
# measured back from the last variant of the flank (the one nearest to kdr)
# clip(min=1) keeps the index in bounds: a length of 0 would otherwise index one
# past the end; with the clip, lengths of 0 and 1 both give a distance of 0
# NOTE(review): the index used here (n - nspl) is the last *shared* variant,
# whereas the downstream flank indexes the first variant *beyond* the shared
# prefix (nspl) - confirm whether this asymmetry is intended
nspd_left = pos_ehh_left[-1] - np.asarray(pos_ehh_left)[pos_ehh_left.shape[0] - nspl_left.clip(min=1)]
nspd_left

array([2165613, 1337241,  930527, ...,  581041,       0,   15826], dtype=int32)

In [55]:
# For each pair of neighbouring haplotypes in the sorted order, count the
# mutations at which the pair differs within the span over which they are shared.
# N.B., nspl_left describes neighbours in *sorted* order, whereas the columns of
# haps_mut_left are in the original order, so each neighbour has to be mapped
# back to its original column via idx_left_sorted before comparing.
n_muts_left = np.zeros_like(nspl_left)
for i in range(len(nspl_left)):
    # neighbour shared prefix length; clipped to at least 1 so that the negative
    # index below stays valid (-0 would select the first element of the flank)
    nspl = nspl_left[i].clip(min=1)
    # position where haplotypes diverge
    pos_div = pos_ehh_left[-nspl]
    # index into mutations array where haplotypes diverge
    idx_mut_div = bisect.bisect_right(pos_mut_left, pos_div)
    # TODO check if nspl is at or beyond point of divergence
    # original columns of the two neighbouring haplotypes
    hap_a = idx_left_sorted[i]
    hap_b = idx_left_sorted[i + 1]
    # number of mutations - here the shared span runs from the point of divergence
    # to the end of the flank (towards kdr)
    n_muts = np.count_nonzero(haps_mut_left[idx_mut_div:, hap_a] != haps_mut_left[idx_mut_div:, hap_b])
    n_muts_left[i] = n_muts

In [56]:
# Upstream flank summary figure. Four stacked panels share one x axis - the
# haplotypes sorted by suffix (i.e., by prefix of the reversed flank):
#   1. distance over which each haplotype is shared with its neighbour
#   2. t_hat = (1 + n_muts) / (2 * (L * rr + L * mu)) for each neighbouring pair
#   3. population of each haplotype
#   4. Vgsc missense alleles carried by each haplotype
fig = plt.figure(figsize=(8, 5))
gs = mpl.gridspec.GridSpec(nrows=4, ncols=1, height_ratios=(6, 3, .5, 6))
n_haps = len(df_haplotypes)

ax = fig.add_subplot(gs[0])
ax.set_title('Upstream flank')
sns.despine(ax=ax, offset=5, bottom=True)
# N.B., values are distances shared between haplotypes, so there is one fewer
# value than there are haplotypes
x = np.arange(0, n_haps)[:-1] + .5
y = nspd_left
ax.plot(x, y, color='k', lw=.5)
ax.set_yscale('log')
ax.set_ylim(1e3, 3e6)
ax.set_xlim(0, n_haps)
ax.set_xticks([])
ax.set_ylabel('Shared haplotype length (bp)')

ax = fig.add_subplot(gs[1])
sns.despine(ax=ax, offset=5, bottom=True)
# a shared length of zero gives a zero denominator; the result (inf) is kept as
# before, only the RuntimeWarning is silenced
with np.errstate(divide='ignore'):
    t_hat = (1 + n_muts_left) / (2 * (nspd_left * rr + nspd_left * mu))
ax.bar(x, t_hat, width=1, align='edge', color='k')
ax.set_xlim(0, n_haps)
ax.set_xticks([])
ax.set_yscale('log')
ax.set_ylim(2e1, 2e5)
# raw string - '\h' is not a valid escape sequence in an ordinary string literal
ax.set_ylabel(r'$\hat{t}$', rotation=0, ha='right', va='center')

ax = fig.add_subplot(gs[2])
sns.despine(ax=ax, left=True, bottom=True)
# idx_left_sorted holds column positions, so select rows by position (.iloc)
# rather than by index label
pops_left = df_haplotypes.population.iloc[idx_left_sorted]
# NOTE(review): the colours come from phase1_ar3 whereas everything else in this
# notebook uses phase1_ar31 - confirm this is intended
pop_colors_left = [phase1_ar3.pop_colors[p] for p in pops_left]
ax.broken_barh([(i, 1) for i in range(n_haps)], yrange=(0, 1), color=pop_colors_left)
ax.set_xlim(0, n_haps)
ax.set_yticks([])
ax.set_ylabel('Population', rotation=0, ha='right', va='center')
ax.set_xticks([])

ax = fig.add_subplot(gs[3])
plot_haplotypes(ax, haps_vgsc_missense[:, idx_left_sorted], lbl_vgsc_missense)
ax.set_xlim(0, n_haps)
ax.set_xlabel('Haplotypes (sorted by suffix)')

fig.tight_layout(h_pad=0);

<IPython.core.display.Javascript object>



In [57]:
# t_hat against shared haplotype length for both flanks, on log-log axes
fig = plt.figure()
ax = fig.add_subplot(111)

# N.B., pairs with a shared length of zero give a zero denominator; the resulting
# inf values are kept as before, only the RuntimeWarning is silenced
x = nspd_right
with np.errstate(divide='ignore'):
    t_hat = (1 + n_muts_right) / (2 * (nspd_right * rr + nspd_right * mu))
ax.plot(x, t_hat, marker='o', mfc='none', mec='g', markersize=3, linestyle=' ', label='Downstream')

x = nspd_left
with np.errstate(divide='ignore'):
    t_hat = (1 + n_muts_left) / (2 * (nspd_left * rr + nspd_left * mu))
ax.plot(x, t_hat, marker='o', mfc='none', mec='b', markersize=3, linestyle=' ', label='Upstream')

ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel('Shared haplotype length (bp)')
# raw string - '\h' is not a valid escape sequence in an ordinary string literal
ax.set_ylabel(r'$\hat{t}$', rotation=0, ha='right')
ax.legend();


<IPython.core.display.Javascript object>

  """
  if __name__ == '__main__':


In [58]:
# distribution of t_hat for each flank, on shared log-spaced bins
fig = plt.figure()
ax = fig.add_subplot(111)
bins = np.logspace(1, 6, 80)
# pairs with a shared length of zero give a zero denominator; they are filtered
# out before plotting, so the division warning is silenced rather than shown
with np.errstate(divide='ignore'):
    t_hat = (1 + n_muts_left) / (2 * (nspd_left * rr + nspd_left * mu))
ax.hist(t_hat[(nspd_left > 0)], bins=bins, histtype='step', lw=1, label='Upstream')
with np.errstate(divide='ignore'):
    t_hat = (1 + n_muts_right) / (2 * (nspd_right * rr + nspd_right * mu))
ax.hist(t_hat[(nspd_right > 0)], bins=bins, histtype='step', lw=1, label='Downstream')
ax.set_xscale('log')
# raw string - '\h' is not a valid escape sequence in an ordinary string literal
ax.set_xlabel(r'$\hat{t}$')
ax.set_ylabel('Frequency')
ax.legend();


<IPython.core.display.Javascript object>

  after removing the cwd from sys.path.
  


## Sandbox