# 7FEB23 Figure Code
Module: Introduction to Coalescent Theory & Tree-Thinking \
Topic: The Wright-Fisher model and the standard coalescent.

In [20]:
# Import modules.
import ipcoal
import numpy as np
import msprime
import toyplot
import toyplot.svg
import toytree
import helper_functions as hf
# Report the versions of the plotting and numerics libraries in use.
print(f'toyplot {toyplot.__version__}')
print(f'numpy {np.__version__}')

toyplot 1.0.3
numpy 1.23.5


## Plot the WF Process
In this model, time runs backward from _t_ = 0 generations ago (i.e., the present) to _t_ = _n_ generations ago (i.e., the past). The diagram tracks the ancestors of each gene copy, as well as the gene copies in each past generation that did not leave descendants.

In [2]:
# Build a Wright-Fisher diagram with no inheritance lines: only
# add_diploids() and add_haploids() are called on the helper plot.
wf = hf.WrightFisherPlot(
    seed=42, time=3, popsize=5,
    width=700, height=500,
)
wf.add_diploids()
# Haploid markers use the first color of toyplot's default palette.
haploid_color = toyplot.color.Palette()[0]
wf.add_haploids(size=8, color=haploid_color)

In [3]:
# Write the diagram's canvas to an SVG file under ../figures/ (the path is
# relative to the working directory).
toyplot.svg.render(wf.canvas, '../figures/7feb23_wf_3gens_empty.svg')

In [4]:
# Rebuild the same diagram and overlay the inheritance lines from
# generation to generation (drawn with sort=True).
wf = hf.WrightFisherPlot(
    seed=42, time=3, popsize=5,
    width=700, height=500,
)
wf.add_diploids()
wf.add_lines(sort=True)
# Haploid markers are added after the lines, in the palette's first color.
haploid_color = toyplot.color.Palette()[0]
wf.add_haploids(size=8, color=haploid_color)

In [5]:
# Write the diagram's canvas to an SVG file under ../figures/ (the path is
# relative to the working directory).
toyplot.svg.render(wf.canvas, '../figures/7feb23_wf_3gens_sorted.svg')

In [6]:
# Overlay a genealogy in which the two sampled gene copies coalesce one
# generation back in time.
wf = hf.WrightFisherPlot(
    seed=42, time=3, popsize=5,
    width=700, height=500,
)
wf.add_diploids()
# Fade the full set of inheritance lines so the sampled lineages stand out.
wf.add_lines(sort=True, eopacity=0.25)
# Trace the lineages of samples 4 and 5 in bold black.
wf.add_sampled_lines(
    samples=[4, 5],
    ecolor='black',
    ewidth=2.5,
    vsize=10,
)
# Faded haploid markers in the palette's first color.
haploid_color = toyplot.color.Palette()[0]
wf.add_haploids(size=8, opacity=0.25, color=haploid_color)

In [7]:
# Write the diagram's canvas to an SVG file under ../figures/ (the path is
# relative to the working directory).
toyplot.svg.render(wf.canvas, '../figures/7feb23_coal_1gen_ago.svg')

In [8]:
# Overlay a genealogy in which the two sampled gene copies coalesce two
# generations back in time.
wf = hf.WrightFisherPlot(
    seed=42, time=3, popsize=5,
    width=700, height=500,
)
wf.add_diploids()
# Fade the full set of inheritance lines so the sampled lineages stand out.
wf.add_lines(sort=True, eopacity=0.25)
# Trace the lineages of samples 4 and 6 in bold black.
wf.add_sampled_lines(
    samples=[4, 6],
    ecolor='black',
    ewidth=2.5,
    vsize=10,
)
# Faded haploid markers in the palette's first color.
haploid_color = toyplot.color.Palette()[0]
wf.add_haploids(size=8, opacity=0.25, color=haploid_color)

In [9]:
# Write the diagram's canvas to an SVG file under ../figures/ (the path is
# relative to the working directory).
toyplot.svg.render(wf.canvas, '../figures/7feb23_coal_2gens_ago.svg')

## Simulate and visualize the allele frequency change over 100 generations for a population of 100 diploids for 100 replicate simulations

In [10]:
# Number of replicate simulations.
reps = 100
# Population size (number of diploid individuals).
N = 100
# Starting allele frequency.
p = 0.5
# Number of generations to record, including the starting generation.
gens = 100
# Seed NumPy's global random number generator for reproducibility.
np.random.seed(42)
# Results matrix: one row per replicate, one column per generation.
freq_mat = np.zeros((reps, gens))
# Every replicate starts at the initial frequency.
freq_mat[:, 0] = p
# For every replicate...
for rep in range(reps):
    # For every generation...
    for gen in range(gens - 1):
        # Draw a genotype (0, 1 or 2 copies of the allele) for each of the N
        # diploid individuals, i.e. 2N gene copies in total. Drawing 2*N
        # genotypes here would simulate a population twice the stated size.
        genotypes = np.random.binomial(n=2, p=freq_mat[rep, gen], size=N)
        # Allele frequency among the 2N gene copies of the next generation.
        freq = genotypes.sum() / (genotypes.size * 2)
        # Record it as the starting point for the following generation.
        freq_mat[rep, gen + 1] = freq

In [11]:
# Create the canvas.
canvas = toyplot.Canvas(width=700, height=500)
# Add labelled Cartesian axes.
axes = canvas.cartesian(
    ylabel='Allele Frequency (<i>p</i>)',
    xlabel='Time (generations)',
)
# Plot the allele-frequency trajectories; the matrix is transposed so that
# generations run along the first dimension.
axes.plot(freq_mat.T)
# Place the x-ticks explicitly.
axes.x.ticks.locator = toyplot.locator.Explicit([1, 50, 100])
# Title the plot with the starting frequency.
axes.label.text = '<i>p<sub>0</sub></i> = 0.5'
# Resize all the text: tick labels, axis labels, then the title.
for text_style, font_size in (
    (axes.x.ticks.labels.style, 14),
    (axes.y.ticks.labels.style, 14),
    (axes.x.label.style, 16),
    (axes.y.label.style, 16),
    (axes.label.style, 18),
):
    text_style['font-size'] = font_size

In [12]:
# Write the drift figure to an SVG file under ../figures/ (the path is
# relative to the working directory).
toyplot.svg.render(canvas, '../figures/7feb23_drift.svg')

## Coalescent Theory

In [123]:
def e_t_mrca(n_samples):
    """Expected time to the most recent common ancestor of a sample.

    Sums the expected waiting times E[T_k] = 2 / (k * (k - 1)) spent with k
    ancestral lineages, for k = 2, ..., n_samples. The sum equals
    2 * (1 - 1 / n_samples) and is expressed in the same coalescent time
    units as ``e_t_tot``.

    Parameters
    ----------
    n_samples : int
        Number of sampled gene copies.

    Returns
    -------
    float
        Expected T_MRCA; 0.0 when ``n_samples`` is less than 2.
    """
    k = np.arange(2, n_samples + 1)
    # Each interval contributes 2 / (k (k - 1)), not 2 / k: tree height
    # counts the k-lineage interval once, whereas total branch length counts
    # it k times (which is where the harmonic sum in e_t_tot comes from).
    return 2 * np.sum(1 / (k * (k - 1)))
def e_t_tot(n_samples):
    """Expected total branch length of the genealogy of a sample.

    Computes 2 * sum_{i=1}^{n_samples - 1} 1 / i.

    Parameters
    ----------
    n_samples : int
        Number of sampled gene copies.

    Returns
    -------
    float
        Expected total tree length; 0.0 when ``n_samples`` is less than 2.
    """
    i = np.arange(1, n_samples)
    harmonic_sum = (1 / i).sum()
    return 2 * harmonic_sum

In [126]:
# Sample sizes n = 2, ..., 100 and the expected T_MRCA for each of them.
n_tmrca = np.arange(2, 101)
e_tmrca = np.array(list(map(e_t_mrca, n_tmrca)))

In [137]:
# Create the canvas.
canvas = toyplot.Canvas(width=700, height=500)
# Add labelled Cartesian axes.
axes = canvas.cartesian(
    xlabel='<i>n</i>',
    ylabel='<i>2N</i> Generations',
)
# Plot the expected T_MRCA against the sample size.
axes.plot(n_tmrca, e_tmrca)
# Place the ticks explicitly.
# NOTE(review): the y-ticks are hard-coded to 1, 3, 6, 9; revisit them if
# the range of e_tmrca changes.
axes.x.ticks.locator = toyplot.locator.Explicit((2, 50, 100))
axes.y.ticks.locator = toyplot.locator.Explicit((1, 3, 6, 9))
# Title the plot.
axes.label.text = '<i>T<sub>MRCA</sub></i>'
# Resize all the text: tick labels, axis labels, then the title.
for text_style, font_size in (
    (axes.x.ticks.labels.style, 14),
    (axes.y.ticks.labels.style, 14),
    (axes.x.label.style, 16),
    (axes.y.label.style, 16),
    (axes.label.style, 18),
):
    text_style['font-size'] = font_size

In [138]:
# Write the T_MRCA figure to an SVG file under ../figures/ (the path is
# relative to the working directory).
toyplot.svg.render(canvas, '../figures/7feb23_tmrca.svg')

In [134]:
# Sample sizes n = 1, ..., 100 and the expected total tree length for each.
n_ttot = np.arange(1, 101)
e_ttot = np.array(list(map(e_t_tot, n_ttot)))

In [139]:
# Create the canvas.
canvas = toyplot.Canvas(width=700, height=500)
# Add labelled Cartesian axes.
axes = canvas.cartesian(
    xlabel='<i>n</i>',
    ylabel='<i>2N</i> Generations',
)
# Plot the expected total tree length against the sample size.
axes.plot(n_ttot, e_ttot)
# Place the ticks explicitly.
# NOTE(review): the x-ticks start at 2 although the data start at n = 1.
axes.x.ticks.locator = toyplot.locator.Explicit((2, 50, 100))
axes.y.ticks.locator = toyplot.locator.Explicit((1, 5, 10))
# Title the plot.
axes.label.text = '<i>T<sub>tot</sub></i>'
# Resize all the text: tick labels, axis labels, then the title.
for text_style, font_size in (
    (axes.x.ticks.labels.style, 14),
    (axes.y.ticks.labels.style, 14),
    (axes.x.label.style, 16),
    (axes.y.label.style, 16),
    (axes.label.style, 18),
):
    text_style['font-size'] = font_size

In [140]:
# Write the T_tot figure to an SVG file under ../figures/ (the path is
# relative to the working directory).
toyplot.svg.render(canvas, '../figures/7feb23_ttot.svg')

## Trees

In [156]:
# Simulate the ancestry of five haploid samples. The seed is fixed, and the
# other tree cells in this notebook use the same arguments.
ts = msprime.sim_ancestry(
    random_seed=97,
    population_size=1e6,
    samples=[msprime.SampleSet(5, ploidy=1)],
)
# Pass the first tree of the tree sequence to toytree as a newick string.
newick = ts.first().newick()
tree = toytree.tree(newick, tree_format=0)
# Color leaves with the first toytree color and all other nodes with the second.
for nd in tree.treenode.traverse():
    nd.add_feature('color', toytree.colors[0 if nd.is_leaf() else 1])
colors = tree.get_node_values('color', show_root=1, show_tips=1)
# One width entry (5) per edge value returned by the tree.
e_widths = [5 for _ in tree.get_edge_values('idx')]
# Highlight the edges selected by keys 0 and 1.
# NOTE(review): keys are presumably toytree node idx labels; confirm.
highlight = toytree.colors[5]
e_colors = tree.get_edge_values_mapped({0: highlight, 1: highlight})
# Draw the tree on its own canvas with labels and the scale bar turned off.
canvas = toyplot.Canvas(width=700, height=500)
axes = canvas.cartesian()
mark = tree.draw(
    axes=axes,
    ts='p',
    node_colors=colors,
    edge_colors=e_colors,
    edge_widths=e_widths,
    node_labels=False,
    tip_labels=False,
    scalebar=False,
)
# Hide the Cartesian axes so only the tree is shown.
axes.show = False

In [157]:
# Write the tree figure to an SVG file under ../figures/ (the path is
# relative to the working directory).
toyplot.svg.render(canvas, '../figures/7feb23_coal_i2.svg')

In [153]:
# Simulate the ancestry of five haploid samples. The seed is fixed, and the
# other tree cells in this notebook use the same arguments.
ts = msprime.sim_ancestry(
    random_seed=97,
    population_size=1e6,
    samples=[msprime.SampleSet(5, ploidy=1)],
)
# Pass the first tree of the tree sequence to toytree as a newick string.
newick = ts.first().newick()
tree = toytree.tree(newick, tree_format=0)
# Color leaves with the first toytree color and all other nodes with the second.
for nd in tree.treenode.traverse():
    nd.add_feature('color', toytree.colors[0 if nd.is_leaf() else 1])
colors = tree.get_node_values('color', show_root=1, show_tips=1)
# One width entry (5) per edge value returned by the tree.
e_widths = [5 for _ in tree.get_edge_values('idx')]
# Highlight the edges selected by keys 5 and 2.
# NOTE(review): keys are presumably toytree node idx labels; confirm.
highlight = toytree.colors[5]
e_colors = tree.get_edge_values_mapped({5: highlight, 2: highlight})
# Draw the tree on its own canvas with labels and the scale bar turned off.
canvas = toyplot.Canvas(width=700, height=500)
axes = canvas.cartesian()
mark = tree.draw(
    axes=axes,
    ts='p',
    node_colors=colors,
    edge_colors=e_colors,
    edge_widths=e_widths,
    node_labels=False,
    tip_labels=False,
    scalebar=False,
)
# Hide the Cartesian axes so only the tree is shown.
axes.show = False

In [154]:
# Write the tree figure to an SVG file under ../figures/ (the path is
# relative to the working directory).
toyplot.svg.render(canvas, '../figures/7feb23_coal_i3.svg')

In [152]:
# Display the current canvas inline.
# NOTE(review): the recorded output for this cell is a tuple rather than a
# rendered canvas, so it appears stale; re-run or remove this cell.
canvas

(None,
 <toyplot.coordinates.Cartesian at 0x17995a640>,
 <toytree.Render.ToytreeMark at 0x1790f02b0>)

In [158]:
# Simulate the ancestry of five haploid samples. The seed is fixed, and the
# other tree cells in this notebook use the same arguments.
ts = msprime.sim_ancestry(
    random_seed=97,
    population_size=1e6,
    samples=[msprime.SampleSet(5, ploidy=1)],
)
# Pass the first tree of the tree sequence to toytree as a newick string.
newick = ts.first().newick()
tree = toytree.tree(newick, tree_format=0)
# Color leaves with the first toytree color and all other nodes with the second.
for nd in tree.treenode.traverse():
    nd.add_feature('color', toytree.colors[0 if nd.is_leaf() else 1])
colors = tree.get_node_values('color', show_root=1, show_tips=1)
# One width entry (5) per edge value returned by the tree.
e_widths = [5 for _ in tree.get_edge_values('idx')]
# Highlight the edges selected by keys 6 and 3.
# NOTE(review): keys are presumably toytree node idx labels; confirm.
highlight = toytree.colors[5]
e_colors = tree.get_edge_values_mapped({6: highlight, 3: highlight})
# Draw the tree on its own canvas with labels and the scale bar turned off.
canvas = toyplot.Canvas(width=700, height=500)
axes = canvas.cartesian()
mark = tree.draw(
    axes=axes,
    ts='p',
    node_colors=colors,
    edge_colors=e_colors,
    edge_widths=e_widths,
    node_labels=False,
    tip_labels=False,
    scalebar=False,
)
# Hide the Cartesian axes so only the tree is shown.
axes.show = False

In [159]:
# Write the tree figure to an SVG file under ../figures/ (the path is
# relative to the working directory).
toyplot.svg.render(canvas, '../figures/7feb23_coal_i4.svg')

In [160]:
# Simulate the ancestry of five haploid samples. The seed is fixed, and the
# other tree cells in this notebook use the same arguments.
ts = msprime.sim_ancestry(
    random_seed=97,
    population_size=1e6,
    samples=[msprime.SampleSet(5, ploidy=1)],
)
# Pass the first tree of the tree sequence to toytree as a newick string.
newick = ts.first().newick()
tree = toytree.tree(newick, tree_format=0)
# Color leaves with the first toytree color and all other nodes with the second.
for nd in tree.treenode.traverse():
    nd.add_feature('color', toytree.colors[0 if nd.is_leaf() else 1])
colors = tree.get_node_values('color', show_root=1, show_tips=1)
# One width entry (5) per edge value returned by the tree.
e_widths = [5 for _ in tree.get_edge_values('idx')]
# Highlight the edges selected by key 8.
# NOTE(review): the key is presumably a toytree node idx label; confirm.
highlight = toytree.colors[5]
e_colors = tree.get_edge_values_mapped({8: highlight})
# Draw the tree on its own canvas with labels and the scale bar turned off.
canvas = toyplot.Canvas(width=700, height=500)
axes = canvas.cartesian()
mark = tree.draw(
    axes=axes,
    ts='p',
    node_colors=colors,
    edge_colors=e_colors,
    edge_widths=e_widths,
    node_labels=False,
    tip_labels=False,
    scalebar=False,
)
# Hide the Cartesian axes so only the tree is shown.
axes.show = False

In [161]:
# Write the tree figure to an SVG file under ../figures/ (the path is
# relative to the working directory).
toyplot.svg.render(canvas, '../figures/7feb23_coal_i5.svg')