In [1]:
import ipyrad.analysis as ipa
import pandas as pd
import toyplot
import toyplot.svg
import toytree

Tetrad test
======

In [1]:
# Path to the SNP database (.snps.hdf5) that is passed to tetrad as its input.
# NOTE(review): this is an absolute cluster path, so the notebook only runs where that file exists.
data = "/moto/eaton/users/hnl2109/analysis-ipyrad/robur_ref_basic_outfiles/robur_ref_basic.snps.hdf5"

In [3]:
# Set up the tetrad analysis object on the SNP database referenced by `data`.
tet = ipa.tetrad(
    # NOTE(review): presumably also used as the prefix of the output files — confirm.
    name="robur-ref-basic",
    data=data,
    # The recorded output reports 1000000 randomly sampled quartets out of 188939205.
    nquartets=1e6,
    # Bootstrap replicates; the recorded run log lists them after the full tree.
    nboots=16,
)

loading snps array [261 taxa x 2253184 snps]
max unlinked SNPs per quartet [nloci]: 167724
quartet sampler [random]: 1000000 / 188939205


In [6]:
# Run the quartet inference.  The recorded log shows a 24-core parallel connection and roughly
# 26 minutes per replicate, so this cell is expensive to re-run.
# NOTE(review): auto=True presumably lets ipyrad set up the parallel connection itself — confirm.
tet.run(auto=True)

Parallel connection | t108: 24 cores
initializing quartet sets database
[####################] 100% 0:26:24 | full tree * | avg SNPs/qrt: 7609 
[####################] 100% 0:26:32 | boot rep. 1 | avg SNPs/qrt: 7766 
[####################] 100% 0:26:56 | boot rep. 2 | avg SNPs/qrt: 7684 
[####################] 100% 0:26:21 | boot rep. 3 | avg SNPs/qrt: 7554 
[####################] 100% 0:26:33 | boot rep. 4 | avg SNPs/qrt: 7460 
[####################] 100% 0:26:24 | boot rep. 5 | avg SNPs/qrt: 7556 
[####################] 100% 0:26:26 | boot rep. 6 | avg SNPs/qrt: 7645 
[####################] 100% 0:26:22 | boot rep. 7 | avg SNPs/qrt: 7724 
[####################] 100% 0:26:45 | boot rep. 8 | avg SNPs/qrt: 7671 
[####################] 100% 0:26:28 | boot rep. 9 | avg SNPs/qrt: 7664 
[####################] 100% 0:26:16 | boot rep. 10 | avg SNPs/qrt: 7663 
[####################] 100% 0:26:57 | boot rep. 11 | avg SNPs/qrt: 7647 
[####################] 100% 0:26:41 | boot rep. 12 | avg SNPs/

In [5]:
# Import dataframe of binomials with associated SRA tags.
# The path is relative, so it is resolved against the working directory of the notebook kernel.
df = pd.read_csv("../SRA-to-binomial.csv")
# Bare last expression so that the notebook renders the table.
df

Unnamed: 0,Binomial,SRA
0,Quercus calophylla,SRR5632418
1,Quercus crassipes,SRR5632552
2,Quercus michauxii,SRR5284351
3,Quercus pagoda,SRR5632579
4,Quercus macrocarpa,SRR5284355
...,...,...
256,Quercus rugosa,SRR8860622
257,Quercus calliprinos,SRR8860659
258,Quercus ilex,SRR8860607
259,Quercus salicina,SRR8860621


In [2]:
# First attempt at drawing the tree: load the tree file from the tetrad analysis directory and
# draw it with support values on the nodes, ignoring edge lengths.
tre1a = toytree.tree("/moto/eaton/users/hnl2109/analysis-tetrad/robur-ref-basic.tree")
tre1a.draw(
    node_labels="support",
    use_edge_lengths=False,
    width=500,
    height=3000,
);

In [6]:
# Get tip labels and save to an object.
tips = tre1a.get_tip_labels()

# Build an SRA -> binomial lookup once, instead of rescanning the whole dataframe for every tip.
# keep="first" mirrors a top-to-bottom scan: if an SRA tag is listed more than once, its first row wins.
sra_to_binomial = (
    df.drop_duplicates(subset="SRA", keep="first")
    .set_index("SRA")["Binomial"]
    .to_dict()
)

# Substitute the binomial for each tip, in tree order.  A tip with no entry in the dataframe keeps its
# original label instead of being dropped, so tips_order always stays aligned one-to-one with the tree tips.
tips_order = [sra_to_binomial.get(tip, tip) for tip in tips]

# Any tip listed here is a discrepancy between the tree labels and the dataframe.
unmatched = [tip for tip in tips if tip not in sra_to_binomial]
if unmatched:
    print("No binomial found for:", unmatched)

# The two counts are equal by construction; they are still printed as a quick sanity check on the
# number of tips being relabelled.
print(len(tips), len(tips_order))

261 261


In [7]:
# Draw the same tree file again, this time passing the binomial names as tip labels.  tips_order was
# built by walking the tip labels of the earlier tree object in order, so its entries line up with
# the tips of this freshly loaded copy.
tre1b = toytree.tree("/moto/eaton/users/hnl2109/analysis-tetrad/robur-ref-basic.tree")
tre1b.draw(
    tip_labels=tips_order,
    node_labels="support",
    use_edge_lengths=False,
    width=500,
    height=3000,
);

RAxML test
========

Using the window extracter tool, extract a subset of the assembly (here, the scaffold at index 0) and attempt to infer a topology from it with RAxML.

In [26]:
# Set up a window_extracter run named "test50" on the sequence database (.seqs.hdf5).
wex = ipa.window_extracter(
    name = "test50",
    data = "/moto/eaton/users/hnl2109/analysis-ipyrad/robur_ref_basic_outfiles/robur_ref_basic.seqs.hdf5",
    # NOTE(review): the recorded wex.run() output reports a file written under
    # /moto/home/hnl2109/oaks-thesis/notebooks/analysis-window_extracter/ rather than this directory;
    # the saved outputs may predate this setting — re-run to confirm.
    workdir = "/moto/eaton/users/hnl2109/analysis-window_extracter",
    # Restrict the extraction to the scaffold at index 0.
    scaffold_idxs = [0],
    # NOTE(review): mincov and rmincov look like minimum-coverage filters — confirm their exact
    # meaning against the ipyrad documentation.
    mincov = 0.5,
    rmincov = 0.1
)

In [27]:
# Display the summary statistics held by the extracter (bare expression so the notebook renders it).
wex.stats

Unnamed: 0,scaffold,start,end,sites,snps,missing,samples
0,concatenated,0,46762,46762,6509,0.166,261


In [28]:
# Write the extracted alignment to disk; the recorded output shows a .phy file being written.
# NOTE(review): force=False presumably avoids overwriting an existing file and nexus=False presumably
# selects phylip rather than nexus output — confirm.
wex.run(force = False, nexus = False)

Wrote data to /moto/home/hnl2109/oaks-thesis/notebooks/analysis-window_extracter/test50.phy


In [34]:
# Set up a RAxML run named "test50" on the alignment file produced by the window extracter.
rax = ipa.raxml(
    name = "test50",
    data = wex.outfile,
    # NOTE(review): the recorded command string uses "-w /moto/home/hnl2109/oaks-thesis/notebooks/analysis-raxml"
    # rather than this directory; the saved outputs may predate this setting — re-run to confirm.
    workdir = "/moto/eaton/users/hnl2109/analysis-raxml",
    # N and T show up as "-N 100" and "-T 12" in the recorded command string.
    # NOTE(review): presumably 100 bootstrap replicates on 12 threads — confirm.
    N = 100,
    T = 12,
)

In [35]:
# Display the command string held by the raxml object (bare expression so the notebook renders it).
rax.command

'/moto/home/hnl2109/miniconda3/bin/raxmlHPC-PTHREADS-AVX2 -f a -T 12 -m GTRGAMMA -n test50 -w /moto/home/hnl2109/oaks-thesis/notebooks/analysis-raxml -s /moto/home/hnl2109/oaks-thesis/notebooks/analysis-window_extracter/test50.phy -p 54321 -N 100 -x 12345'

In [36]:
# Execute the RAxML job; the recorded output reports that job test50 finished successfully.
rax.run()

job test50 finished successfully


In [13]:
# Load the RAxML bipartitions tree and draw it with support values on the nodes, ignoring edge
# lengths.  The canvas, axes and mark returned by draw() are kept so the canvas can be exported.
tre00 = toytree.tree("/moto/eaton/users/hnl2109/analysis-raxml/RAxML_bipartitions.test50")
canvas, axes, mark = tre00.draw(
    node_labels="support",
    use_edge_lengths=False,
    width=500,
    height=3000,
);

In [15]:
# Save the tree as an SVG file.
# Renders the canvas returned by the draw() call for the RAxML tree to a fixed output path.
# toyplot.svg is imported with the other imports in the first cell of the notebook.
toyplot.svg.render(canvas, "/moto/eaton/users/hnl2109/svg-trees/basic_scaffold00.svg")