In [1]:
import numpy as np
import matplotlib.pyplot as plt
from Bio import SeqIO
from Bio import Phylo
from Bio.Phylo.PhyloXML import Phyloxml
from phylogeny_to_levels import phylogeny_to_levels, parse_location, location_pattern, extract_features_from_leaves, SeqFeaturesByContiguity
from Bio.Phylo import PhyloXMLIO
from Bio.SeqFeature import SeqFeature, SimpleLocation
from loops import find_loops

In [2]:
# Read the PhyloXML document from a path relative to the notebook's working
# directory; the result is annotated as a Phyloxml container.
phyloXml: Phyloxml = PhyloXMLIO.read('data/phylogeny.xml')

In [3]:
# Work with the first phylogeny stored in the document.
phylogeny = phyloXml.phylogenies[0]

In [4]:
# Location string of the first leaf's first sequence.
# find_clades() is a lazy iterator with the same default (preorder) ordering
# as get_terminals(), so next() yields the same leaf without first building
# the list of every terminal in the tree just to index element 0.
loc_str = next(phylogeny.find_clades(terminal=True)).sequences[0].location

In [5]:
# Show the raw location string exactly as it is stored on the leaf.
loc_str

'chr14:21209-17708563[6104094:6104286](+)'

In [6]:
# parse_location is a project helper imported from phylogeny_to_levels.
# In the recorded output it yields a SimpleLocation whose `ref` is the text
# preceding the bracketed coordinates.
parse_location(loc_str)

SimpleLocation(ExactPosition(6104094), ExactPosition(6104286), strand=1, ref='chr14:21209-17708563')

In [7]:
# Scratch check: parse only the bracketed coordinates/strand part with
# Biopython's own parser (presumably to compare against parse_location).
# NOTE(review): the result is neither assigned nor shown in the saved output;
# drop this cell or keep the displayed value if the comparison matters.
SimpleLocation.fromstring('[6104094:6104286](+)')

In [8]:
# Name of the first leaf. next() on the lazy find_clades() iterator returns
# the same clade as get_terminals()[0] (both default to preorder) without
# materialising the full list of terminals.
next(phylogeny.find_clades(terminal=True)).name

'chr14:6125303-6125495'

In [9]:
# Run the project's phylogeny_to_levels on the tree. The call prints progress
# messages; the result exposes clades_by_level, clade_contraction_by_level and
# labelled_items_by_level, which the cells below inspect.
levels_res = phylogeny_to_levels(phylogeny=phylogeny)

Reorder all
Indexes built, now reordering feature list...


In [10]:
# Number of entries in clades_by_level (presumably one per level — confirm).
len(levels_res.clades_by_level)

85

In [11]:
# NOTE(review): this displays the entire nested list (every clade of every
# entry), which floods the notebook output. Consider showing a small slice
# instead, e.g. levels_res.clades_by_level[0][:10].
levels_res.clades_by_level

[[Clade(branch_length=9.0, name='chr14:6125303-6125495'),
  Clade(branch_length=9.0, name='chr14:7870715-7870908'),
  Clade(branch_length=14.0, name='chr12:34359807-34359951'),
  Clade(branch_length=14.0, name='chr12:37765243-37765401'),
  Clade(branch_length=16.5, name='chr11:54958821-54958989'),
  Clade(branch_length=8.0, name='chr10:39307606-39307767'),
  Clade(branch_length=8.0, name='chr10:42016934-42017098'),
  Clade(branch_length=16.5, name='chr11:54996586-54996733'),
  Clade(branch_length=14.5, name='chr13:13140556-13140712'),
  Clade(branch_length=14.5, name='chr11:54849353-54849500'),
  Clade(branch_length=3.5, name='chr14:6337843-6338005'),
  Clade(branch_length=3.5, name='chr15:15755222-15755386'),
  Clade(branch_length=5.0, name='chr15:14119008-14119167'),
  Clade(branch_length=6.5, name='chr14:9305642-9305800'),
  Clade(branch_length=6.5, name='chr14:6374862-6375004'),
  Clade(branch_length=6.5, name='chr14:9343971-9344120'),
  Clade(branch_length=11.0, name='chr14:936070

In [12]:
# The recorded output is a nested list of non-decreasing integers; presumably
# each value is the index of the group a clade is contracted into — TODO confirm.
# NOTE(review): displaying the whole structure floods the output; prefer a
# slice such as levels_res.clade_contraction_by_level[0][:20].
levels_res.clade_contraction_by_level

[[0,
  0,
  1,
  1,
  2,
  3,
  3,
  4,
  5,
  5,
  6,
  6,
  7,
  8,
  9,
  9,
  10,
  11,
  12,
  13,
  14,
  14,
  15,
  16,
  16,
  16,
  17,
  17,
  18,
  18,
  19,
  20,
  20,
  21,
  22,
  22,
  23,
  24,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  31,
  32,
  33,
  34,
  35,
  35,
  35,
  36,
  37,
  38,
  39,
  39,
  40,
  40,
  41,
  42,
  43,
  43,
  44,
  44,
  45,
  46,
  47,
  48,
  48,
  48,
  49,
  49,
  50,
  51,
  51,
  52,
  52,
  53,
  54,
  55,
  55,
  56,
  57,
  58,
  59,
  60,
  60,
  61,
  61,
  62,
  63,
  63,
  64,
  65,
  66,
  67,
  67,
  68,
  68,
  69,
  70,
  71,
  71,
  71,
  72,
  72,
  73,
  73,
  74,
  75,
  76,
  76,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  84,
  85,
  86,
  86,
  87,
  88,
  89,
  90,
  90,
  91,
  92,
  93,
  94,
  94,
  95,
  96,
  96,
  97,
  97,
  98,
  99,
  99,
  100,
  101,
  102,
  102,
  103,
  103,
  104,
  105,
  106,
  107,
  108,
  108,
  109,
  110,
  110,
  111,
  111,
  112,
  113,
  113,
  114,
  114,
  11

In [13]:
# Collect the sequence features attached to the tree's leaves first, then
# hand them to SeqFeaturesByContiguity (which prints its own progress output).
leaf_features = extract_features_from_leaves(phylogeny)
sfbc = SeqFeaturesByContiguity(seq_features=leaf_features)

Reorder all
Indexes built, now reordering feature list...


In [22]:
# Number of leaves in the tree. count_terminals() counts while iterating, so
# no throw-away list of every terminal clade is built just to take its length.
phylogeny.count_terminals()

103784

In [20]:
# Number of gap indices recorded by SeqFeaturesByContiguity.
len(sfbc.gap_indices)

12333

In [21]:
# Sanity check: in the recorded run this equals the number of terminals in the
# phylogeny, i.e. one sorted feature per leaf.
len(sfbc.sorted_seq_features)

103784

In [24]:
# Search for loops in the labelled items of level index 84, passing the gap
# indices computed by SeqFeaturesByContiguity.
# NOTE(review): 84 is a hard-coded index — presumably the last level, given
# len(levels_res.clades_by_level) is 85 in this run; confirm and derive it
# from the data rather than hard-coding it.
find_loops(whole_seq=levels_res.labelled_items_by_level[84], gap_indices=sfbc.gap_indices)

[]

In [15]:
# Inspect the labels at level index 84 (hard-coded; presumably the last level
# — confirm). The visible part of the recorded output is all zeros.
# NOTE(review): this prints the whole list; prefer a summary such as
# set(levels_res.labelled_items_by_level[84]) or a short slice.
levels_res.labelled_items_by_level[84]

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
