In [1]:
import numpy as np
import matplotlib.pyplot as plt
from Bio import SeqIO
from Bio import Phylo
from featureUtils import BED_file_to_features, feature_to_seq
from Bio.Phylo.PhyloXML import Phyloxml
from hierarchical_clustering import hierarchical_clustering, build_phylogeny, get_matrices, save_phylogeny, save_matrices, load_phylogeny, load_matrices

In [2]:
# Index every record of the reference FASTA by its identifier.
# If two records share an id, the later one replaces the earlier one.
references = dict(
    (record.id, record)
    for record in SeqIO.parse("./data/monomers/chr2_centromere_plus2Mb.fasta", "fasta")
)

In [3]:
# Read the monomer annotations from the BED file with the project helper.
# The result is sliced and iterated by later cells, so it is used as a sequence of
# features (presumably one per BED record -- TODO confirm against featureUtils).
monomers_as_features = BED_file_to_features("./data/monomers/chr2_StringDecMon2extr_noMorgante.bed")

In [4]:
# Number of leading monomers (and matching distance-matrix rows/columns) kept by
# the later truncation cells. Kept small here for a quick run; set it to
# len(monomers_as_features) to process every monomer.
sample_size = 10

In [5]:
# Keep only the first `sample_size` monomer features (fewer if the list is shorter).
monomers_as_features = monomers_as_features[:sample_size]

In [6]:
# Resolve each sampled feature against the reference records to get its sequence.
# NOTE(review): no later cell visible here reads `monomers_as_seqs`.
monomers_as_seqs = list(
    feature_to_seq(monomer, references) for monomer in monomers_as_features
)

In [7]:
# Load a previously saved distance matrix from disk.
# NOTE(review): presumably its rows/columns follow the same order as the monomers
# in the BED file, since both are truncated with the same `sample_size` -- confirm.
dist_matrix = np.load('data/dist_matrix.npy')

In [8]:
# Keep only the top-left sample_size x sample_size corner of the loaded matrix.
# NumPy clips out-of-range slices without raising, so validate the shape first;
# otherwise a matrix smaller than `sample_size` (or a non-square one) would pass
# through silently and the clustering would run on fewer entries than expected.
assert dist_matrix.ndim == 2 and dist_matrix.shape[0] == dist_matrix.shape[1], (
    "expected a square 2-D distance matrix, got shape %s" % (dist_matrix.shape,)
)
assert dist_matrix.shape[0] >= sample_size, (
    "distance matrix has %d rows, fewer than sample_size=%d"
    % (dist_matrix.shape[0], sample_size)
)
dist_matrix = dist_matrix[0:sample_size, 0:sample_size]

In [9]:
# Run the project's hierarchical clustering on the truncated distance matrix and
# hand the result to save_phylogeny with the target name 'data/phylo'.
# NOTE(review): the on-disk format/extension is decided inside save_phylogeny -- confirm.
phylogeny = hierarchical_clustering(dist_matrix=dist_matrix)
save_phylogeny(phylogeny=phylogeny, filename='data/phylo')

In [10]:
# Derive the matrices associated with the clustering result and pass them to
# save_matrices with 'data/phylo_matrices/' as the output directory.
# NOTE(review): the directory presumably has to exist already unless
# save_matrices creates it -- confirm before a fresh-checkout run.
matrices_res = get_matrices(phylogeny)
save_matrices(matrices_result=matrices_res, base_dir='data/phylo_matrices/')

In [12]:
# Build a tree from the clustering result and the sampled monomer features,
# wrap it as the single phylogeny of a PhyloXML document, and write that
# document to disk. The write call is the cell's last expression, so its
# return value is what the notebook displays.
phyloXml = Phyloxml(
    phylogenies=[
        build_phylogeny(
            simple_phylogeny=phylogeny,
            features=monomers_as_features,
        )
    ],
    attributes=None,
)
Phylo.write(phyloXml, 'data/phylogeny.xml', format='phyloxml')

1