/
PhyloXML.py
1479 lines (1259 loc) · 45.7 KB
/
PhyloXML.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com)
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Classes corresponding to phyloXML elements.
See Also
--------
Official specification:
http://phyloxml.org/
Journal article:
Han and Zmasek (2009), https://doi.org/10.1186/1471-2105-10-356
"""
import re
import warnings
from Bio.Align import Alignment, MultipleSeqAlignment
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature, SimpleLocation
from Bio.SeqRecord import SeqRecord
from Bio import BiopythonWarning
from Bio.Phylo import BaseTree
class PhyloXMLWarning(BiopythonWarning):
    """Warning category used when data does not comply with the phyloXML spec."""
def _check_str(text, testfunc):
"""Check a string using testfunc, and warn if there's no match (PRIVATE)."""
if text is not None and not testfunc(text):
warnings.warn(
f"String {text} doesn't match the given regexp",
PhyloXMLWarning,
stacklevel=2,
)
# Core elements
class PhyloElement(BaseTree.TreeElement):
    """Common base class shared by every PhyloXML object in this module."""
class Phyloxml(PhyloElement):
    """Root node of the PhyloXML document.

    Holds an arbitrary number of Phylogeny elements, possibly followed by
    elements from other namespaces.

    :Parameters:
        attributes : dict
            (XML namespace definitions); merged over the standard defaults
        phylogenies : list
            The phylogenetic trees
        other : list
            Arbitrary non-phyloXML elements, if any

    """

    def __init__(self, attributes, phylogenies=None, other=None):
        """Store root attributes, phylogenies and foreign elements."""
        merged = {
            # standard
            "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
            "xmlns": "http://www.phyloxml.org",
            "xsi:schemaLocation": "http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd",
        }
        if attributes:
            # Caller-supplied entries override the standard ones.
            merged.update(attributes)
        self.attributes = merged
        self.phylogenies = phylogenies if phylogenies else []
        self.other = other if other else []

    def __getitem__(self, index):
        """Look up a phylogeny by integer index, slice, or tree name."""
        if isinstance(index, (int, slice)):
            return self.phylogenies[index]
        if isinstance(index, str):
            # Name lookup: the first tree with a matching name wins.
            for tree in self.phylogenies:
                if tree.name == index:
                    return tree
            raise KeyError(f"no phylogeny found with name {index!r}")
        raise KeyError(f"can't use {type(index)} as an index")

    def __iter__(self):
        """Iterate over the phylogenetic trees held by this object."""
        return iter(self.phylogenies)

    def __len__(self):
        """Return how many phylogenetic trees this object holds."""
        return len(self.phylogenies)

    def __str__(self):
        """Return the class name wrapping the string form of each phylogeny."""
        trees = ",\n".join(str(tree) for tree in self.phylogenies)
        return f"{self.__class__.__name__}([{trees}])"
class Other(PhyloElement):
    """Container for non-phyloXML elements in the tree.

    An Other object normally carries either a 'value' or a non-empty list of
    'children', not both; nothing here enforces that.

    :Parameters:
        tag : string
            local tag for the XML node
        namespace : string
            XML namespace for the node -- should not be the default phyloXML
            namespace.
        attributes : dict of strings
            attributes on the XML node
        value : string
            text contained directly within this XML node
        children : list
            child nodes, if any (also ``Other`` instances)

    """

    def __init__(self, tag, namespace=None, attributes=None, value=None, children=None):
        """Store the tag, namespace, attributes, text and child nodes."""
        self.tag, self.namespace, self.value = tag, namespace, value
        # Falsy collections are replaced by fresh empty containers.
        self.attributes = attributes if attributes else {}
        self.children = children if children else []

    def __iter__(self):
        """Iterate over this node's children (possibly none)."""
        return iter(self.children)
class Phylogeny(PhyloElement, BaseTree.Tree):
    """A phylogenetic tree.

    :Parameters:
        root : Clade
            the root node/clade of this tree
        rooted : bool
            True if this tree is rooted
        rerootable : bool
            True if this tree is rerootable
        branch_length_unit : string
            unit for branch_length values on clades
        type : string
            stored unchanged on ``self.type``
        name : string
            identifier for this tree, not required to be unique
        id : Id
            unique identifier for this tree
        description : string
            plain-text description
        date : Date
            date for the root node of this tree
        confidences : list
            Confidence objects for this tree
        clade_relations : list
            CladeRelation objects
        sequence_relations : list
            SequenceRelation objects
        properties : list
            Property objects
        other : list
            non-phyloXML elements (type ``Other``)

    """

    def __init__(
        self,
        root=None,
        rooted=True,
        rerootable=None,
        branch_length_unit=None,
        type=None,
        # Child nodes
        name=None,
        id=None,
        description=None,
        date=None,
        # Collections
        confidences=None,
        clade_relations=None,
        sequence_relations=None,
        properties=None,
        other=None,
    ):
        """Initialize values for phylogenetic tree object."""
        assert isinstance(rooted, bool)
        self.root = root
        self.rooted = rooted
        self.rerootable = rerootable
        self.branch_length_unit = branch_length_unit
        self.type = type
        self.name = name
        self.id = id
        self.description = description
        self.date = date
        # Collections default to fresh empty lists.
        self.confidences = confidences or []
        self.clade_relations = clade_relations or []
        self.sequence_relations = sequence_relations or []
        self.properties = properties or []
        self.other = other or []

    @classmethod
    def from_tree(cls, tree, **kwargs):
        """Create a new Phylogeny given a Tree (from Newick/Nexus or BaseTree).

        The root clade is converted with ``Clade.from_clade``; a non-None
        ``tree.id`` is wrapped as ``Id(str(tree.id))``.

        Keyword arguments are the usual ``Phylogeny`` constructor parameters;
        they are written directly into the new object's ``__dict__``.
        """
        phy = cls(
            root=Clade.from_clade(tree.root),
            rooted=tree.rooted,
            name=tree.name,
            id=Id(str(tree.id)) if tree.id is not None else None,
        )
        phy.__dict__.update(kwargs)
        return phy

    @classmethod
    def from_clade(cls, clade, **kwargs):
        """Create a new Phylogeny given a Newick or BaseTree Clade object.

        Keyword arguments are forwarded to ``Clade.to_phylogeny``.
        """
        return Clade.from_clade(clade).to_phylogeny(**kwargs)

    def as_phyloxml(self):
        """Return this tree, a PhyloXML-compatible Phylogeny object.

        Overrides the ``BaseTree`` method.
        """
        return self

    def to_phyloxml_container(self, **kwargs):
        """Create a new Phyloxml object containing just this phylogeny.

        Keyword arguments become the ``attributes`` of the Phyloxml object.
        """
        return Phyloxml(kwargs, phylogenies=[self])

    def _aligned_sequences(self):
        """Search this tree for Sequence objects flagged as aligned (PRIVATE).

        Shared by ``to_alignment`` and ``alignment``.
        """

        def is_aligned_seq(elem):
            # A Sequence without a mol_seq cannot be aligned; skip it rather
            # than fail on ``None.is_aligned``.
            # NOTE(review): assumes Sequence.mol_seq may be None -- confirm
            # against the Sequence class.
            return bool(
                isinstance(elem, Sequence)
                and elem.mol_seq is not None
                and elem.mol_seq.is_aligned
            )

        return self._filter_search(is_aligned_seq, "preorder", True)

    def to_alignment(self):
        """Construct a MultipleSeqAlignment from the aligned sequences in this tree."""
        records = (seq.to_seqrecord() for seq in self._aligned_sequences())
        return MultipleSeqAlignment(records)

    @property
    def alignment(self):
        """Construct an Alignment object from the aligned sequences in this tree."""
        records = []
        lines = []
        for seq in self._aligned_sequences():
            record = seq.to_seqrecord()
            lines.append(bytes(record.seq))
            records.append(record)
        if lines:
            # presumably returns the sequences without gaps plus the alignment
            # coordinates -- verify against Bio.Align.Alignment
            sequences, coordinates = Alignment.parse_printed_alignment(lines)
            for sequence, record in zip(sequences, records):
                record.seq = Seq(sequence)
        else:
            coordinates = None
        return Alignment(records, coordinates)

    # Singular property for plural attribute
    def _get_confidence(self):
        """Equivalent to self.confidences[0] if there is only 1 value (PRIVATE).

        Returns None when there are no confidences and raises AttributeError
        when there is more than one.

        See Also: ``Clade.confidence``, ``Clade.taxonomy``

        """
        if len(self.confidences) == 0:
            return None
        if len(self.confidences) > 1:
            raise AttributeError(
                "more than 1 confidence value available; use Phylogeny.confidences"
            )
        return self.confidences[0]

    def _set_confidence(self, value):
        """Set the single confidence value (PRIVATE).

        ``None`` clears the list. A plain number is wrapped in a Confidence;
        a Confidence instance is stored as given. Raises ValueError for any
        other type, or if several confidences already exist.
        """
        if value is None:
            # Special case: mirror the behavior of _get_confidence
            self.confidences = []
            return
        # Test for Confidence first: it subclasses float, so re-wrapping an
        # existing instance would reset its ``type`` to the default.
        if not isinstance(value, Confidence):
            if not isinstance(value, (float, int)):
                raise ValueError("value must be a number or Confidence instance")
            value = Confidence(value)
        if len(self.confidences) == 0:
            self.confidences.append(value)
        elif len(self.confidences) == 1:
            self.confidences[0] = value
        else:
            raise ValueError(
                "multiple confidence values already exist; "
                "use Phylogeny.confidences instead"
            )

    def _del_confidence(self):
        """Remove all confidence values (PRIVATE)."""
        self.confidences = []

    confidence = property(_get_confidence, _set_confidence, _del_confidence)
class Clade(PhyloElement, BaseTree.Clade):
    """Describes a branch of the current phylogenetic tree.

    Used recursively, describes the topology of a phylogenetic tree.

    Both ``color`` and ``width`` elements should be interpreted by client code
    as applying to the whole clade, including all descendants, unless
    overwritten in sub-clades. This module doesn't automatically assign these
    attributes to sub-clades to achieve this cascade -- and neither should you.

    :Parameters:
        branch_length
            parent branch length of this clade
        id_source
            link other elements to a clade (on the xml-level)
        name : string
            short label for this clade
        confidences : list of Confidence objects
            used to indicate the support for a clade/parent branch.
        width : float
            branch width for this clade (including branch from parent)
        color : BranchColor
            color used for graphical display of this clade
        node_id
            unique identifier for the root node of this clade
        taxonomies : list
            Taxonomy objects
        sequences : list
            Sequence objects
        events : Events
            describe such events as gene-duplications at the root node/parent
            branch of this clade
        binary_characters : BinaryCharacters
            binary characters
        distributions : list of Distribution objects
            distribution(s) of this clade
        date : Date
            a date for the root node of this clade
        references : list
            Reference objects
        properties : list
            Property objects
        clades : list Clade objects
            Sub-clades
        other : list of Other objects
            non-phyloXML objects

    """

    def __init__(
        self,
        # Attributes
        branch_length=None,
        id_source=None,
        # Child nodes
        name=None,
        width=None,
        color=None,
        node_id=None,
        events=None,
        binary_characters=None,
        date=None,
        # Collections
        confidences=None,
        taxonomies=None,
        sequences=None,
        distributions=None,
        references=None,
        properties=None,
        clades=None,
        other=None,
    ):
        """Initialize value for the Clade object."""
        self.branch_length = branch_length
        self.id_source = id_source
        self.name = name
        self.width = width
        self.color = color
        self.node_id = node_id
        self.events = events
        self.binary_characters = binary_characters
        self.date = date
        # Collections default to fresh empty lists.
        self.confidences = confidences or []
        self.taxonomies = taxonomies or []
        self.sequences = sequences or []
        self.distributions = distributions or []
        self.references = references or []
        self.properties = properties or []
        self.clades = clades or []
        self.other = other or []

    @classmethod
    def from_clade(cls, clade, **kwargs):
        """Create a new PhyloXML Clade from a Newick or BaseTree Clade object.

        Branch length, name, confidence, width and color are copied, and the
        sub-clades are converted recursively (without the keyword arguments).

        Keyword arguments are the usual PhyloXML Clade constructor parameters;
        they are written directly into the new clade's ``__dict__``.
        """
        new_clade = cls(branch_length=clade.branch_length, name=clade.name)
        new_clade.clades = [cls.from_clade(c) for c in clade]
        new_clade.confidence = clade.confidence
        new_clade.width = clade.width
        new_clade.color = (
            BranchColor(clade.color.red, clade.color.green, clade.color.blue)
            if clade.color
            else None
        )
        new_clade.__dict__.update(kwargs)
        return new_clade

    def to_phylogeny(self, **kwargs):
        """Create a new phylogeny containing just this clade.

        The phylogeny takes this clade as root and shares its ``date``.
        Keyword arguments are written directly into the phylogeny's
        ``__dict__``.
        """
        phy = Phylogeny(root=self, date=self.date)
        phy.__dict__.update(kwargs)
        return phy

    # Shortcuts for list attributes that are usually only 1 item
    # NB: Duplicated from Phylogeny class
    def _get_confidence(self):
        """Return the single confidence value, or None if unset (PRIVATE).

        Raises AttributeError when more than one confidence is present.
        """
        if len(self.confidences) == 0:
            return None
        if len(self.confidences) > 1:
            raise AttributeError(
                "more than 1 confidence value available; use Clade.confidences"
            )
        return self.confidences[0]

    def _set_confidence(self, value):
        """Set the confidence value (PRIVATE).

        ``None`` clears the list. A plain number is wrapped in a Confidence;
        a Confidence instance is stored as given. Raises ValueError for any
        other type, or if several confidences already exist.
        """
        if value is None:
            # Special case: mirror the behavior of _get_confidence
            self.confidences = []
            return
        # Test for Confidence first: it subclasses float, so re-wrapping an
        # existing instance would reset its ``type`` to the default.
        if not isinstance(value, Confidence):
            if not isinstance(value, (float, int)):
                raise ValueError("value must be a number or Confidence instance")
            value = Confidence(value)
        if len(self.confidences) == 0:
            self.confidences.append(value)
        elif len(self.confidences) == 1:
            self.confidences[0] = value
        else:
            raise ValueError(
                "multiple confidence values already exist; "
                "use Clade.confidences instead"
            )

    def _del_confidence(self):
        """Delete confidences values (PRIVATE)."""
        self.confidences = []

    confidence = property(_get_confidence, _set_confidence, _del_confidence)

    def _get_taxonomy(self):
        """Return the single taxonomy of the clade, or None if unset (PRIVATE).

        Raises AttributeError when more than one taxonomy is present.
        """
        if len(self.taxonomies) == 0:
            return None
        if len(self.taxonomies) > 1:
            raise AttributeError(
                "more than 1 taxonomy value available; use Clade.taxonomies"
            )
        return self.taxonomies[0]

    def _set_taxonomy(self, value):
        """Set a taxonomy for the clade (PRIVATE).

        Raises ValueError if ``value`` is not a Taxonomy instance or if
        several taxonomies already exist.
        """
        if not isinstance(value, Taxonomy):
            raise ValueError("assigned value must be a Taxonomy instance")
        if len(self.taxonomies) == 0:
            self.taxonomies.append(value)
        elif len(self.taxonomies) == 1:
            self.taxonomies[0] = value
        else:
            raise ValueError(
                "multiple taxonomy values already exist; "
                "use Clade.taxonomies instead"
            )

    taxonomy = property(_get_taxonomy, _set_taxonomy)
# PhyloXML wrapper for a special BaseTree attribute
class BranchColor(PhyloElement, BaseTree.BranchColor):
    """PhyloXML flavour of ``BaseTree.BranchColor`` for a tree branch's color."""

    def __init__(self, *args, **kwargs):
        """Pass every argument straight through to ``BaseTree.BranchColor``."""
        BaseTree.BranchColor.__init__(self, *args, **kwargs)
# PhyloXML-specific complex types
class Accession(PhyloElement):
    """Captures the local part in a sequence identifier.

    Example: In ``UniProtKB:P17304``, the Accession instance attribute ``value``
    is 'P17304' and the ``source`` attribute is 'UniProtKB'.
    """

    def __init__(self, value, source):
        """Store the accession value and the source it belongs to."""
        self.source = source
        self.value = value

    def __str__(self):
        """Return the accession formatted as ``source:value``."""
        return ":".join((f"{self.source}", f"{self.value}"))
class Annotation(PhyloElement):
    """The annotation of a molecular sequence.

    It is recommended to annotate by using the optional 'ref' attribute.

    :Parameters:
        ref : string
            reference string, e.g. 'GO:0008270',
            'KEGG:Tetrachloroethene degradation', 'EC:1.1.1.1'
        source : string
            plain-text source for this annotation
        evidence : str
            describe evidence as free text (e.g. 'experimental')
        type : string
            stored unchanged on ``self.type``
        desc : string
            free text description
        confidence : Confidence
            state the type and value of support (type Confidence)
        properties : list
            typed and referenced annotations from external resources
        uri : Uri
            link

    """

    re_ref = re.compile(r"[a-zA-Z0-9_]+:[a-zA-Z0-9_\.\-\s]+")

    def __init__(
        self,
        # Attributes
        ref=None,
        source=None,
        evidence=None,
        type=None,
        # Child nodes
        desc=None,
        confidence=None,
        uri=None,
        # Collection
        properties=None,
    ):
        """Initialize value for the Annotation object.

        A ``ref`` that does not match ``re_ref`` only triggers a
        PhyloXMLWarning; the value is stored regardless.
        """
        # The whole string must match: ``match`` would accept any value that
        # merely starts with a valid reference.
        _check_str(ref, self.re_ref.fullmatch)
        self.ref = ref
        self.source = source
        self.evidence = evidence
        self.type = type
        self.desc = desc
        self.confidence = confidence
        self.uri = uri
        self.properties = properties or []
class BinaryCharacters(PhyloElement):
    """Binary characters at the root of a clade.

    Records the names and/or counts of the binary characters that are
    present, gained, and lost at the root of a clade.
    """

    def __init__(
        self,
        # Attributes
        type=None,
        gained_count=None,
        lost_count=None,
        present_count=None,
        absent_count=None,
        # Child nodes (flattened into collections)
        gained=None,
        lost=None,
        present=None,
        absent=None,
    ):
        """Store the type, the four counts and the four character lists."""
        self.type = type
        # Counts are kept exactly as given.
        self.gained_count, self.lost_count = gained_count, lost_count
        self.present_count, self.absent_count = present_count, absent_count
        # Falsy collections are replaced by fresh empty lists.
        self.gained = gained if gained else []
        self.lost = lost if lost else []
        self.present = present if present else []
        self.absent = absent if absent else []
class CladeRelation(PhyloElement):
    """Expresses a typed relationship between two clades.

    One use is describing multiple parents of a clade.

    :type id_ref_0: str
    :type id_ref_1: str
    :type distance: str
    :type type: str
    :type confidence: Confidence
    """

    def __init__(self, type, id_ref_0, id_ref_1, distance=None, confidence=None):
        """Store the relation type, both clade references and optional extras."""
        self.type = type
        self.id_ref_0, self.id_ref_1 = id_ref_0, id_ref_1
        self.distance = distance
        self.confidence = confidence
class Confidence(float, PhyloElement):
    """A general purpose confidence element.

    One use is expressing the bootstrap support value of a clade (in which
    case the ``type`` attribute is 'bootstrap').

    :Parameters:
        value : float
            confidence value
        type : string
            label for the type of confidence, e.g. 'bootstrap'

    """

    def __new__(cls, value, type="unknown"):
        """Build the float from ``value`` and attach ``type`` as an attribute."""
        instance = super().__new__(cls, value)
        instance.type = type
        return instance

    @property
    def value(self):
        """Return this confidence as a plain float."""
        return float(self)
class Date(PhyloElement):
    """A date associated with a clade/node.

    The value can be numerical (the 'value' element) and/or free text (the
    'desc' element, e.g. 'Silurian'). With a numerical value, using the
    'unit' attribute is recommended.

    :Parameters:
        unit : string
            type of numerical value (e.g. 'mya' for 'million years ago')
        value : float
            the date value
        desc : string
            plain-text description of the date
        minimum : float
            lower bound on the date value
        maximum : float
            upper bound on the date value

    """

    def __init__(self, value=None, unit=None, desc=None, minimum=None, maximum=None):
        """Store the date value, its unit, description and bounds."""
        self.value, self.unit, self.desc = value, unit, desc
        self.minimum, self.maximum = minimum, maximum

    def __str__(self):
        """Return 'value unit' if available, else the description, else the class name."""
        if self.value is not None and self.unit:
            return f"{self.value} {self.unit}"
        return self.__class__.__name__ if self.desc is None else self.desc
class Distribution(PhyloElement):
    """Geographic distribution of the items of a clade (species, sequences).

    Intended for phylogeographic applications.

    :Parameters:
        desc : string
            free-text description of the location
        points : list of ``Point`` objects
            coordinates (similar to the 'Point' element in Google's KML format)
        polygons : list of ``Polygon`` objects
            coordinate sets defining geographic regions

    """

    def __init__(self, desc=None, points=None, polygons=None):
        """Store the description plus the point and polygon lists."""
        self.desc = desc
        # Falsy collections are replaced by fresh empty lists.
        self.points = points if points else []
        self.polygons = polygons if polygons else []
class DomainArchitecture(PhyloElement):
    """Domain architecture of a protein.

    :Parameters:
        length : int
            total length of the protein sequence
        domains : list ProteinDomain objects
            the domains within this protein

    """

    def __init__(self, length=None, domains=None):
        """Initialize values of the DomainArchitecture object."""
        self.length = length
        # Default to an empty list, like every other collection attribute in
        # this module, so ``domains`` can always be iterated.
        self.domains = domains or []
class Events(PhyloElement):
    """Events at the root node of a clade (e.g. one gene duplication).

    All attributes are set to None by default, but this object can also be
    treated as a dictionary, in which case None values are treated as missing
    keys and deleting a key resets that attribute's value back to None.

    The dictionary view covers instance attributes only; methods and class
    attributes such as ``ok_type`` are not keys.
    """

    ok_type = {
        "transfer",
        "fusion",
        "speciation_or_duplication",
        "other",
        "mixed",
        "unassigned",
    }

    def __init__(
        self,
        type=None,
        duplications=None,
        speciations=None,
        losses=None,
        confidence=None,
    ):
        """Initialize values of the Events object.

        A ``type`` outside ``ok_type`` only triggers a PhyloXMLWarning.
        """
        _check_str(type, self.ok_type.__contains__)
        self.type = type
        self.duplications = duplications
        self.speciations = speciations
        self.losses = losses
        self.confidence = confidence

    def items(self):
        """Return (name, value) pairs for every attribute that is not None."""
        return [(k, v) for k, v in self.__dict__.items() if v is not None]

    def keys(self):
        """Return the names of the attributes that are not None."""
        return [k for k, v in self.__dict__.items() if v is not None]

    def values(self):
        """Return the attribute values that are not None."""
        return [v for v in self.__dict__.values() if v is not None]

    def __len__(self):
        """Return the number of attributes that are not None."""
        return sum(v is not None for v in self.__dict__.values())

    def __getitem__(self, key):
        """Get the value stored under ``key``.

        Raises KeyError if ``key`` is not an instance attribute or if its
        value is None.
        """
        # Look in the instance dict only, so this agrees with keys(): getattr
        # would also return methods and class attributes.
        try:
            val = self.__dict__[key]
        except KeyError:
            raise KeyError(key) from None
        if val is None:
            raise KeyError(f"{key!r} has not been set in this object")
        return val

    def __setitem__(self, key, val):
        """Set the attribute named ``key`` to ``val``."""
        setattr(self, key, val)

    def __delitem__(self, key):
        """Reset the attribute named ``key`` back to None."""
        setattr(self, key, None)

    def __iter__(self):
        """Iterate over the names of the attributes that are not None."""
        return iter(self.keys())

    def __contains__(self, key):
        """Return True if ``key`` is an instance attribute that is not None."""
        # Instance dict only, for the same reason as __getitem__.
        return self.__dict__.get(key) is not None
class Id(PhyloElement):
    """A general-purpose identifier element.

    Allows to indicate the provider (or authority) of an identifier, e.g. NCBI,
    along with the value itself.
    """

    def __init__(self, value, provider=None):
        """Initialize values for the identifier object."""
        self.value = value
        self.provider = provider

    def __str__(self):
        """Return ``provider:value`` if a provider is set, else the value as text."""
        if self.provider is not None:
            return f"{self.provider}:{self.value}"
        # __str__ must return a str; convert so a non-string value (e.g. an
        # integer id) does not raise TypeError.
        return str(self.value)
class MolSeq(PhyloElement):
    """Store a molecular sequence.

    :Parameters:
        value : string
            the sequence itself
        is_aligned : bool
            True if this sequence is aligned with the others (usually meaning
            all aligned seqs are the same length and gaps may be present)

    """

    re_value = re.compile(r"[a-zA-Z\.\-\?\*_]+")

    def __init__(self, value, is_aligned=None):
        """Initialize parameters for the MolSeq object.

        A ``value`` containing characters outside ``re_value`` only triggers
        a PhyloXMLWarning; the value is stored regardless.
        """
        # The whole string must match: ``match`` would accept any value that
        # merely starts with a valid character.
        _check_str(value, self.re_value.fullmatch)
        self.value = value
        self.is_aligned = is_aligned

    def __str__(self):
        """Return the value of the Molecular Sequence object."""
        return self.value
class Point(PhyloElement):
    """Geographic coordinates of a point, with an optional altitude.

    Used by element 'Distribution'.

    :Parameters:
        geodetic_datum : string, required
            the geodetic datum (also called 'map datum'). For example, Google's
            KML uses 'WGS84'.
        lat : numeric
            latitude
        long : numeric
            longitude
        alt : numeric
            altitude
        alt_unit : string
            unit for the altitude (e.g. 'meter')

    """

    def __init__(self, geodetic_datum, lat, long, alt=None, alt_unit=None):
        """Store the datum, the coordinates and the optional altitude."""
        self.geodetic_datum = geodetic_datum
        self.lat, self.long = lat, long
        self.alt, self.alt_unit = alt, alt_unit
class Polygon(PhyloElement):
    """A polygon defined by a list of 'Points' (used by element 'Distribution').

    :param points: list of 3 or more points representing vertices.
    """

    def __init__(self, points=None):
        """Store the vertices, defaulting to an empty list."""
        self.points = points if points else []

    def __str__(self):
        """Return the class name wrapping the string form of each point."""
        vertices = ",\n".join(str(point) for point in self.points)
        return f"{self.__class__.__name__}([{vertices}])"
class Property(PhyloElement):
    """A typed and referenced property from an external resources.

    Can be attached to ``Phylogeny``, ``Clade``, and ``Annotation`` objects.

    :Parameters:
        value : string
            the value of the property
        ref : string
            reference to an external resource, e.g. "NOAA:depth"
        applies_to : string
            indicates the item to which a property applies to (e.g. 'node' for
            the parent node of a clade, 'parent_branch' for the parent branch of
            a clade, or just 'clade').
        datatype : string
            the type of a property; limited to xsd-datatypes
            (e.g. 'xsd:string', 'xsd:boolean', 'xsd:integer', 'xsd:decimal',
            'xsd:float', 'xsd:double', 'xsd:date', 'xsd:anyURI').
        unit : string (optional)
            the unit of the property, e.g. "METRIC:m"
        id_ref : Id (optional)
            allows to attached a property specifically to one element (on the
            xml-level)

    """

    re_ref = re.compile(r"[a-zA-Z0-9_]+:[a-zA-Z0-9_\.\-\s]+")
    ok_applies_to = {
        "phylogeny",
        "clade",
        "node",
        "annotation",
        "parent_branch",
        "other",
    }
    ok_datatype = {
        "xsd:string",
        "xsd:boolean",
        "xsd:decimal",
        "xsd:float",
        "xsd:double",
        "xsd:duration",
        "xsd:dateTime",
        "xsd:time",
        "xsd:date",
        "xsd:gYearMonth",
        "xsd:gYear",
        "xsd:gMonthDay",
        "xsd:gDay",
        "xsd:gMonth",
        "xsd:hexBinary",
        "xsd:base64Binary",
        "xsd:anyURI",
        "xsd:normalizedString",
        "xsd:token",
        "xsd:integer",
        "xsd:nonPositiveInteger",
        "xsd:negativeInteger",
        "xsd:long",
        "xsd:int",
        "xsd:short",
        "xsd:byte",
        "xsd:nonNegativeInteger",
        "xsd:unsignedLong",
        "xsd:unsignedInt",
        "xsd:unsignedShort",
        "xsd:unsignedByte",
        "xsd:positiveInteger",
    }

    def __init__(self, value, ref, applies_to, datatype, unit=None, id_ref=None):
        """Initialize value for the Property object.

        ``ref``, ``applies_to``, ``datatype`` and ``unit`` are validated; a
        failed check only triggers a PhyloXMLWarning and the value is stored
        regardless.
        """
        # ``ref`` and ``unit`` must match the pattern as a whole: ``match``
        # would accept any value that merely starts with a valid reference.
        _check_str(ref, self.re_ref.fullmatch)
        _check_str(applies_to, self.ok_applies_to.__contains__)
        _check_str(datatype, self.ok_datatype.__contains__)
        _check_str(unit, self.re_ref.fullmatch)
        self.unit = unit
        self.id_ref = id_ref
        self.value = value
        self.ref = ref
        self.applies_to = applies_to
        self.datatype = datatype
class ProteinDomain(PhyloElement):
"""Represents an individual domain in a domain architecture.
The locations use 0-based indexing, as most Python objects including
SeqFeature do, rather than the usual biological convention starting at 1.