-
Notifications
You must be signed in to change notification settings - Fork 11
/
StickyEndSeq.py
181 lines (158 loc) · 7.04 KB
/
StickyEndSeq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
from Bio.Seq import Seq
try:
# Biopython <1.78
from Bio.Alphabet import DNAAlphabet
has_dna_alphabet = True
except ImportError:
# Biopython >=1.78
has_dna_alphabet = False
from .StickyEnd import StickyEnd
class StickyEndSeq(Seq):
"""Represent sequences with sticky ends.
It is used like a Biopython Seq, but with additional flanking sequences
``left_end`` and ``right_end``, which are ``StickyEnd`` objects.
"""
def __init__(self, data, left_end=None, right_end=None, **k):
Seq.__init__(self, str(data), **k)
self.left_end = left_end
self.right_end = right_end
def reverse_complement(self):
"""The reverse is a StickyEndSeq with reversed ends
left-right versions are interchanged and reverse complemented.
"""
if has_dna_alphabet: # Biopython <1.78
sticky_end_seq = StickyEndSeq(
str(Seq.reverse_complement(self)),
left_end=None
if self.right_end is None
else self.right_end.reverse_complement(),
right_end=None
if self.left_end is None
else self.left_end.reverse_complement(),
alphabet=self.alphabet,
)
else:
sticky_end_seq = StickyEndSeq(
str(Seq.reverse_complement(self)),
left_end=None
if self.right_end is None
else self.right_end.reverse_complement(),
right_end=None
if self.left_end is None
else self.left_end.reverse_complement(),
)
return sticky_end_seq
def will_clip_in_this_order_with(self, other):
"""Return whether this sequence will clip in this order with another.
"""
return (self.right_end is not None) and self.right_end.will_clip_directly_with(
other.left_end
)
def __repr__(self):
content = Seq.__str__(self)
if len(content) > 15:
content = (
content[:5].lower() + ("(%d)" % len(content)) + content[-5:].lower()
)
return "(%s-%s-%s)" % (repr(self.left_end), content, repr(self.right_end),)
def __add__(self, other):
assert self.will_clip_in_this_order_with(other)
return StickyEndSeq(
str(self) + str(self.right_end) + str(other),
left_end=self.left_end,
right_end=other.right_end,
)
def to_standard_sequence(self, discard_sticky_ends=False):
"""Return a string, basically left + middle + right"""
if discard_sticky_ends:
return Seq(str(self))
else:
left = str(self.left_end) if self.left_end else ""
middle = self.to_standard_sequence(discard_sticky_ends=True)
right = str(self.right_end) if self.right_end else ""
return left + middle + right
@staticmethod
def list_from_sequence_digestion(sequence, enzyme, linear=True):
"""Compute the StickyEndSeqs resulting from a sequence digestion.
This is one of the most important methods in DNA Cauldron. It uses
Biopython to find restriction sites, and deals with the case where
the sequence is circular, or the sequence is already sticky-ended.
That was painful to write but hopefully the tests ensure it works well.
"""
if isinstance(enzyme, (list, tuple)):
if len(enzyme) == 1:
enzyme = enzyme[0]
else:
enzyme, other_enzymes = enzyme[0], enzyme[1:]
sticky_fragments = StickyEndSeq.list_from_sequence_digestion(
sequence=sequence, enzyme=other_enzymes, linear=linear
)
if not (sticky_fragments[0] is sequence):
linear = True
return [
fragment
for sticky in sticky_fragments
for fragment in StickyEndSeq.list_from_sequence_digestion(
sequence=sticky, enzyme=enzyme, linear=linear
)
]
n_cuts = len(enzyme.search(sequence, linear=linear))
if n_cuts == 0:
return [sequence]
overhang = abs(enzyme.ovhg)
right_end_sign = +1 if enzyme.is_3overhang() else -1
if linear:
fragments = enzyme.catalyse(sequence, linear=True)
else:
fragments = enzyme.catalyse(sequence + sequence, linear=True)
fragments = fragments[1 : n_cuts + 1]
if right_end_sign == -1:
if not linear:
overhang_bit = fragments[0][:overhang]
new_fragment_seq = fragments[0][overhang:]
last_right_end = StickyEnd(overhang_bit, right_end_sign)
first_left_end = StickyEnd(overhang_bit, -right_end_sign)
sticky_fragments = [
StickyEndSeq(new_fragment_seq, left_end=first_left_end)
]
else:
sticky_fragments = [StickyEndSeq(fragments[0])]
for f in fragments[1:]:
overhang_bit, new_fragment_seq = f[:overhang], f[overhang:]
sticky_fragments[-1].right_end = StickyEnd(overhang_bit, right_end_sign)
new_fragment = StickyEndSeq(
new_fragment_seq, left_end=StickyEnd(overhang_bit, -right_end_sign),
)
sticky_fragments.append(new_fragment)
if not linear:
sticky_fragments[-1].right_end = last_right_end
else:
left_end = None
for f in fragments[:-1]:
new_fragment_seq, overhang_bit = f[:-overhang], f[-overhang:]
right_end = StickyEnd(overhang_bit, right_end_sign)
new_fragment = StickyEndSeq(
new_fragment_seq, left_end=left_end, right_end=right_end
)
sticky_fragments.append(new_fragment)
left_end = StickyEnd(overhang_bit, -right_end_sign)
if not linear:
new_fragment_seq = fragments[-1][:-overhang]
overhang_bit = fragments[-1][-overhang:]
first_left_end = StickyEnd(overhang_bit, -right_end_sign)
last_right_end = StickyEnd(overhang_bit, right_end_sign)
sticky_fragments[0].left_end = first_left_end
sticky_fragments = [
StickyEndSeq(
new_fragment_seq, left_end=left_end, right_end=last_right_end,
)
]
else:
sticky_fragments.append(StickyEndSeq(fragments[-1], left_end=left_end))
if hasattr(sequence, "left_end") and sticky_fragments[0].left_end is None:
sticky_fragments[0].left_end = sequence.left_end
if hasattr(sequence, "right_end") and sticky_fragments[-1].right_end is None:
sticky_fragments[-1].right_end = sequence.right_end
return sticky_fragments
def ends_tuple(self):
return (str(self.left_end), str(self.right_end))