Skip to content

Commit

Permalink
Merge branch 'dev' into main
Browse files: browse the repository at this point in the history
  • Loading branch information
veghp committed Oct 25, 2020
2 parents 067938e + 2e5807d commit 420cae3
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 1 deletion.
2 changes: 1 addition & 1 deletion polymera/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .polymera import Polymer, Alphabet, Sequence, Segment
from .polymera import Polymer, Alphabet, Sequence, Segment, hamming
from .bio import dna

from .version import __version__
56 changes: 56 additions & 0 deletions polymera/polymera.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,3 +323,59 @@ def __init__(self, choices):
self.choices = choices
if len(set(choices)) != len(choices):
raise DuplicateChoiceError("Choices of a segment must be unique.")


class UnequalLengthError(ValueError):
    """Raised when two sequences being compared do not have the same length."""


def hamming(seq1, seq2, comparison="options"):
    """Return the Hamming distance between two sequences.

    Both arguments must provide ``get_length()``; the lengths are compared
    before anything else is looked at.

    ``comparison`` selects how positions are compared:

    * ``"options"`` -- delegate to ``hamming_options``.
    * ``"uncertainty"`` -- currently always yields 0.

    Raises ``UnequalLengthError`` when the lengths differ and ``ValueError``
    for any other value of ``comparison``.
    """
    if seq1.get_length() != seq2.get_length():
        raise UnequalLengthError("The two sequences must be the same length!")

    if comparison == "options":
        return hamming_options(seq1, seq2)
    if comparison == "uncertainty":
        return 0
    raise ValueError("Parameter comparison must be 'options' or 'uncertainty'!")


def break_segment(segment):
    """Return a list of n 1-length Segments from a Segment of length n.

    For every position ``i`` (the number of positions is the length of the
    first choice), the letters found at ``i`` in each of ``segment.choices``
    are gathered into one new ``Segment``. Duplicate letters are dropped and
    the remaining ones keep the order in which they were first seen.

    Returns an empty list when ``segment.choices`` is empty.
    """
    choices = segment.choices
    if not choices:
        # Nothing to split; indexing choices[0] below would raise IndexError.
        return []

    new_segments = []
    for i in range(len(choices[0])):
        # dict.fromkeys removes duplicates while preserving first-seen order,
        # so the result is reproducible from run to run (the iteration order
        # of a set of strings varies with hash randomisation).
        letters = list(dict.fromkeys(choice[i] for choice in choices))
        new_segments.append(Segment(choices=letters))

    return new_segments


def convert_to_nosegment(seq):
    """Return a new Sequence in which every segment has length 1.

    Each segment of ``seq.segments`` is split with ``break_segment`` and the
    resulting pieces are concatenated in their original order.
    """
    unit_segments = [
        piece
        for segment in seq.segments
        for piece in break_segment(segment)
    ]
    return Sequence(segments=unit_segments)


def hamming_options(seq1, seq2):
    """Return the Hamming distance of two sequences, comparing their options.

    Both sequences are first expanded into length-1 segments with
    ``convert_to_nosegment``. A position contributes 1 to the distance only
    when the two segments at that position have no choice in common.

    Raises ``UnequalLengthError`` (a ``ValueError``) when the expanded
    sequences do not have the same number of positions.
    """
    segments1 = convert_to_nosegment(seq1).segments
    segments2 = convert_to_nosegment(seq2).segments

    # Compare position by position only when both sides line up. Without this
    # check a shorter second sequence fails with a bare IndexError and a
    # longer one has its trailing positions silently ignored.
    if len(segments1) != len(segments2):
        raise UnequalLengthError("The two sequences must be the same length!")

    # True counts as 1, so summing the disjointness checks yields the distance.
    return sum(
        set(first.choices).isdisjoint(second.choices)
        for first, second in zip(segments1, segments2)
    )
48 changes: 48 additions & 0 deletions tests/test_polymera.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
import polymera
from polymera.polymera import break_segment, convert_to_nosegment, hamming_options


def test_segment():
Expand Down Expand Up @@ -62,3 +63,50 @@ def test_polymer():
polymer.get_information_content(method="wrong parameter")
assert polymer.get_information_content(method="option") == 2
assert polymer.get_information_content(method="uncertainty") == 1


def test_break_segment():
    """A 3-letter segment splits into 3 pieces; the last keeps both letters."""
    pieces = break_segment(polymera.Segment(["AAT", "AAC"]))
    assert len(pieces) == 3
    # Only the third position differs between "AAT" and "AAC".
    assert len(pieces[2].choices) == 2


def test_convert_to_nosegment():
    """Converting "T,C,G|CCC" is expected to give four length-1 segments."""
    original = polymera.Sequence()
    original.add_sequence_from_string("T,C,G|CCC")
    converted = convert_to_nosegment(original)
    assert len(converted.segments) == 4


def test_hamming_options():
    """Two sequences sharing no letter at any position have distance 4."""
    first = polymera.Sequence()
    second = polymera.Sequence()
    first.add_sequence_from_string("T,C,G|CCC")
    second.add_sequence_from_string("A|GGG")

    assert hamming_options(first, second) == 4


def test_hamming():
    """Exercise ``polymera.hamming`` for each comparison mode and error path."""
    first = polymera.Sequence()
    second = polymera.Sequence()
    first.add_sequence_from_string("T,C,G|CCG")
    second.add_sequence_from_string("A|GGG")

    # "options" mode: the final G matches, the other three positions do not.
    assert polymera.hamming(first, second, comparison="options") == 3

    # "uncertainty" mode: only checked to run without raising.
    polymera.hamming(first, second, comparison="uncertainty")

    # An unknown comparison mode is rejected.
    with pytest.raises(ValueError):
        polymera.hamming(first, second, comparison="wrong_parameter")

    # After adding more to the second sequence the lengths are expected to
    # differ, which must be rejected as well.
    second.add_sequence_from_string("GGG")
    with pytest.raises(ValueError):
        polymera.hamming(first, second, comparison="options")

0 comments on commit 420cae3

Please sign in to comment.