In [1]:
import sys
import unittest

from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp as mt

sys.path.append("../oligo_designer_toolsuite")

from oligo_designer_toolsuite.oligo_property_filter._property_filter import (
    PropertyFilter,

)

In [2]:
from oligo_designer_toolsuite.oligo_property_filter._filter_base import (
    GCContent,
    MaskedSequences,
    MeltingTemperature,
    ConsecutiveRepeats,
    GCClamp
)

class TestPreFilterBase(unittest.TestCase):
    """Check that the property filter classes return the correct verdict for a given input sequence."""

    @classmethod
    def setUpClass(cls):
        """Instantiate the filters under test once for the whole test case."""

        # Shared ConsecutiveRepeats filter, constructed with the argument 3
        # (presumably the repeat-length threshold -- confirm against the filter class).
        cls.ConsecutiveRepeats = ConsecutiveRepeats(3)

        filters = [
            cls.ConsecutiveRepeats
        ]
        # Not used by the tests below, but constructing it exercises PropertyFilter's initialiser.
        cls.pre_filter = PropertyFilter(filters=filters)

    def test_negative_consecutive_repeats(self):
        "Tests that the consecutive repeat filter returns false"
        # The sequence contains a run of five consecutive C's ("CCCCC").
        fulfills, _ = self.ConsecutiveRepeats.apply(Seq("CTTGGGCCTTTCCAAGCCCCCATTTGAGCT"))
        self.assertEqual(
            fulfills,
            False,
            "A sequence not fulfilling the consecutive repeat has been accepted!",
        )

    def test_positive_consecutive_repeats(self):
        "Tests that the consecutive repeat filter returns true"
        # The longest single-base run in this sequence has length three (GGG, TTT, CCC).
        fulfills, _ = self.ConsecutiveRepeats.apply(Seq("CTTGGGCCTTTCCAAGCCCATTTGAGCT"))
        # This assertion fails when a valid sequence is rejected, so the message must say so.
        self.assertEqual(
            fulfills,
            True,
            "A sequence fulfilling the consecutive repeat has been rejected!",
        )


In [3]:
# Run the test cases defined in this notebook's __main__ namespace.
# argv=[''] makes unittest ignore the kernel's own command-line arguments (it would
# otherwise parse sys.argv), and exit=False stops it from calling sys.exit() afterwards,
# which would otherwise shut the cell down with a SystemExit.
unittest.main(argv=[''], verbosity=2, exit=False)

test_negative_consecutive_repeats (__main__.TestPreFilterBase)
Tests that the consecutive repeat filter returns false ... ok
test_positive_consecutive_repeats (__main__.TestPreFilterBase)
Tests that the consecutive repeat filter returns true ... ok

----------------------------------------------------------------------
Ran 2 tests in 0.010s

OK


<unittest.main.TestProgram at 0x7faf07fbbac0>

In [6]:
from scipy.spatial.distance import hamming
from numpy import random


def generate_codebook(num_seq: int, encoding_scheme: str) -> list:
    """Generate a random codebook of constant-weight binary codes.

    Candidate codes are produced by shuffling a bit list with a fixed number of
    ones; a candidate is kept only if its Hamming distance to every code already
    in the codebook is at least the minimum distance of the chosen scheme.

    Supported schemes:

    * ``"MHD4"``: 16 bits, 4 ones, minimum Hamming distance 4, at most 140 codes.
    * ``"MHD2"``: 14 bits, 4 ones, minimum Hamming distance 2, at most 1001 codes.

    :param num_seq: Number of codes to generate.
    :param encoding_scheme: Name of the encoding scheme, ``"MHD4"`` or ``"MHD2"``.
    :return: List of ``num_seq`` codes, each a string of ``"0"``/``"1"`` characters.
        An empty list is returned when the scheme is unknown or when ``num_seq``
        exceeds the upper limit of the scheme.
    """
    # scheme name -> (num_bits, num_ones, min_hamming_distance, max_codes)
    schemes = {
        "MHD4": (16, 4, 4, 140),
        "MHD2": (14, 4, 2, 1001),
    }

    # Unknown scheme: keep the historical behaviour of returning an empty codebook.
    if encoding_scheme not in schemes:
        return []

    num_bits, num_ones, min_hamming_distance, max_codes = schemes[encoding_scheme]

    # Requests beyond the upper limit could never be satisfied and would loop forever.
    if num_seq > max_codes:
        return []

    sequences = []

    # NOTE(review): candidates are drawn at random and accepted greedily, and the loop has
    # no attempt cap. For num_seq close to the upper limit this may take very long or
    # possibly never finish -- confirm the intended usage range.
    while len(sequences) < num_seq:
        # Random code with exactly `num_ones` ones (numpy's shuffle works in place on lists).
        candidate = [1] * num_ones + [0] * (num_bits - num_ones)
        random.shuffle(candidate)

        # Exact integer mismatch count; avoids comparing a float (fraction * num_bits)
        # against the integer threshold.
        far_enough = all(
            sum(a != b for a, b in zip(candidate, accepted)) >= min_hamming_distance
            for accepted in sequences
        )
        if far_enough:
            sequences.append(candidate)

    # Convert each bit list to a string such as "0010010000010001".
    return ["".join(str(bit) for bit in sequence) for sequence in sequences]


In [7]:
# Example: build a codebook of ten codes with the MHD2 scheme and display it.
generate_codebook(num_seq=10, encoding_scheme="MHD2")

['10000010000101',
 '11010100000000',
 '10001000010100',
 '11010001000000',
 '00010001001100',
 '00100000001101',
 '01000000001110',
 '10100100001000',
 '10001001000100',
 '00110010000001']

In [4]:
from scipy.spatial.distance import hamming

# Toy example: two equal-length bit vectors that differ in two positions
x = [0, 1, 1, 1, 0, 1]
y = [0, 0, 1, 1, 0, 0]

# scipy's hamming() returns the fraction of mismatching positions, so scaling by the
# length gives the mismatch count. The value is computed but not shown, because it is
# neither printed nor the last expression of the cell.
hamming(x, y) * len(x)


# Four 16-bit codes, each containing exactly four ones
seq1 = [0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0]
seq2 = [0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,0]
seq3 = [0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0]
seq4 = [1,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0]

# Print the pairwise mismatch counts in the order
# (1,2), (1,3), (1,4), (2,3), (2,4), (3,4).
example_codes = (seq1, seq2, seq3, seq4)
for position, left_code in enumerate(example_codes):
    for right_code in example_codes[position + 1:]:
        print(hamming(left_code, right_code) * len(seq1))

4.0
8.0
8.0
4.0
6.0
4.0
