# Test Property Filters

## Import Packages

In [1]:

from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp as mt

from oligo_designer_toolsuite.oligo_property_filter import PropertyFilter

from oligo_designer_toolsuite.oligo_property_filter import (
    SoftMaskedSequenceFilter,
    HardMaskedSequenceFilter,
    ProhibitedSequenceFilter,
    HomopolymericRunsFilter,
    ThreePrimeSequenceFilter,
    FivePrimeSequenceFilter,
    GCContentFilter,
    GCClampFilter,
    MeltingTemperatureNNFilter,
    HomodimerFilter,
    SecondaryStructureFilter,
)
from oligo_designer_toolsuite.oligo_property_filter import PadlockArmsFilter

from oligo_designer_toolsuite.database import OligoDatabase
from oligo_designer_toolsuite.sequence_generator import OligoSequenceGenerator

## Define Parameters

In [2]:
## Melting temp parameters

# Keyword arguments for the nearest-neighbour melting-temperature calculation.
# The keys mirror the parameters of Bio.SeqUtils.MeltingTemp.Tm_NN; the
# thermodynamic tables are read directly from the MeltingTemp module.
Tm_parameters = {
    "check": True,
    "strict": True,
    "c_seq": None,
    "shift": 0,
    "nn_table": mt.DNA_NN3,
    "tmm_table": mt.DNA_TMM1,
    "imm_table": mt.DNA_IMM1,
    "de_table": mt.DNA_DE1,
    "dnac1": 50,  # [nM]
    "dnac2": 0,
    "selfcomp": False,
    "saltcorr": 7,
    "Na": 50,  # [mM]
    "K": 75,  # [mM]
    "Tris": 20,  # [mM]
    "Mg": 10,  # [mM]
    "dNTPs": 0,
}

# Settings for the chemical (DMSO / formamide) correction of the melting
# temperature. The keys match Bio.SeqUtils.MeltingTemp.chem_correction;
# presumably the filters forward them to that function - TODO confirm.
Tm_chem_correction_parameters = {
    "DMSO": 0,
    "DMSOfactor": 0.75,
    "fmdfactor": 0.65,
    "fmdmethod": 1,
    "GC": None,
    "fmd": 20,
}

# Settings for the salt correction of the melting temperature. The ion
# concentrations repeat the values given in Tm_parameters above, so both
# dictionaries have to be edited together when the buffer changes.
Tm_salt_correction_parameters = {
    "method": 7,
    "Na": 50,  # [mM]
    "K": 75,  # [mM]
    "Tris": 20,  # [mM]
    "Mg": 10,  # [mM]
    "dNTPs": 0,
}

## Test Masked Sequences

In [3]:
# Soft-masked sequences: an oligo containing lower-case bases must be rejected.
softmasked_sequence_filter = SoftMaskedSequenceFilter()

# Negative case: the sequence contains the lower-case bases "c" and "t".
seq_remove = Seq("TGTCGGATCTCcTCAACAAGCTGGTCtTGA")
res, _ = softmasked_sequence_filter.apply(seq_remove)
assert res == False, (
    f"error: A sequence ({seq_remove}) not fulfilling the condition has been accepted!"
)

# Positive case: the same oligo written in upper case only.
seq_keep = Seq("TGTCGGATCTCTTCAACAAGCTGGTCATGA")
res, feature = softmasked_sequence_filter.apply(seq_keep)
assert res == True, (
    f"error: A sequence ({seq_keep}) fulfilling the conditions has not been accepted!"
)
print(feature)

{}


In [4]:
# Hard-masked sequences, default-style mask character "N".
hardmasked_sequence_filter1 = HardMaskedSequenceFilter(mask="N")

# Negative case: two positions are masked with "N".
seq_remove = Seq("TGTCGGATCTCNTCAACAAGCTGGTCNTGA")
res, _ = hardmasked_sequence_filter1.apply(seq_remove)
assert res == False, (
    f"error: A sequence ({seq_remove}) not fulfilling the condition has been accepted!"
)

# Positive case: no mask character present.
seq_keep = Seq("TGTCGGATCTCTTCAACAAGCTGGTCATGA")
res, feature = hardmasked_sequence_filter1.apply(seq_keep)
assert res == True, (
    f"error: A sequence ({seq_keep}) fulfilling the conditions has not been accepted!"
)
print(feature)


# Hard-masked sequences with a custom mask character "Q".
hardmasked_sequence_filter2 = HardMaskedSequenceFilter(mask="Q")

# Negative case: two positions are masked with "Q".
seq_remove = Seq("TGTCGGATCTCQTCAACAAGCTGGTCQTGA")
res, _ = hardmasked_sequence_filter2.apply(seq_remove)
assert res == False, (
    f"error: A sequence ({seq_remove}) not fulfilling the condition has been accepted!"
)

# Positive case: "N" is present, but it is not the configured mask character.
seq_keep = Seq("TGTCGGATCTCTNNAACAAGCTGGTCATGA")
res, feature = hardmasked_sequence_filter2.apply(seq_keep)
assert res == True, (
    f"error: A sequence ({seq_keep}) fulfilling the conditions has not been accepted!"
)
print(feature)

{}
{}


## Test Sequence Content

In [5]:
# Prohibited motif given as a single string.
prohibited_sequence_filter = ProhibitedSequenceFilter(prohibited_sequence="ACT")

# Negative case: the oligo ends with the prohibited motif "ACT".
seq_remove = Seq("GGGGGGGGGGGGGGACT")
res, _ = prohibited_sequence_filter.apply(seq_remove)
assert res == False, (
    f"error: A sequence ({seq_remove}) not fulfilling the condition has been accepted!"
)

# Positive case: "ACT" does not occur anywhere in the oligo.
seq_keep = Seq("GGGGGGGGGGGGGGATC")
res, feature = prohibited_sequence_filter.apply(seq_keep)
assert res == True, (
    f"error: A sequence ({seq_keep}) fulfilling the conditions has not been accepted!"
)
print(feature)

{}


In [6]:
# Prohibited motifs given as a list: either motif must trigger a rejection.
prohibited_sequence_filter2 = ProhibitedSequenceFilter(prohibited_sequence=["ACT", "GGG"])

# Negative case 1: contains both "ACT" and "GGG".
seq_remove = Seq("GGGGGGGGGGGGGGACT")
res, _ = prohibited_sequence_filter2.apply(seq_remove)
assert res == False, (
    f"error: A sequence ({seq_remove}) not fulfilling the condition has been accepted!"
)

# Negative case 2: contains only "GGG".
seq_remove = Seq("GGGGGGGGGGGGGGATC")
res, _ = prohibited_sequence_filter2.apply(seq_remove)
assert res == False, (
    f"error: A sequence ({seq_remove}) not fulfilling the condition has been accepted!"
)

# Positive case: neither motif occurs.
seq_keep = Seq("GGAATC")
res, feature = prohibited_sequence_filter2.apply(seq_keep)
assert res == True, (
    f"error: A sequence ({seq_keep}) fulfilling the conditions has not been accepted!"
)
print(feature)

{}


In [7]:
# Homopolymeric runs: thresholds are configured per base, here for A and C only.
homopolymeric_run_filter = HomopolymericRunsFilter(base_n={"A": 4, "C": 5})

# Negative case: the oligo ends with a run of five A's.
seq_remove = Seq("GGGGGGGGGGGGGGAAAAA")
res, _ = homopolymeric_run_filter.apply(seq_remove)
assert res == False, (
    f"error: A sequence ({seq_remove}) not fulfilling the condition has been accepted!"
)

# Positive case: only three A's in a row. The long G run is expected to be
# tolerated because no threshold is configured for G.
seq_keep = Seq("GGGGGGGGGGGGGGAAA")
res, feature = homopolymeric_run_filter.apply(seq_keep)
assert res == True, (
    f"error: A sequence ({seq_keep}) fulfilling the conditions has not been accepted!"
)
print(feature)

{}


In [8]:
# 3' end motif with remove=False: the test expects oligos ending in "TT" to be
# kept and all others to be rejected.
three_prime_filter = ThreePrimeSequenceFilter(three_prime_sequence="TT", remove=False)

# Negative case: the oligo ends in "AA".
seq_remove = Seq("GGGGGGGGGGGGGGAAAAA")
res, _ = three_prime_filter.apply(seq_remove)
assert res == False, (
    f"error: A sequence ({seq_remove}) not fulfilling the condition has been accepted!"
)

# Positive case: the oligo ends in "TT".
seq_keep = Seq("GGGGGGGGGGGGGGAAATT")
res, feature = three_prime_filter.apply(seq_keep)
assert res == True, (
    f"error: A sequence ({seq_keep}) fulfilling the conditions has not been accepted!"
)
print(feature)

{}


In [9]:
# 5' end motif with remove=True: the test expects oligos starting with "TT" to
# be rejected and all others to be kept.
five_prime_filter = FivePrimeSequenceFilter(five_prime_sequence="TT", remove=True)

# Negative case: the oligo starts with "TT".
seq_remove = Seq("TTGGGGGGGGGGGGGGAAAAA")
res, _ = five_prime_filter.apply(seq_remove)
assert res == False, (
    f"error: A sequence ({seq_remove}) not fulfilling the condition has been accepted!"
)

# Positive case: "TT" occurs only at the 3' end, not at the 5' end.
seq_keep = Seq("GGGGGGGGGGGGGGAAATT")
res, feature = five_prime_filter.apply(seq_keep)
assert res == True, (
    f"error: A sequence ({seq_keep}) fulfilling the conditions has not been accepted!"
)
print(feature)

{}


## Test GC Content

In [10]:
# GC content window, given in percent.
GC_content_filter = GCContentFilter(GC_content_min=40, GC_content_max=60)

# Negative case: 21 of the 30 bases are G/C (70 %), above the upper bound.
seq_remove = Seq("TCGGGCGGGAGATCCAGGTGGCGCGCAAAG")
res, _ = GC_content_filter.apply(seq_remove)
assert res == False, (
    f"error: A sequence ({seq_remove}) not fulfilling the condition has been accepted!"
)

# Positive case: 14 of the 30 bases are G/C, inside the window.
seq_keep = Seq("TGTCGGATCTCTTCAACAAGCTGGTCATGA")
res, feature = GC_content_filter.apply(seq_keep)
assert res == True, (
    f"error: A sequence ({seq_keep}) fulfilling the conditions has not been accepted!"
)
print(feature)

{'GC_content': 46.6667}


In [11]:
# GC clamp: the test expects at least one G/C among the last three bases.
GC_clamp_filter = GCClampFilter(n_bases=3, n_GC=1)

# Negative case: the oligo ends in "AAA".
seq_remove = Seq("TCGGGCGGGAGATCCAGGTGGCGCGCAAAAA")
res, _ = GC_clamp_filter.apply(seq_remove)
assert res == False, (
    f"error: A sequence ({seq_remove}) not fulfilling the condition has been accepted!"
)

# Positive case: the oligo ends in "GAA", i.e. one G within the last three bases.
seq_keep = Seq("TGTCGGATCTCTTCAACAAGCTGGTCATGAA")
res, feature = GC_clamp_filter.apply(seq_keep)
assert res == True, (
    f"error: A sequence ({seq_keep}) fulfilling the conditions has not been accepted!"
)
print(feature)

{}


## Test Sequence Structure

In [12]:
# Part 1: melting-temperature filter with the library's default parameters
# (an empty Tm_parameters dict is passed, no correction parameters at all).
Tm_filter1 = MeltingTemperatureNNFilter(Tm_min=52, Tm_max=67, Tm_parameters={})

# Negative case: the melting temperature is expected to fall outside [52, 67].
seq_remove = Seq("TGGCTTGGGCCTTTCCAAGCCCCCATTTGAGCT")
res, _ = Tm_filter1.apply(seq_remove)
assert (
    res == False
), f"error: A sequence ({seq_remove}) not fulfilling the condition with default parameters has been accepted!"

# Positive case: the computed melting temperature is printed for inspection.
seq_keep = Seq("TGTCGGATCTCTTCAACAAGCTGGTCATGA")
res, feature = Tm_filter1.apply(seq_keep)
assert (
    res == True
), f"error: A sequence ({seq_keep}) fulfilling the conditions has not been accepted!"
print(feature)

# Part 2: melting-temperature filter with the user-defined parameter sets from
# the "Define Parameters" cell, including salt and chemical corrections.
Tm_filter2 = MeltingTemperatureNNFilter(
    Tm_min=52,
    Tm_max=67,
    Tm_parameters=Tm_parameters,
    Tm_chem_correction_parameters=Tm_chem_correction_parameters,
    Tm_salt_correction_parameters=Tm_salt_correction_parameters,
)

# Negative case: same oligo as in part 1, still expected to be rejected.
seq_remove = Seq("TGGCTTGGGCCTTTCCAAGCCCCCATTTGAGCT")
res, _ = Tm_filter2.apply(seq_remove)
assert (
    res == False
), f"error: A sequence ({seq_remove}) not fulfilling the condition with user-defined parameters has been accepted!"

# Positive case: variant of the rejected oligo whose 3' end is replaced by A's.
seq_keep = Seq("TGGCTTGGGCCTTTCCAAGCCCCCATTTAAAAA")
res, feature = Tm_filter2.apply(seq_keep)
assert (
    res == True
), f"error: A sequence ({seq_keep}) fulfilling the conditions with user-defined parameters has not been accepted!"
print(feature)

{'melting_temperature': 61.405}
{'melting_temperature': 63.4241}


In [13]:
# Secondary-structure filter, configured with T=37 and a free-energy
# threshold thr_DG=0.
secondary_structure_filter = SecondaryStructureFilter(T=37, thr_DG=0)

# Negative case: this oligo is expected to be rejected by the threshold.
seq_remove = Seq("TGGCTTGGGCCTTTCCAAGCCCCCATTTGAGCT")
res, _ = secondary_structure_filter.apply(seq_remove)
assert (
    res == False
), f"error: A sequence ({seq_remove}) not fulfilling the condition has been accepted!"

# Positive case: the computed free energy is printed for inspection.
seq_keep = Seq("TGTCGGATCTCTTCAACAAGCTGGTCATGA")
res, feature = secondary_structure_filter.apply(seq_keep)
assert (
    res == True
), f"error: A sequence ({seq_keep}) fulfilling the conditions has not been accepted!"
print(feature)

{'secondary_structure_DG': 0.5}


In [14]:
# NOTE(review): this cell repeats the SecondaryStructureFilter check of the
# preceding cell with identical settings and sequences. HomodimerFilter is
# imported at the top of the notebook but is not exercised in any visible
# cell - this cell may have been meant to test it. Confirm and replace.
secondary_structure_filter = SecondaryStructureFilter(T=37, thr_DG=0)

# Negative case: this oligo is expected to be rejected by the threshold.
seq_remove = Seq("TGGCTTGGGCCTTTCCAAGCCCCCATTTGAGCT")
res, _ = secondary_structure_filter.apply(seq_remove)
assert (
    res == False
), f"error: A sequence ({seq_remove}) not fulfilling the condition has been accepted!"

# Positive case: the computed free energy is printed for inspection.
seq_keep = Seq("TGTCGGATCTCTTCAACAAGCTGGTCATGA")
res, feature = secondary_structure_filter.apply(seq_keep)
assert (
    res == True
), f"error: A sequence ({seq_keep}) fulfilling the conditions has not been accepted!"
print(feature)

{'secondary_structure_DG': 0.5}


## Test Padlock Arms

In [15]:
# Padlock-arm filter: constrains the minimum arm length, the melting
# temperature window of each arm and the maximum Tm difference between the
# two arms. The Tm settings come from the "Define Parameters" cell.
padlock_arms_filter = PadlockArmsFilter(
    arm_length_min=5,
    arm_Tm_dif_max=5,
    arm_Tm_min=40,
    arm_Tm_max=60,
    Tm_parameters=Tm_parameters,
    Tm_salt_correction_parameters=Tm_salt_correction_parameters,
    Tm_chem_correction_parameters=Tm_chem_correction_parameters,
)

# Negative case: no valid pair of arms is expected for this oligo.
seq_remove = Seq("TGTCGGATCTCTTCAACAAGCTGGTCAT")
res, _ = padlock_arms_filter.apply(seq_remove)
assert (
    res == False
), f"error: A sequence ({seq_remove}) not fulfilling the condition has been accepted!"

# Positive case: the arm melting temperatures, their difference and the
# ligation site are printed for inspection.
seq_keep = Seq("TGGCTTGGGCCTTTCCAAGCCCCCATTTGAGCT")
res, feature = padlock_arms_filter.apply(seq_keep)
assert (
    res == True
), f"error: A sequence ({seq_keep}) fulfilling the conditions has not been accepted!"
print(feature)

{'arm1_Tm': 51.3523, 'arm2_Tm': 50.375, 'arms_Tm_dif': 0.98, 'ligation_site': 16}


## Test PropertyFilter API

In [None]:
# Build one instance of several filter types and bundle them in a
# PropertyFilter. Note that GC_content_filter, GC_clamp_filter and
# secondary_structure_filter rebind names from earlier cells, partly with
# different settings (n_bases=2 and thr_DG=-5 here).
masked_sequences_filter = HardMaskedSequenceFilter(mask="N")
GC_content_filter = GCContentFilter(GC_content_min=40, GC_content_max=60)
GC_clamp_filter = GCClampFilter(n_bases=2, n_GC=1)

# NOTE(review): unlike Tm_filter2 and the padlock-arm filter, no
# Tm_salt_correction_parameters are passed here - confirm this is intended.
Tm_filter = MeltingTemperatureNNFilter(
    Tm_min=52,
    Tm_max=67,
    Tm_parameters=Tm_parameters,
    Tm_chem_correction_parameters=Tm_chem_correction_parameters,
)
# Keyword arguments, consistent with the other SecondaryStructureFilter calls.
secondary_structure_filter = SecondaryStructureFilter(T=37, thr_DG=-5)

Tm_arms_filter = PadlockArmsFilter(
    arm_length_min=5,
    arm_Tm_dif_max=5,
    arm_Tm_min=40,
    arm_Tm_max=60,
    Tm_parameters=Tm_parameters,
    Tm_salt_correction_parameters=Tm_salt_correction_parameters,
    Tm_chem_correction_parameters=Tm_chem_correction_parameters,
)

filters = [
    masked_sequences_filter,
    GC_content_filter,
    GC_clamp_filter,
    Tm_filter,
    secondary_structure_filter,
    Tm_arms_filter,
]
property_filter = PropertyFilter(filters=filters)

In [None]:
# Generate 100 random 30-mers with a G/C-biased base composition and write
# them to a FASTA file.
# NOTE(review): no random seed is set in this cell, so the generated sequences
# presumably differ between runs - confirm whether a seed can be configured.
region_name = "random_sequences1"
oligo_sequence_generator = OligoSequenceGenerator()

file_fasta_random_seqs1 = oligo_sequence_generator.create_sequences_random(
    filename_out=region_name,
    length_sequences=30,
    num_sequences=100,
    name_sequences=region_name,
    base_alphabet_with_probability={"A": 0.1, "C": 0.3, "G": 0.4, "T": 0.2},
)

# Load the generated sequences into a fresh oligo database.
oligos = OligoDatabase(
    min_oligos_per_region=2,
    write_regions_with_insufficient_oligos=True,
)

oligos.load_sequences_from_fasta(
    file_fasta_in=file_fasta_random_seqs1,
    sequence_type="oligo",
    region_ids=[region_name],
    database_overwrite=True,
)

# Run the combined property filter on the database with two parallel jobs.
property_filter.apply(sequence_type="oligo", oligo_database=oligos, n_jobs=2)