# Test Specificity Filters

In [None]:
import os
from pathlib import Path

from oligo_designer_toolsuite.database import OligoDatabase, ReferenceDatabase
from oligo_designer_toolsuite.oligo_specificity_filter import (
    ExactMatchFilter, 
    BlastNFilter, 
    BlastNSeedregionFilter, 
    BlastNSeedregionSiteFilter, 
    BowtieFilter, 
    Bowtie2Filter, 
    CrossHybridizationFilter, 
    RemoveByLargerRegionPolicy, 
    RemoveByDegreePolicy, 
    VariantsFilter,
    )

## Setup

In [None]:
# Output directory: "<parent of the current working directory>/notebooks/output",
# created (with any missing parents) if it does not exist yet.
parent_dir = os.path.dirname(os.getcwd())
dir_output = os.path.join(parent_dir, 'notebooks/output')
os.makedirs(dir_output, exist_ok=True)

In [None]:
# Files
# Directory prefixes shared by the test data paths (relative to the notebook's working directory).
dir_database_oligos = "../data/databases/database_oligos_tsv"
dir_expected_results = "../data/databases/expected_results"
dir_database_reference = "../data/databases/database_reference"

# Oligo tables for the alignment-based filters
file_database_oligos_exact_match = f"{dir_database_oligos}/database_oligos_exactmatch.tsv"
file_database_oligos_match = f"{dir_database_oligos}/database_oligos_match.tsv"
file_database_oligos_nomatch = f"{dir_database_oligos}/database_oligos_nomatch.tsv"

# Oligo tables for the seed-region (ligation site) filter
file_database_oligos_ligation_match = f"{dir_database_oligos}/database_oligos_ligation_match.tsv"
file_database_oligos_ligation_nomatch = f"{dir_database_oligos}/database_oligos_ligation_nomatch.tsv"

# Cross-hybridization input and the accepted solutions per policy
file_database_oligos_crosshyb = f"{dir_database_oligos}/database_oligos_crosshybridization.tsv"
solutions_larger_region = [
    f"{dir_expected_results}/solution_crosshyb_larger_region_{i}.tsv" for i in range(3)
]
solutions_degree = [
    f"{dir_expected_results}/solution_crosshyb_degree_{i}.tsv" for i in range(8)
]

# Reference files
file_database_reference = f"{dir_database_reference}/database_reference.fna"
file_database_reference_ligation = f"{dir_database_reference}/database_reference_ligation.fna"
file_database_reference_variants = f"{dir_database_reference}/database_reference_variants.vcf"

In [None]:
# Metadata
# Region identifiers and NCBI annotation metadata (neither is referenced by the cells visible below).
region_ids = "AARS1 DECR2 FAM234A RHBDF1 WASIR2".split()

metadata_ncbi = {"files_source": "NCBI", "species": "Homo_sapiens"}
metadata_ncbi["annotation_release"] = "110"
metadata_ncbi["genome_assembly"] = "GRCh38"

In [None]:
# Blast parameters
blast_search_parameters = {"perc_identity": 80, "strand": "plus", "word_size": 10}
blast_hit_parameters = {"coverage": 50}

# Bowtie parameters
bowtie_search_parameters = {"-n": 3, "-l": 5}
bowtie2_search_parameters = {"-N": 0}

# Parameters Cross-hybridization
# Same settings as above, except that Blast searches the minus strand and
# Bowtie gets the additional "--nofw" flag. Each is its own dict object.
blast_search_parameters_crosshyb = {**blast_search_parameters, "strand": "minus"}
blast_hit_parameters_crosshyb = {**blast_hit_parameters}
bowtie_search_parameters_crosshyb = {**bowtie_search_parameters, "--nofw": ""}


## Test Exact Match Filter

In [None]:
# Load the exact-match oligo table, run ExactMatchFilter on the "oligo" sequences
# and check which oligos remain in the returned database.
sequence_type = "oligo"

oligo_database_exact_match = OligoDatabase(min_oligos_per_region=2, write_regions_with_insufficient_oligos=True, dir_output=dir_output)
oligo_database_exact_match.load_database_from_table(file_database_oligos_exact_match, database_overwrite=True, merge_databases_on_sequence_type="oligo")

# NOTE(review): the policy is passed to ExactMatchFilter here, whereas the
# cross-hybridization cells construct ExactMatchFilter() without arguments —
# confirm which constructor signature matches the installed toolsuite version.
policy = RemoveByLargerRegionPolicy()
exactmatch_filter = ExactMatchFilter(policy)
# NOTE(review): the third positional argument is presumably n_jobs — confirm against apply()'s signature.
res = exactmatch_filter.apply(sequence_type, oligo_database_exact_match, 2)

# WASH7P::2 must have been removed; AGRN::1 must still be present.
assert "WASH7P::2" not in res.database["WASH7P"].keys(), "A matching oligo has not been filtered from exact matches!"
assert "AGRN::1" in res.database["AGRN"].keys(), "A non-matching oligo has been filtered from exact mathces!"

## Test BlastN Filter

In [None]:
# Reference database loaded from the FASTA reference file; the BlastN filter cells below search against it.
reference_database = ReferenceDatabase(dir_output=dir_output)
reference_database.load_database_from_file(files=file_database_reference,file_type="fasta",database_overwrite=True)

In [None]:
# Positive BlastN case: filter the "match" oligo table on the target sequences
# with hit removal enabled; WASH7P::1 is expected to be gone afterwards.
sequence_type = "target"

oligo_database_match = OligoDatabase(min_oligos_per_region=2, write_regions_with_insufficient_oligos=True, dir_output=dir_output)
oligo_database_match.load_database_from_table(file_database_oligos_match, database_overwrite=True, merge_databases_on_sequence_type="oligo")

blast_filter = BlastNFilter(blast_search_parameters, blast_hit_parameters, dir_output=dir_output)
# The reference is attached to the filter before apply() instead of being passed to apply().
blast_filter.set_reference_database(reference_database=reference_database)
# Debug output: prints the class of the filter object.
print(type(blast_filter))
res = blast_filter.apply(sequence_type=sequence_type, oligo_database=oligo_database_match, remove_hits=True, n_jobs=2)

assert "WASH7P::1" not in res.database["WASH7P"].keys(), "A matching oligo has not been filtered by Blast!"

In [None]:
# Negative BlastN case: filter the "nomatch" oligo table; AGRN::1 is expected to survive.
sequence_type = "target"

oligo_database_nomatch = OligoDatabase(min_oligos_per_region=2, write_regions_with_insufficient_oligos=True, dir_output=dir_output)
# NOTE(review): this cell uses load_database() and passes the reference database
# positionally to apply(), while the preceding BlastN cell uses
# load_database_from_table() and set_reference_database() — confirm that both
# call styles are valid for the installed toolsuite version.
oligo_database_nomatch.load_database(file_database_oligos_nomatch)

blast_filter = BlastNFilter(blast_search_parameters, blast_hit_parameters, dir_output=dir_output)
res = blast_filter.apply(sequence_type, oligo_database_nomatch, 2, reference_database)

assert "AGRN::1" in res.database["AGRN"].keys(), "A non matching oligo has been filtered by Blast!"

In [None]:
# Reference database for the seed-region (ligation site) BlastN cells below.
# NOTE(review): loaded via load_sequences_from_fasta(), whereas the other reference
# databases in this notebook use load_database_from_file() — confirm both exist.
reference_database_ligation = ReferenceDatabase(dir_output=dir_output)
reference_database_ligation.load_sequences_from_fasta(files_fasta=file_database_reference_ligation, database_overwrite=True)

In [None]:
# Positive seed-region case: BlastNSeedregionSiteFilter with a seed region size
# of 10 on the ligation "match" table; WASH7P::1 is expected to be removed.
# NOTE(review): _filter_blastn and inspect are imported but not used in this cell.
from oligo_designer_toolsuite.oligo_specificity_filter import _filter_blastn
import inspect
sequence_type = "target"
seedregion_size = 10

oligo_database_ligation_match = OligoDatabase(min_oligos_per_region=2, write_regions_with_insufficient_oligos=True, dir_output=dir_output)
oligo_database_ligation_match.load_database(file_database_oligos_ligation_match)

blast_ligation_filter = BlastNSeedregionSiteFilter(seedregion_size, blast_search_parameters, blast_hit_parameters, dir_output=dir_output)
res = blast_ligation_filter.apply(sequence_type, oligo_database_ligation_match, 2, reference_database_ligation)

assert "WASH7P::1" not in res.database["WASH7P"].keys(), "A matching oligo has not been filtered by Blast!"

In [None]:
# Negative seed-region case: BlastNSeedregionSiteFilter with a seed region size
# of 5 on the ligation "nomatch" table; AGRN::1 is expected to survive.
sequence_type = "target"
seedregion_size = 5

oligo_database_ligation_nomatch = OligoDatabase(min_oligos_per_region=2, write_regions_with_insufficient_oligos=True, dir_output=dir_output)
oligo_database_ligation_nomatch.load_database(file_database_oligos_ligation_nomatch)

blast_ligation_filter = BlastNSeedregionSiteFilter(seedregion_size, blast_search_parameters, blast_hit_parameters, dir_output=dir_output)
res = blast_ligation_filter.apply(sequence_type, oligo_database_ligation_nomatch, 2, reference_database_ligation)

assert "AGRN::1" in res.database["AGRN"].keys(), "A non matching oligo has been filtered by Blast!"

## Test Bowtie Filter

In [None]:
# Rebuild the reference database from the FASTA reference file for the Bowtie / Bowtie2 cells below.
# NOTE(review): no file_type is passed here, unlike the load in the BlastN section
# (file_type="fasta") — presumably the parameter has a suitable default; confirm.
reference_database = ReferenceDatabase(dir_output=dir_output)
reference_database.load_database_from_file(files=file_database_reference, database_overwrite=True)

In [None]:
# Positive Bowtie case: filter the "match" table on the target sequences;
# WASH7P::1 is expected to be removed.
sequence_type = "target"

oligo_database_match = OligoDatabase(min_oligos_per_region=2, write_regions_with_insufficient_oligos=True, dir_output=dir_output)
oligo_database_match.load_database(file_database_oligos_match)

bowtie_filter = BowtieFilter(bowtie_search_parameters, dir_output=dir_output)
res = bowtie_filter.apply(sequence_type, oligo_database_match, 2, reference_database)

assert "WASH7P::1" not in res.database["WASH7P"].keys(), "A matching oligo has not been filtered by Bowtie!"

In [None]:
# Negative Bowtie case: filter the "nomatch" table; AGRN::1 is expected to survive.
sequence_type = "target"

oligo_database_nomatch = OligoDatabase(min_oligos_per_region=2, write_regions_with_insufficient_oligos=True, dir_output=dir_output)
oligo_database_nomatch.load_database(file_database_oligos_nomatch)

bowtie_filter = BowtieFilter(bowtie_search_parameters, dir_output=dir_output)
res = bowtie_filter.apply(sequence_type, oligo_database_nomatch, 2, reference_database)

assert "AGRN::1" in res.database["AGRN"].keys(), "A non matching oligo has been filtered by Bowtie!"

In [None]:
# Positive Bowtie2 case: filter the "match" table on the target sequences;
# WASH7P::1 is expected to be removed.
sequence_type = "target"

oligo_database_match = OligoDatabase(min_oligos_per_region=2, write_regions_with_insufficient_oligos=True, dir_output=dir_output)
oligo_database_match.load_database(file_database_oligos_match)

# The variable keeps the name bowtie_filter although it holds a Bowtie2Filter here.
bowtie_filter = Bowtie2Filter(bowtie2_search_parameters, dir_output=dir_output)
res = bowtie_filter.apply(sequence_type, oligo_database_match, 2, reference_database)

assert "WASH7P::1" not in res.database["WASH7P"].keys(), "A matching oligo has not been filtered by Bowtie2!"

In [None]:
# Negative Bowtie2 case: filter the "nomatch" table; AGRN::1 is expected to survive.
sequence_type = "target"

oligo_database_nomatch = OligoDatabase(min_oligos_per_region=2, write_regions_with_insufficient_oligos=True, dir_output=dir_output)
oligo_database_nomatch.load_database(file_database_oligos_nomatch)

# The variable keeps the name bowtie_filter although it holds a Bowtie2Filter here.
bowtie_filter = Bowtie2Filter(bowtie2_search_parameters, dir_output=dir_output)
res = bowtie_filter.apply(sequence_type, oligo_database_nomatch, 2, reference_database)

assert "AGRN::1" in res.database["AGRN"].keys(), "A non matching oligo has been filtered by Bowtie2!"

## Test Variants Filter

In [None]:
# Rebind reference_database to a database loaded from the VCF variants file.
# Any cell executed after this one that reads reference_database without
# rebinding it first receives this VCF-based database.
reference_database = ReferenceDatabase(dir_output=dir_output)
reference_database.load_database_from_file(files=file_database_reference_variants,file_type="vcf",database_overwrite=True)

In [None]:
# Positive variants case: apply VariantsFilter with remove_hits=False and check
# that the "variants_filter" attribute of WASH7P::1 is set (not None).
oligo_database_match = OligoDatabase(min_oligos_per_region=2, write_regions_with_insufficient_oligos=True, dir_output=dir_output)
oligo_database_match.load_database_from_table(file_database_oligos_match, database_overwrite=True, merge_databases_on_sequence_type="oligo")

variants_filter = VariantsFilter(dir_output=dir_output)
  
variants_filter.set_reference_database(reference_database=reference_database)
# Debug output: prints the class of the filter object.
print(type(variants_filter))
res = variants_filter.apply(oligo_database=oligo_database_match, remove_hits=False, n_jobs=2)


# The attribute is read from the input database object, not from res.
assert oligo_database_match.get_oligo_attribute_value(attribute="variants_filter", 
                                                      flatten=True, 
                                                      region_id="WASH7P", 
                                                      oligo_id="WASH7P::1") is not None, "A matching oligo has not been flagged!"

In [None]:
# Negative variants case: apply VariantsFilter with remove_hits=False and check
# that the "variants_filter" attribute of AGRN::1 stays None.
oligo_database_nomatch = OligoDatabase(min_oligos_per_region=2, write_regions_with_insufficient_oligos=True, dir_output=dir_output)
oligo_database_nomatch.load_database_from_table(file_database_oligos_nomatch, database_overwrite=True, merge_databases_on_sequence_type="oligo")

variants_filter = VariantsFilter(dir_output=dir_output)
  
variants_filter.set_reference_database(reference_database=reference_database)
# Debug output: prints the class of the filter object.
print(type(variants_filter))
res = variants_filter.apply(oligo_database=oligo_database_nomatch, remove_hits=False, n_jobs=2)

# The attribute is read from the input database object, not from res.
assert oligo_database_nomatch.get_oligo_attribute_value(attribute="variants_filter", 
                                                        flatten=True, 
                                                        region_id="AGRN", 
                                                        oligo_id="AGRN::1") is None, "A non-matching oligo has been flagged!"

## Test Crosshybridization Filter

In [None]:
# Load every accepted solution of the cross-hybridization tests. The filter result
# is later compared against these lists with `in`, so any one of the stored
# databases counts as a correct outcome.
expected_oligos_larger_region = []
for i, solution_file in enumerate(solutions_larger_region):
    solution = OligoDatabase(
        min_oligos_per_region=2,
        write_regions_with_insufficient_oligos=True,
        dir_output=os.path.join(dir_output, f"oligo_database_solution_larger_region_{i}"),
    )
    solution.load_database(solution_file)
    expected_oligos_larger_region.append(solution.database)

expected_oligos_degree = []
for i, solution_file in enumerate(solutions_degree):
    solution = OligoDatabase(
        min_oligos_per_region=2,
        write_regions_with_insufficient_oligos=True,
        # Each degree solution gets its own directory; reusing the
        # "larger_region" name would collide with the loop above for i = 0..2.
        dir_output=os.path.join(dir_output, f"oligo_database_solution_degree_{i}"),
    )
    solution.load_database(solution_file)
    expected_oligos_degree.append(solution.database)

In [None]:
# Cross-hybridization detected by exact matches, resolved with the larger-region policy.
exactmatch_filter = ExactMatchFilter()
policy = RemoveByLargerRegionPolicy()

sequence_type = "oligo"

oligo_database_crosshyb = OligoDatabase(
    min_oligos_per_region=2,
    write_regions_with_insufficient_oligos=True,
    dir_output=dir_output,
)
oligo_database_crosshyb.load_database(file_database_oligos_crosshyb)

cross_hyb_filter = CrossHybridizationFilter(policy, exactmatch_filter, dir_output)
res = cross_hyb_filter.apply(sequence_type, oligo_database_crosshyb, 2)

# The result must equal one of the accepted larger-region solutions.
assert res.database in expected_oligos_larger_region, (
    "The cross-hybridization filter didn't return the expected oligos."
)

In [None]:
# Cross-hybridization detected by exact matches, resolved with the degree policy.
exactmatch_filter = ExactMatchFilter()
policy = RemoveByDegreePolicy()

sequence_type = "oligo"

oligo_database_crosshyb = OligoDatabase(
    min_oligos_per_region=2,
    write_regions_with_insufficient_oligos=True,
    dir_output=dir_output,
)
oligo_database_crosshyb.load_database(file_database_oligos_crosshyb)

cross_hyb_filter = CrossHybridizationFilter(policy, exactmatch_filter, dir_output)
res = cross_hyb_filter.apply(sequence_type, oligo_database_crosshyb, 2)

# The result must equal one of the accepted degree solutions.
assert res.database in expected_oligos_degree, (
    "The cross-hybridization filter didn't return the expected oligos."
)

In [None]:
# Cross-hybridization detected by BlastN (cross-hyb parameters), resolved with the larger-region policy.
blast_filter = BlastNFilter(
    blast_search_parameters_crosshyb,
    blast_hit_parameters_crosshyb,
    dir_output=dir_output,
)
policy = RemoveByLargerRegionPolicy()

sequence_type = "oligo"

oligo_database_crosshyb = OligoDatabase(
    min_oligos_per_region=2,
    write_regions_with_insufficient_oligos=True,
    dir_output=dir_output,
)
oligo_database_crosshyb.load_database(file_database_oligos_crosshyb)

cross_hyb_filter = CrossHybridizationFilter(policy, blast_filter, dir_output)
res = cross_hyb_filter.apply(sequence_type, oligo_database_crosshyb, 2)

# The result must equal one of the accepted larger-region solutions.
assert res.database in expected_oligos_larger_region, (
    "The cross-hybridization filter didn't return the expected oligos."
)

In [None]:
# Cross-hybridization detected by BlastN (cross-hyb parameters), resolved with the degree policy.
blast_filter = BlastNFilter(
    blast_search_parameters_crosshyb,
    blast_hit_parameters_crosshyb,
    dir_output=dir_output,
)
policy = RemoveByDegreePolicy()

sequence_type = "oligo"

oligo_database_crosshyb = OligoDatabase(
    min_oligos_per_region=2,
    write_regions_with_insufficient_oligos=True,
    dir_output=dir_output,
)
oligo_database_crosshyb.load_database(file_database_oligos_crosshyb)

cross_hyb_filter = CrossHybridizationFilter(policy, blast_filter, dir_output)
res = cross_hyb_filter.apply(sequence_type, oligo_database_crosshyb, 2)

# The result must equal one of the accepted degree solutions.
assert res.database in expected_oligos_degree, (
    "The cross-hybridization filter didn't return the expected oligos."
)

In [None]:
# Cross-hybridization detected by Bowtie (cross-hyb parameters), resolved with the larger-region policy.
bowtie_filter = BowtieFilter(bowtie_search_parameters_crosshyb, dir_output=dir_output)
policy = RemoveByLargerRegionPolicy()

sequence_type = "oligo"

oligo_database_crosshyb = OligoDatabase(
    min_oligos_per_region=2,
    write_regions_with_insufficient_oligos=True,
    dir_output=dir_output,
)
oligo_database_crosshyb.load_database(file_database_oligos_crosshyb)

cross_hyb_filter = CrossHybridizationFilter(policy, bowtie_filter, dir_output)
res = cross_hyb_filter.apply(sequence_type, oligo_database_crosshyb, 2)

# The result must equal one of the accepted larger-region solutions.
assert res.database in expected_oligos_larger_region, (
    "The cross-hybridization filter didn't return the expected oligos."
)

In [None]:
# Cross-hybridization detected by Bowtie (cross-hyb parameters), resolved with the degree policy.
bowtie_filter = BowtieFilter(bowtie_search_parameters_crosshyb, dir_output=dir_output)
policy = RemoveByDegreePolicy()

sequence_type = "oligo"

oligo_database_crosshyb = OligoDatabase(
    min_oligos_per_region=2,
    write_regions_with_insufficient_oligos=True,
    dir_output=dir_output,
)
oligo_database_crosshyb.load_database(file_database_oligos_crosshyb)

cross_hyb_filter = CrossHybridizationFilter(policy, bowtie_filter, dir_output)
res = cross_hyb_filter.apply(sequence_type, oligo_database_crosshyb, 2)

# The result must equal one of the accepted degree solutions.
assert res.database in expected_oligos_degree, (
    "The cross-hybridization filter didn't return the expected oligos."
)

## AI Filters



In [None]:
def reverse_complement(sequence):
    """Return the reverse complement of a DNA sequence.

    Only the uppercase bases A, C, G, T and the gap character "-" are
    supported; the gap character maps to itself.

    :param sequence: Sequence to reverse-complement.
    :type sequence: str
    :return: The complemented bases in reverse order.
    :rtype: str
    :raises KeyError: If the sequence contains any other character.
    """
    nt = {"A": "T", "T": "A", "C": "G", "G": "C", "-": "-"}
    # Walk the sequence backwards and join once; prepending to a list for
    # every base would copy the list each time (quadratic in the length).
    return "".join(nt[n] for n in reversed(sequence))

Test BlastN in specific scenarios

In [None]:
# Rebinds the notebook-wide blast_search_parameters: from here on both strands
# are searched (the Setup cell uses strand "plus").
blast_search_parameters = {"perc_identity": 80, "strand": "both", "word_size": 10}

# Define the sequences: one query per alignment scenario. The four lists below
# are index-aligned, i.e. entry i of each list belongs to the same scenario.
# NOTE(review): refrences, gapped_queries and gapped_references are not
# referenced by the code visible in this notebook — confirm whether they are
# still needed.
sequences = [
    "GCTCGGGCTTGTCCACAGGATGGACCCAGCTGAGCAAGCT", #100% match
    "TACAGGCATGACCCACCATGCCTGGCCAACTTACATTTTT", #2 mismatches
    "AAGGCCAAGGTCTCTGGGGGGCTGGACAAGCCGCCCTCAT",  #90% mismatches
    "TTTTGCACCAGCCCAGATCGCATCTTCTTTCACCTGTTTT",  #80% coverage
    "CCGCTCGGCTGCATGAAACCAAAACGGCTGTCCGGGGACA", #gaps on the target
    "AACCCGGCATCACCAAGAGGAGGTTCAAGGGAACGCTGCA", #gaps on reference
    "TGCCCGCGCCGGAGTTCTCCCCAGCCGGAGTCCGGCAGGG", #gaps on the target, 80% cov
    "AACCTGGTTGCACCTCGGCCTGGTCCCAGCAGGTATGGTT", #gaps on reference, 80% cov
    "ACTGATTGCTGCAGACGCTCACCCCAGACACTCACTGCAC", #overflow start
    "TATATATTTTGCACACTTTAAAATATTGGGTTGTTTACCG" # overflow end
]
# Reference-side sequence of each scenario (ungapped, except for the "----"
# padding in the two overflow entries).
refrences = [
    "GCTCGGGCTTGTCCACAGGATGGACCCAGCTGAGCAAGCT",  #100% match
    "TACAGGCATGAGCCACCATGCCTGGCCAACTCACATTTTT", #2 mismatches
    "AAGGCCGGGGTCTCTGGGGGGCTGGAGAAGCCTCCCTCAT",  #90% mismatches
    "AGCAGCACCAGCCCAGATCGCATCTTCTTTCACCTGAACG",  #80% coverage
    "CCGCTACCGGCTGCATGACAACCAAAACGGCTGGTCCGGGGACA",  #gaps on the target
    "AACCCCATCACCAAGAGGAGGTTCAGGGAAGCTGCA", #gaps on reference
    "TGCCCGCGCCGGAGTTCTCCCCGGAGCCGGAGTCCGGCAGGG", #gaps on the target, 80% cov
    "TCCCTGGGCACCTCGGCCTGGTCCCAGCAGGTATGGGC", #gaps on reference, 80% cov
    "----ATTGCTGCAGACGCTCACCCCAGACACTCACTGCAC", #overflow start
    "TATATATTTTGCACACTTTAAAATATTGGGTTGTTT----", # overflow end
]
# Queries with "-" inserted where the alignment has a gap on the query side.
gapped_queries = [
    "GCTCGGGCTTGTCCACAGGATGGACCCAGCTGAGCAAGCT", #100% match
    "TACAGGCATGACCCACCATGCCTGGCCAACTTACATTTTT", #2 mismatches
    "AAGGCCAAGGTCTCTGGGGGGCTGGACAAGCCGCCCTCAT",  #90% mismatches
    "TTTTGCACCAGCCCAGATCGCATCTTCTTTCACCTGTTTT",  #80% coverage
    "CCGCT--CGGCTGCATGA-AACCAAAACGGCTG-TCCGGGGACA", #gaps on the target
    "AACCCGGCATCACCAAGAGGAGGTTCAAGGGAACGCTGCA", #gaps on reference
    "TGCCCGCGCCGGAGTTCTCCCC--AGCCGGAGTCCGGCAGGG", #gaps on the target, 80% cov
    "AACCTGGTTGCACCTCGGCCTGGTCCCAGCAGGTATGGTT", #gaps on reference, 80% cov
    "ACTGATTGCTGCAGACGCTCACCCCAGACACTCACTGCAC", #overflow start
    "TATATATTTTGCACACTTTAAAATATTGGGTTGTTTACCG" # overflow end
]
# References with "-" inserted where the alignment has a gap on the reference side.
gapped_references = [
    "GCTCGGGCTTGTCCACAGGATGGACCCAGCTGAGCAAGCT",  #100% match
    "TACAGGCATGAGCCACCATGCCTGGCCAACTCACATTTTT", #2 mismatches
    "AAGGCCGGGGTCTCTGGGGGGCTGGAGAAGCCTCCCTCAT",  #90% mismatches
    "AGCAGCACCAGCCCAGATCGCATCTTCTTTCACCTGAACG",  #80% coverage
    "CCGCTACCGGCTGCATGACAACCAAAACGGCTGGTCCGGGGACA",  #gaps on the target
    "AACCC--CATCACCAAGAGGAGGTTCA-GGGAA-GCTGCA", #gaps on reference
    "TGCCCGCGCCGGAGTTCTCCCCGGAGCCGGAGTCCGGCAGGG", #gaps on the target, 80% cov
    "TCCCTGG--GCACCTCGGCCTGGTCCCAGCAGGTATGGGC", #gaps on reference, 80% cov
    "----ATTGCTGCAGACGCTCACCCCAGACACTCACTGCAC", #overflow start
    "TATATATTTTGCACACTTTAAAATATTGGGTTGTTT----" # overflow end
]
# Build a single region "region" that contains every scenario sequence twice:
# entry 2*i targets the sequence itself, entry 2*i + 1 targets its reverse
# complement, so that the minus strand is searched as well.
# The mapping is deliberately not named `dict`: that would shadow the builtin
# for every cell executed afterwards.
region_oligos = {}
for i, seq in enumerate(sequences):
    rc_seq = reverse_complement(seq)
    for oligo_index, oligo_seq, target_seq in (
        (2 * i, rc_seq, seq),
        (2 * i + 1, seq, rc_seq),
    ):
        # A fresh attribute dict (with fresh lists) is created for every entry.
        region_oligos[f"region::{oligo_index}"] = {
            "oligo": oligo_seq,
            "target": target_seq,
            "chromosome": [None],
            "start": [None],
            "end": [None],
            "strand": [None],
            "regiontype": ["random_sequence"],
        }

oligo_database = OligoDatabase()
oligo_database.database = {"region": region_oligos}
oligo_database.save_database()

blast_filter = BlastNFilter(blast_search_parameters, blast_hit_parameters, dir_output=dir_output)
# NOTE(review): when the notebook is run top to bottom, reference_database was
# last bound in the "Test Variants Filter" section to a database loaded from the
# VCF file — confirm that this is the reference BlastN should search here.
res = blast_filter.apply("target", oligo_database, 2, reference_database)
# No assertion in this cell: the filtered database is printed for manual inspection.
print(res.database)


Test Bowtie in specific scenarios

In [None]:
# Build a single region "region" that contains every scenario sequence twice:
# entry 2*i targets the sequence itself, entry 2*i + 1 targets its reverse
# complement, so that the minus strand is searched as well.
# The mapping is deliberately not named `dict`: that would shadow the builtin
# for every cell executed afterwards.
region_oligos = {}
for i, seq in enumerate(sequences):
    rc_seq = reverse_complement(seq)
    for oligo_index, oligo_seq, target_seq in (
        (2 * i, rc_seq, seq),
        (2 * i + 1, seq, rc_seq),
    ):
        # A fresh attribute dict (with fresh lists) is created for every entry.
        region_oligos[f"region::{oligo_index}"] = {
            "oligo": oligo_seq,
            "target": target_seq,
            "chromosome": [None],
            "start": [None],
            "end": [None],
            "strand": [None],
            "regiontype": ["random_sequence"],
        }

oligo_database = OligoDatabase()
oligo_database.database = {"region": region_oligos}

bowtie_filter = BowtieFilter(bowtie_search_parameters, dir_output=dir_output)
# NOTE(review): when the notebook is run top to bottom, reference_database was
# last bound in the "Test Variants Filter" section to a database loaded from the
# VCF file — confirm that this is the reference Bowtie should search here.
res = bowtie_filter.apply("target", oligo_database, 2, reference_database)
# No assertion in this cell: the filtered database is printed for manual inspection.
print(res.database)

In [None]:
from Bio.Seq import Seq

# Print every scenario sequence followed by its reverse complement, each wrapped in a Seq object.
list_to_change = sequences
new_list = [
    Seq(variant)
    for elem in list_to_change
    for variant in (elem, reverse_complement(elem))
]
print(new_list)

In [None]:
from oligo_designer_toolsuite.oligo_specificity_filter import HybridizationProbabilityFilter
from oligo_designer_toolsuite_ai_filters.api import APIBase
import numpy as np
import pandas as pd

# Input files of the hybridization-probability test.
# NOTE(review): these paths start at "../../data/tests", whereas the Setup cell
# uses "../data/databases" — confirm which layout matches the repository.
DIR_TEST_DATA = "../../data/tests"
FILE_DATABASE_REFERENCE = f"{DIR_TEST_DATA}/databases/database_reference/database_reference.fna"
FILE_DATABASE_OLIGOS_AI = f"{DIR_TEST_DATA}/databases/database_oligos_tsv/database_oligos_ai.tsv"
FILE_TABLE_HITS_BLAST_AI = f"{DIR_TEST_DATA}/table_hits/table_hits_blast_ai.tsv"
FILE_TABLE_HITS_BOWTIE_AI = f"{DIR_TEST_DATA}/table_hits/table_hits_bowtie_ai.tsv"

class DummyAPI(APIBase):
    """Stub model that scores a hit 1 only when its gapped query and gapped reference are identical."""

    def predict(self, queries, gapped_queries, references, gapped_references):
        """Score each hit with 1.0 for an exact gapped match and 0.0 otherwise.

        :param queries: Query sequences; only their number is used (length of the result).
        :param gapped_queries: Gapped query sequences, compared pairwise with ``gapped_references``.
        :param references: Reference sequences (not used by this stub).
        :param gapped_references: Gapped reference sequences.
        :return: float32 array with one score per query.
        :rtype: np.ndarray
        """
        # np.zeros rather than np.ndarray: every slot starts at a defined 0, so
        # entries not reached by the zip below never hold uninitialised memory.
        predictions = np.zeros(shape=(len(queries),), dtype=np.float32)
        for i, (q, r) in enumerate(zip(gapped_queries, gapped_references)):
            if q == r:
                predictions[i] = 1
        return predictions
    
# Hybridization-probability filter on top of BlastN, with the model replaced by
# DummyAPI so that the outcome is deterministic.
tmp_path = os.path.join(os.getcwd(), "tmp_hybridization_probability_outputs")
blast_search_parameters = {"perc_identity": 80, "strand": "both", "word_size": 10}
blast_hit_parameters = {"coverage": 50}
alignment_filter = BlastNFilter(
    blast_search_parameters=blast_search_parameters,
    blast_hit_parameters=blast_hit_parameters,
    dir_output=tmp_path,
)
# Named hybridization_filter rather than `filter` so that the builtin filter()
# is not shadowed for the rest of the notebook session.
hybridization_filter = HybridizationProbabilityFilter(alignment_method=alignment_filter, threshold=0.1)
hybridization_filter.model = DummyAPI()

database = OligoDatabase(dir_output=tmp_path)
database.load_database(FILE_DATABASE_OLIGOS_AI)
reference_database = ReferenceDatabase(dir_output=tmp_path)
reference_database.load_sequences_from_fasta(files_fasta=FILE_DATABASE_REFERENCE, database_overwrite=True)
# NOTE(review): table_hits is loaded but not used in this cell — confirm whether a later cell needs it.
table_hits = pd.read_csv(FILE_TABLE_HITS_BLAST_AI, sep="\t")
sequence_type = "target"
region_id = "region"

filtered_database = hybridization_filter.apply(
    sequence_type=sequence_type,
    oligo_database=database,
    n_jobs=2,
    reference_database=reference_database,
)
returned_oligos = set(filtered_database.database[region_id].keys())
# Oligos region::2 to region::19 are expected to remain after filtering.
expected_oligos = {f"region::{i}" for i in range(2, 20)}

assert returned_oligos == expected_oligos, f"The Blast ai filter didn't return the expected oligos. \n\nExpected:\n{expected_oligos}\n\nGot:\n{returned_oligos}"
