In [1]:
import os
import sys
from oligo_designer_toolsuite.oligo_property_filter import (
    PropertyFilter,
    MaskedSequences,
    GCContent,
    MeltingTemperature,
    PadlockArms,
    ConsecutiveRepeats
)
import pandas as pd

# Project root used to resolve every relative path in this notebook.
# Set the OLIGO_DESIGNER_ROOT environment variable to run on another machine;
# when it is unset the original checkout location is used, as before.
ROOT_DIR = os.environ.get(
    "OLIGO_DESIGNER_ROOT",
    "/home/cw/PycharmProjects/oligo-designer-toolsuite_merfish",
)
# Make ROOT_DIR the working directory so relative paths below resolve against it.
os.chdir(ROOT_DIR)
import yaml
from Bio.SeqUtils import MeltingTemp as mt
os.getcwd()

'/home/cw/PycharmProjects/oligo-designer-toolsuite_merfish'

In [2]:
# Path of the YAML configuration file, located under ROOT_DIR.
config_file = os.path.join(
    ROOT_DIR,
    "tutorials",
    "configs",
    "merfish_readout_designer_test.yaml",
)

# Parse the configuration into `config`.
with open(config_file, 'r') as config_handle:
    config = yaml.safe_load(config_handle)

# Complete output path: <current working directory>/test_merfish/<config["dir_output"]>.
dir_output = os.path.join(
    os.getcwd(),
    "test_merfish",
    config["dir_output"],
)
dir_output

'/home/cw/PycharmProjects/oligo-designer-toolsuite_merfish/test_merfish/output'

In [3]:

# Read the tab-separated oligo table into a pandas DataFrame.
# The path is relative, so it is resolved against the current working directory.
oligo_database = pd.read_csv(
    'output/output/oligos_creation/oligo_DB_human_GRCh38_NCBI_release_110_transcripts.tsv',
    sep='\t',
)

In [10]:
# Melting-temperature (Tm) parameters for the property filter:
# start from the shared settings and overlay the property-filter-specific ones.
Tm_params = config["Tm_parameters"]["shared"].copy()
Tm_params.update(config["Tm_parameters"]["property_filter"])
# The config refers to the thermodynamic tables by name; resolve each name to
# the matching attribute of Bio.SeqUtils.MeltingTemp (imported as `mt`).
for table_key in ("nn_table", "tmm_table", "imm_table", "de_table"):
    Tm_params[table_key] = getattr(mt, Tm_params[table_key])
# NOTE: Tm_params must not be re-assigned from the raw config after this point;
# doing so would discard both the overlay and the resolved tables above.

# Tm correction parameters: shared settings overlaid with the property-filter ones.
Tm_correction_param = config["Tm_correction_parameters"]["shared"].copy()
Tm_correction_param.update(config["Tm_correction_parameters"]["property_filter"])

In [6]:
# Property-filter settings for the primers: a shallow copy of the
# "primers_setup" section of the loaded config, displayed as the cell output.
primers_param = dict(config["primers_setup"])
primers_param

{'GC_content_min': 40,
 'GC_content_max': 60,
 'Tm_min': 70,
 'Tm_max': 80,
 'Repeat_AA_max': 3}

In [7]:
# Property-filter settings for the target sequences: a shallow copy of the
# "targets_setup" section of the loaded config, displayed as the cell output.
target_param = dict(config["targets_setup"])
target_param

{'GC_content_min': 40,
 'GC_content_max': 60,
 'Tm_min': 70,
 'Tm_max': 80,
 'cross_hybridization_targets_Tm_min': 72,
 'internal_secondary_structures_Tm_min': 76,
 'internal_secondary_structures_threshold_deltaG': -9,
 'max_repeats_AA': 3}

In [8]:
# Property-filter settings for the readout sequences: a shallow copy of the
# "readout_setup" section of the loaded config, displayed as the cell output.
readout_param = dict(config["readout_setup"])
readout_param

{'GC_content_min': 40,
 'GC_content_max': 60,
 'Tm_min': 70,
 'Tm_max': 80,
 'Repeat_AA_max': 3}

In [12]:
# Property filters for the primers, all parameterised from `primers_param`.

# GC-content filter configured with the primer min/max bounds.
gc_content_primer = GCContent(
    GC_content_min=primers_param["GC_content_min"],
    GC_content_max=primers_param["GC_content_max"],
)

# Melting-temperature filter: primer Tm bounds plus the Tm / Tm-correction
# parameter dictionaries prepared earlier in the notebook.
melting_temperature_primer = MeltingTemperature(
    Tm_min=primers_param["Tm_min"],
    Tm_max=primers_param["Tm_max"],
    Tm_parameters=Tm_params,
    Tm_correction_parameters=Tm_correction_param,
)

# Consecutive-repeat filter configured with the primer "Repeat_AA_max" setting.
consecutive_repeats = ConsecutiveRepeats(primers_param["Repeat_AA_max"])


In [25]:



# Collect the primer property filters built earlier in this notebook.
# The names previously listed here (masked_sequences, gc_content,
# melting_temperature, padlock_arms) are never defined in this notebook and
# raise a NameError on a fresh kernel. `consecutive_repeats` is reused as
# configured from primers_param["Repeat_AA_max"] instead of being rebuilt
# with a hard-coded limit.
filters = [gc_content_primer, melting_temperature_primer, consecutive_repeats]

# initialize the property filter class
property_filter = PropertyFilter(filters=filters)

# NOTE(review): `oligo_database` is created with pd.read_csv earlier in this
# notebook; confirm that a DataFrame is what PropertyFilter.apply and
# write_oligos_DB expect (write_oligos_DB is not a pandas DataFrame method).

# filter the database
oligo_database = property_filter.apply(oligo_database=oligo_database, n_jobs=config["n_jobs"])

# write the intermediate result to a file when enabled in the config
if config["write_intermediate_steps"]:
    oligo_database.write_oligos_DB(format=config["file_format"], dir_oligos_DB="property_filter")

'/home/cw/PycharmProjects/output'

In [None]:
from oligo_designer_toolsuite.database import CustomReferenceDB, NcbiReferenceDB, EnsemblReferenceDB
from oligo_designer_toolsuite.oligo_specificity_filter import (
    SpecificityFilter,
    ExactMatches,
    LigationRegionCreation,
    BowtieSeedRegion,
    Blastn,
)

# Folder handed to the specificity filters for their temporary files.
dir_specificity = os.path.join(dir_output, "specificity_temporary")

# Build the reference database from the metadata and source files recorded on
# the oligo database; the block size comes from the config.
reference_database = CustomReferenceDB(
    species=oligo_database.species,
    genome_assembly=oligo_database.genome_assembly,
    annotation_release=oligo_database.annotation_release,
    files_source=oligo_database.files_source,
    annotation_file=oligo_database.annotation_file,
    sequence_file=oligo_database.sequence_file,
    dir_output=dir_output,
)
reference_database.create_reference_DB(block_size=config["block_size"])

# Initialize the individual specificity filters.
# (`exact_mathces` keeps its original spelling so existing references still work.)
exact_mathces = ExactMatches(dir_specificity=dir_specificity)
seed_ligation = LigationRegionCreation(
    ligation_region_size=config["ligation_region_size"],
)
seed_region = BowtieSeedRegion(
    dir_specificity=dir_specificity,
    seed_region_creation=seed_ligation,
)
blastn = Blastn(
    dir_specificity=dir_specificity,
    word_size=config["word_size"],
    percent_identity=config["percent_identity"],
    coverage=config["coverage"],
)
filters = [exact_mathces, seed_region, blastn]

# Initialize the specificity filter class.
specificity_filter = SpecificityFilter(
    filters=filters,
    write_genes_with_insufficient_oligos=config["write_removed_genes"],
)

# Filter the database against the reference.
oligo_database = specificity_filter.apply(
    oligo_database=oligo_database,
    reference_database=reference_database,
    n_jobs=config["n_jobs"],
)

# Write the intermediate result when enabled in the config.
if config["write_intermediate_steps"]:
    oligo_database.write_oligos_DB(
        format=config["file_format"],
        dir_oligos_DB="specificity_filter",
    )