In [1]:
import os
import time
from pathlib import Path

import yaml

from oligo_designer_toolsuite.database import OligoDatabase, CustomGenomicRegionGenerator

from oligo_designer_toolsuite.oligo_specificity_filter import Blastn

from oligo_designer_toolsuite.pipelines import MerfishProbeDesigner
from argparse import ArgumentParser, RawDescriptionHelpFormatter

from oligo_designer_toolsuite.pipelines._utils import generate_config_file


In [2]:
import warnings
warnings.filterwarnings("ignore")

In [8]:

# Generate a pipeline config file and parse it into a dictionary.
# NOTE(review): the three positional arguments are presumably the pipeline
# name, the directory the config is written to, and the annotation source --
# confirm against generate_config_file.
config_path = generate_config_file("merfish", "output_test", "custom")
with open(config_path, "r") as config_stream:
    config = yaml.safe_load(config_stream)

In [10]:
# Create the MERFISH pipeline object, pointing it at the output directory
# named in the config.
dir_output = config["output"]
probe_designer = MerfishProbeDesigner(dir_output=dir_output)

2023-05-26 14:03:54,662 [INFO] Parameters Init:
2023-05-26 14:03:54,663 [INFO] dir_output = output_merfish
2023-05-26 14:03:54,663 [INFO] write_removed_genes = True
2023-05-26 14:03:54,663 [INFO] write_intermediate_steps = True


In [11]:
# Load the genome annotation (GTF) and sequence (FASTA) described by the
# config's source settings.
annotation_source = config["source"]
annotation_params = config["source_params"]
probe_designer.load_annotations(source=annotation_source, source_params=annotation_params)

2023-05-26 14:03:57,749 [INFO] Parameters Load Annotations:
2023-05-26 14:03:57,750 [INFO] source = custom
2023-05-26 14:03:57,751 [INFO] source_params = {'file_annotation': 'data/annotations/custom_GCF_000001405.40_GRCh38.p14_genomic_chr16.gtf', 'file_sequence': 'data/annotations/custom_GCF_000001405.40_GRCh38.p14_genomic_chr16.fna', 'files_source': 'NCBI', 'species': 'Homo_sapiens', 'annotation_release': 110, 'genome_assembly': 'GRCh38.p14'}
2023-05-26 14:04:03,318 [INFO] The following annotation files are used for GTF annotation of regions: data/annotations/custom_GCF_000001405.40_GRCh38.p14_genomic_chr16.gtf and for fasta sequence file: data/annotations/custom_GCF_000001405.40_GRCh38.p14_genomic_chr16.fna .
2023-05-26 14:04:03,318 [INFO] The annotations are from NCBI source, for the species: Homo_sapiens, release number: 110 and genome assembly: GRCh38.p14


In [12]:
# Read the gene names, one per line, from the file named in the config.
# Trailing whitespace (including the newline) is stripped from each line, and
# lines that are empty after stripping -- e.g. a blank line at the end of the
# file -- are skipped so that no empty gene name reaches the probe database.
with open(config["file_genes"]) as handle:
    genes = [gene for gene in (line.rstrip() for line in handle) if gene]

In [13]:
# Generate candidate probes for the requested genes, using the probe length
# bounds from the "target_probe" section of the config.
target_probe_config = config["target_probe"]
probe_database, file_database = probe_designer.create_probe_database(
    genes=genes,
    probe_length_min=target_probe_config["probe_length_min"],
    probe_length_max=target_probe_config["probe_length_max"],
    min_probes_per_gene=config["min_probes_per_gene"],
    n_jobs=config["n_jobs"],
)

2023-05-26 14:04:06,780 [INFO] Parameters Create Database:
2023-05-26 14:04:06,781 [INFO] genes = ['AARS1', 'DECR2', 'FAM234A', 'RHBDF1', 'WASIR2']
2023-05-26 14:04:06,782 [INFO] probe_length_min = 30
2023-05-26 14:04:06,783 [INFO] probe_length_max = 30
2023-05-26 14:04:06,784 [INFO] min_probes_per_gene = 0
2023-05-26 14:04:06,785 [INFO] n_jobs = 1
2023-05-26 14:04:09,881 [INFO] Genes with <= 0 probes will be removed from the probe database and their names will be stored in 'output_merfish/regions_with_insufficient_oligos.txt'.
2023-05-26 14:04:09,882 [INFO] Step - Generate Probes: the database contains 15491 probes from 5 genes.


In [14]:
# Property filter step: replaces probe_database / file_database with the
# values returned by the filter.
probe_database, file_database = probe_designer.filter_probes_by_property(
    probe_database,
    n_jobs=config["n_jobs"],
)

In [15]:
# Specificity filter step: replaces probe_database / file_database with the
# values returned by the filter.
probe_database, file_database = probe_designer.filter_probes_by_specificity(
    probe_database,
    n_jobs=config["n_jobs"],
)

In [16]:
# Cross-hybridization filter step: replaces probe_database / file_database
# with the values returned by the filter.
probe_database, file_database = probe_designer.filter_cross_hybridization_targets(
    probe_database,
    n_jobs=config["n_jobs"],
)

In [17]:
# Design the two primers from the barcode FASTA file named in the config.
# This file must exist on disk; a missing file raises FileNotFoundError.
primer_barcode_file = config['primer_probe']['file_bc25mer']
primer1, primer2, primer_file_database = probe_designer.design_primer_probes(
    primer_barcode_file,
    n_jobs=config["n_jobs"],
)

FileNotFoundError: [Errno 2] No such file or directory: 'data/bc25mer/bc25mer.240k.fasta'

In [15]:
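# Optional checkpoint: uncomment to pickle the designer state for reuse in a
# later session (requires `import pickle` first, which this notebook does not do).
# with open('merfish_probe_designer.pickle', 'wb') as handle:
#     pickle.dump(probe_designer, handle, protocol=pickle.HIGHEST_PROTOCOL)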

In [18]:
# Display `primer1` as returned by design_primer_probes (a sanity check).
primer1

['GATATCACGTCTGTCGCGT']

In [19]:
# Design the readout probes from the primer database produced by the primer
# step and the readout barcode file named in the config.
readout_barcode_file = config['readout_probe']['file_bc25mer']
readout_probes = probe_designer.design_readout_probes(
    primer_file_database,
    readout_barcode_file,
    n_jobs=config["n_jobs"],
)

In [20]:
from oligo_designer_toolsuite.sequence_design import MerfishSequence

# Sequence assembler, sized by the number of requested genes.
# NOTE(review): len(genes) counts every requested gene, including any that
# were dropped from the probe database by earlier filters -- confirm this is
# the intended n_genes.
merfish_sequence_designer = MerfishSequence(n_genes=len(genes))

# Hand the filtered target probes, the readout probes and both primers to the
# assembler.
assembled_probes = merfish_sequence_designer.assemble_probes(probe_database, readout_probes, primer1, primer2)

2
3
4
5


In [21]:
# Parameters for the final output, named here instead of being passed as bare
# literals.
READOUT_PROBE_LENGTH = 30
PRIMER_PROBE_LENGTH = 19  # equals the length of the primer shown above
NUM_BITS = 16

merfish_sequence_designer.write_final_probes(
    assembled_probes,
    readout_probes,
    readout_probe_length=READOUT_PROBE_LENGTH,
    primer_probe_length=PRIMER_PROBE_LENGTH,
    num_bits=NUM_BITS,
)

In [None]:
# Show the melting-temperature parameters used by the property filter: the
# shared defaults, with the property-filter section taking precedence on any
# key present in both.
tm_config = config["Tm_parameters"]
{**tm_config["shared"], **tm_config["property_filter"]}

{'check': True,
 'strict': True,
 'c_seq': None,
 'shift': 0,
 'nn_table': 'DNA_NN3',
 'tmm_table': 'DNA_TMM1',
 'imm_table': 'DNA_IMM1',
 'de_table': 'DNA_DE1',
 'dnac1': 50,
 'dnac2': 0,
 'selfcomp': False,
 'dNTPs': 0,
 'saltcorr': 7,
 'Na': 1000,
 'K': 0,
 'Tris': 0,
 'Mg': 0}

<iframe width="600" height="400" frameBorder="0" src="https://www.mindmeister.com/maps/public_map_shell/2774967174/scrinshot-pipeline?width=600&height=400&z=auto&no_share=1&no_logo=1" scrolling="no" style="overflow:hidden;margin-bottom:5px"></iframe>