# Imports

In [1]:
import os
import pickle
import time
import warnings
from pathlib import Path

import yaml

from oligo_designer_toolsuite.database import OligoDatabase, CustomGenomicRegionGenerator
from oligo_designer_toolsuite.oligo_specificity_filter import Blastn
from oligo_designer_toolsuite.pipelines import SeqFishPlusProbeDesigner

# Paths

In [2]:
# Relative path of the YAML file with the pipeline parameters; it is opened and
# parsed into `config` in the following cell.
seqfish_config_path = "../tutorials/configs/probe_design_SPOT_custom.yaml"

In [3]:
# Parse the YAML configuration into `config`; later cells index it by key.
config = yaml.safe_load(Path(seqfish_config_path).read_text())

# Folder handed to the pipeline for its outputs; created up front, together
# with any missing parents, and left alone when it already exists.
dir_output = "output_seqfish"
Path(dir_output).mkdir(parents=True, exist_ok=True)

# Pipeline

In [4]:
# probe_designer = SeqFishPlusProbeDesigner(dir_output=dir_output)

In [5]:
# probe_designer.load_annotations(
#         source=config["source"], source_params=config["source_params"]
#     )


In [6]:
# with open('probe_designer.pickle', 'wb') as handle:
#     pickle.dump(probe_designer, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [7]:
# Reuse the pickled designer when it is available; otherwise build it from the
# configured annotation source and cache it, so the notebook also runs on a
# fresh checkout where 'probe_designer.pickle' has not been created yet.
designer_cache = Path('probe_designer.pickle')
if designer_cache.exists():
    # SECURITY: unpickling executes arbitrary code -- only load a cache file
    # that this notebook wrote itself.
    with designer_cache.open('rb') as handle:
        probe_designer = pickle.load(handle)
else:
    probe_designer = SeqFishPlusProbeDesigner(dir_output=dir_output)
    probe_designer.load_annotations(
        source=config["source"], source_params=config["source_params"]
    )
    with designer_cache.open('wb') as handle:
        pickle.dump(probe_designer, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [8]:
# Determine which genes to design probes for. With no gene list file configured,
# `genes` stays None and a warning is issued, since (per the message) all genes
# of the fasta file are then used.
if config["file_genes"] is None:
    warnings.warn(
        "No gene list file was provided! All genes from fasta file are used to generate the probes. This choice can use a lot of resources."
    )
    genes = None
else:
    with open(config["file_genes"]) as handle:
        # One entry per line; blank lines (e.g. a trailing newline at the end of
        # the file) are skipped so they do not turn into empty gene names.
        genes = [line.rstrip() for line in handle if line.strip()]

In [9]:
# Create the probe database. Every tunable value is taken from the YAML config;
# `genes` is the list read from the gene file (or None).
database_settings = {
    "genes": genes,
    "probe_length_min": config["oligo_length_min"],
    "probe_length_max": config["oligo_length_max"],
    "min_probes_per_gene": config["min_oligos_per_gene"],
    "n_jobs": config["n_jobs"],
}
probe_database, file_database = probe_designer.create_probe_database(**database_settings)



In [None]:
# NOTE(review): `oligo_database` is not assigned in any cell visible in this
# notebook (the name only appears as a function parameter further down) and this
# cell carries no execution count. Presumably `probe_database` was meant, to
# reload the initial database from disk -- confirm before running.
oligo_database.load_oligo_database("output_seqfish/oligo_database/probe_database_initial.tsv")

In [10]:
# Filter the probes by property; the GC-content bounds, `number_consecutive`
# and `n_jobs` all come from the YAML config.
property_settings = dict(
    GC_content_min=config["GC_content_min"],
    GC_content_max=config["GC_content_max"],
    number_consecutive=config["number_consecutive"],
    n_jobs=config["n_jobs"],
)
probe_database, file_database = probe_designer.filter_probes_by_property(
    probe_database, **property_settings
)

In [11]:
# Filter the probes by specificity, again driven entirely by config values.
specificity_settings = dict(
    probe_length_max=config["oligo_length_max"],
    word_size=config["word_size"],
    percent_identity=config["percent_identity"],
    coverage=config["coverage"],
    strand=config["strand"],
    n_jobs=config["n_jobs"],
)
probe_database, file_database = probe_designer.filter_probes_by_specificity(
    probe_database, **specificity_settings
)



In [12]:
# Display what is left in the database after the specificity filter.
probe_database.database

{}

In [13]:
# Blastn filter handed to the readout-probe design; it works in a
# sub-folder of the output directory and uses the same config values as
# the specificity filtering of the probes.
specificity_dir = os.path.join(dir_output, "specificity_temporary")
blastn = Blastn(
    dir_specificity=specificity_dir,
    word_size=config["word_size"],
    percent_identity=config["percent_identity"],
    coverage=config["coverage"],
    strand=config["strand"],
)

# Design the readout probes: `num_pseudocolors` of them are requested, built
# from the four-letter DNA alphabet and checked against the designer's reference.
readout_settings = dict(
    GC_content_min=config["GC_min_readout"],
    GC_content_max=config["GC_max_readout"],
    number_consecutive=config["number_consecutive_readout"],
    length=config["length_readout"],
    blast_filter=blastn,
    sequence_alphabet=["A", "T", "G", "C"],
    num_probes=config["num_pseudocolors"],
    reference_DB=probe_designer.reference,
)
readout_probes = probe_designer.design_readout_probes(**readout_settings)



In [14]:
from oligo_designer_toolsuite.utils import create_seqfish_plus_barcodes

def design_final_SeqFishPlus_probes(
    oligo_database,
    readout_database,
    n_pseudocolors,
    seed,
):
    """Flank every probe sequence with the readout sequences chosen by its gene's barcode.

    One barcode per gene is requested from ``create_seqfish_plus_barcodes``. For
    each gene, the first two barcode entries select the readout sequences that
    are prepended to every probe of that gene, and the last two select the ones
    that are appended.

    Args:
        oligo_database: Object whose ``database`` attribute is a nested mapping
            ``gene -> probe id -> {"sequence": ...}``. It is modified in place.
        readout_database: Object providing ``to_sequence_list()``; the result is
            indexed with the barcode entries.
        n_pseudocolors: Forwarded to ``create_seqfish_plus_barcodes``.
        seed: Forwarded to ``create_seqfish_plus_barcodes``.

    Returns:
        The same ``oligo_database`` object, with updated ``"sequence"`` entries.
    """
    gene_ids = list(oligo_database.database.keys())
    barcodes = create_seqfish_plus_barcodes(
        n_pseudocolors=n_pseudocolors,
        seed=seed,
        num_genes=len(gene_ids),
    )
    print(barcodes)
    readout_sequences = readout_database.to_sequence_list()

    # Barcodes are looked up by position rather than by gene key: the recorded
    # output for an empty database is a list (`[]`), which cannot be indexed with
    # gene identifiers. Presumably barcode k belongs to the k-th gene -- confirm
    # against create_seqfish_plus_barcodes.
    for gene_index, gene_id in enumerate(gene_ids):
        barcode = barcodes[gene_index]
        left = readout_sequences[barcode[0]] + readout_sequences[barcode[1]]
        right = readout_sequences[barcode[2]] + readout_sequences[barcode[3]]

        # Go through `.database` consistently instead of subscripting the
        # wrapper object itself.
        for probe in oligo_database.database[gene_id].values():
            original = probe["sequence"]
            flanked = str(left + str(original) + right)
            # Rebuild the value with the type it was stored as; this keeps the
            # sequence type without relying on a `Seq` name that is never
            # imported in this notebook.
            probe["sequence"] = type(original)(flanked)
    return oligo_database

In [15]:
# Attach the readout sequences to every probe. The barcode seed is read from an
# optional `seed` entry in the config and falls back to the previously
# hard-coded value 0 when that entry is absent.
olig_db = design_final_SeqFishPlus_probes(
    probe_database,
    readout_probes,
    n_pseudocolors=config["num_pseudocolors"],
    seed=config.get("seed", 0),
)

[]


In [16]:
# Display the database after the readout sequences were attached.
# `design_final_SeqFishPlus_probes` returns the object it was given, so this is
# the same data that `olig_db` refers to.
probe_database.database

{}