# Test Filters

In [1]:
import os
import time
from Bio.SeqUtils import MeltingTemp as mt

# Switch from the notebook's own folder to its parent directory; the project imports
# and the 'output/...' paths further down presumably rely on this location.
# The starting directory is remembered once per kernel so that re-running this cell
# always lands in the same place. A bare os.chdir('../') would climb one level
# higher on every re-run and silently break every path derived from `cwd`.
if '_notebook_start_dir' not in globals():
    _notebook_start_dir = os.getcwd()
os.chdir(os.path.join(_notebook_start_dir, '../'))
cwd = os.getcwd()
print(cwd)

from IO._database import NcbiDB, EnsemblDB, CustomDB
from oligo_pre_filter._filter_base import MaskedSequences, GCContent, MeltingTemperature
from oligo_pre_filter._filter_padlock_probes import PadlockArms

# Folder (relative to the working directory) that holds the annotation files
# used by the cells below.
annotations_subdir = 'output/annotations'
dir_output = os.path.join(cwd, annotations_subdir)
print(dir_output)

/home/chels/CRISPR/repos/oligo-designer-toolsuite
/home/chels/CRISPR/repos/oligo-designer-toolsuite/output/annotations


Get list of genes

In [2]:
from oligo_designer_toolsuite.IO._data_parser import read_gtf

# Path of the GRCh38.p14 genomic annotation (GTF). It is built from `dir_output`
# (the 'output/annotations' folder defined in the first cell) rather than by
# re-typing that folder as a string, so the location is defined in one place only.
gtf = os.path.join(dir_output, "GCF_000001405.40_GRCh38.p14_genomic.gtf")

# Parse the annotation with the project's read_gtf helper. The next cell reads the
# "gene_id" column of the returned table.
df = read_gtf(gtf)

In [3]:
# Take the first ten distinct gene identifiers found in the annotation table.
# (Assumes `df` is a pandas DataFrame, where unique() keeps order of first appearance.)
unique_gene_ids = df["gene_id"].unique()
genes = unique_gene_ids[:10].tolist()

Create a new Oligo_DB using the filters

In [4]:
# Settings for the melting-temperature calculation. The keys mirror the keyword
# arguments of Bio.SeqUtils.MeltingTemp.Tm_NN; presumably the filters forward them
# to that function -- confirm in oligo_pre_filter.
Tm_parameters = {
    # sequence checking / alignment options
    'check': True,
    'strict': True,
    'c_seq': None,
    'shift': 0,
    # lookup tables taken from Bio.SeqUtils.MeltingTemp
    'nn_table': mt.DNA_NN3,
    'tmm_table': mt.DNA_TMM1,
    'imm_table': mt.DNA_IMM1,
    'de_table': mt.DNA_DE1,
    # strand concentrations
    'dnac1': 50,  # [nM]
    'dnac2': 0,
    'selfcomp': False,
    # buffer composition and salt-correction method
    'dNTPs': 0,
    'saltcorr': 7,
    'Na': 1.25,  # [mM]
    'K': 75,  # [mM]
    'Tris': 20,  # [mM]
    'Mg': 10,  # [mM]
}
# Chemical-correction settings (DMSO / formamide terms). The keys match the keyword
# arguments of Bio.SeqUtils.MeltingTemp.chem_correction; presumably the filters
# forward them to that function -- confirm in oligo_pre_filter.
Tm_correction_parameters = {
    'DMSO': 0,
    'DMSOfactor': 0.75,
    'fmdfactor': 0.65,
    'fmdmethod': 1,
    'GC': None,
    'fmd': 20,
}
# Input files handed to CustomDB below: the genome annotation (GTF) and the
# matching sequence file (.fna), both located in `dir_output`.
annotation = dir_output + '/GCF_000001405.40_GRCh38.p14_genomic.gtf'
sequence = dir_output + '/GCF_000001405.40_GRCh38.p14_genomic.fna'

# Gene names passed to create_oligos_DB below.
# NOTE(review): this assignment replaces the `genes` list derived from the GTF in
# the earlier cell -- confirm that overriding it is intended.
genes = [
    'WASH7P', 'DDX11L1', 'TRNT', 'NOC2L', 'PLEKHN1',
    'AGRN', 'UBE2J2', 'DVL1', 'MIB2', 'LOC112268402_1',
]

# Build the individual filter objects; every threshold is passed straight to the
# project's filter classes.
masked_sequences = MaskedSequences()
GC_content = GCContent(GC_content_min=40, GC_content_max=60)
melting_temperature = MeltingTemperature(
    Tm_min=52,
    Tm_max=67,
    Tm_parameters=Tm_parameters,
    Tm_correction_parameters=Tm_correction_parameters,
)
arms_tm = PadlockArms(
    min_arm_length=10,
    max_Tm_dif=2,
    Tm_min=40,
    Tm_max=43,
    Tm_parameters=Tm_parameters,
    Tm_correction_parameters=Tm_correction_parameters,
)

# The filters are handed to the database in this order.
filters = [masked_sequences, GC_content, melting_temperature, arms_tm]
custom = CustomDB(
    probe_length_min=30,
    probe_length_max=40,
    file_annotation=annotation,
    file_sequence=sequence,
    filters=filters,
)

# Create the oligo database for the selected genes and report how long the
# creation plus filtering took (wall-clock seconds).
start_time = time.time()
custom.create_oligos_DB(
    genes=genes,
    number_batchs=1,
    dir_output=dir_output,
    write=True,
)
elapsed_seconds = time.time() - start_time
print(" Creating and filtering the probes: --- %s seconds ---" % elapsed_seconds)



the total number of probes found: 6367
 Creating and filtering the probes: --- 472.6519136428833 seconds ---


In [5]:
# Inspect the resulting oligo database: list all gene keys, then show the record
# stored for the first sequence of the first gene.
oligos_db = custom.oligos_DB
print(oligos_db.keys())

gene = list(oligos_db)[0]
seq = list(oligos_db[gene])[0]
print(oligos_db[gene][seq])


dict_keys(['DDX11L1', 'WASH7P', 'MIR6859-1', 'MIR1302-2HG', 'MIR1302-2', 'FAM138A', 'OR4G4P', 'OR4G11P', 'OR4F5', 'LOC124903816', 'LOC124900384', 'SEPTIN14P18', 'CICP27', 'LOC729737', 'RNU6-1100P', 'GTF2IP10', 'LOC124903814', 'DDX11L17', 'LOC100996442', 'RPL23AP21', 'LOC124903815', 'RPL23AP24', 'LOC124900618', 'LOC112268260', 'WBP1LP7', 'OR4F29', 'SEPTIN14P13', 'CICP7', 'LOC100132287', 'LOC124904706', 'LOC105378947', 'LOC101928626', 'MTND1P23', 'MTND2P28', 'MTCO1P12', 'MIR12136', 'MTCO2P12', 'MTATP8P1', 'MTATP6P1', 'MTCO3P12', 'WBP1LP6', 'OR4F16', 'SEPTIN14P14', 'CICP3', 'LOC100288069', 'RNU6-1199P', 'LINC01409', 'LOC124903817', 'FAM87B', 'LINC00115', 'LINC01128', 'LOC107984850', 'FAM41C', 'TUBB8P11', 'LOC284600', 'LINC02593', 'LOC107985728', 'SAMD11', 'NOC2L', 'KLHL17', 'PLEKHN1'])
{'transcript_id': ['NR_046018.2'], 'exon_id': ['NR_046018.2_exon1'], 'chromosome': '1', 'start': [11873], 'end': [11903], 'strand': '+', 'GC_content': 50.0, 'melt_temp': 58.95, 'melt_temp_arm1': 40.2, 'melt