### Installation
For Linux:
- Install NUPACK and add `export NUPACKHOME=/path/to/nupack3.2.1` to `~/.bashrc`.
- Open a new shell (or run `source ~/.bashrc`) and check that the variable is set with `echo $NUPACKHOME`.
- Use a Python 2.7 environment with the following additional packages installed:
  - matplotlib 2.2.5
  - Biopython 1.76 (imported as `Bio`)


# Orthogonal sequence optimizer
## USER INPUT:
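- Add the sequences to be optimized to `inp`, a dictionary that maps each domain name to a pair of starting sequence and a number (60 in the cell below).
- Add the sequences to be kept fixed (and avoided during optimization) to `fix`, a dictionary of strings.
- Set the global physical parameters: temperature (`T`), sodium concentration (`sodium`, mM), and magnesium concentration (`magnesium`, mM).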

In [1]:
import imp
from sequence_analysis_lib import sequence_analysis_lib as sq
import pynupack as nu

In [2]:
# Variable domains handed to the optimizer. Every domain starts from a fresh
# random sequence of length 20 and is stored as [sequence, 60].
# NOTE(review): the 60 is presumably a per-domain melting-temperature target --
# confirm against sequence_analysis_lib.
# Sequences that were previously kept as commented-out alternatives:
#   p_red        GACTGGCTATTCTGACATAC
#   GR_OE        CCTCCATTGTGACGTACCTT
#   GR_overlap   TATACTCAGCATCATATGCG
#   Tar_overlap  ACTGCTAGCTAGGTTCAGTA
#   p_yel        ACCTACTTCAATCTTCAACG
#   BY_OE        GCCTAAATCTAGTTATGCCC
#   p_tar        CGCTAGTTAGTGTGTAGCCA
inp = {
    domain_name: [sq.random_seq_of_length(20), 60]
    for domain_name in (
        "p_red",
        "GR_OE",
        "GR_overlap",
        "Tar_overlap",
        "p_yel",
        "BY_OE",
        "p_tar",
    )
}

# Fixed sequences: passed to the optimizer alongside the variable domains and
# never modified by this notebook. Keys are generated as illumina1 ... illumina10
# in the order listed.
fix = {
    "illumina%d" % number: sequence
    for number, sequence in enumerate(
        (
            "CAAGCAGAAGAC",          # illumina1
            "CAAGTTGTCA",            # illumina2
            "GGCATACGAGAT",          # illumina3
            "GTCTCGTGGGCTCGGAG",     # illumina4
            "ATGTGTATAAGAGACAG",     # illumina5
            "GTGTAGATCTCGG",         # illumina6
            "TCATTGCACG",            # illumina7
            "TGGTCGCCGTATCATT",      # illumina8
            "CTGTCTCTTATACA",        # illumina9
            "CATCTGACGCTGCCGACGA",   # illumina10
        ),
        1,
    )
}
# Extra segments that are spliced between the optimized domains when the full
# strands are assembled. They are plain strings (no [sequence, number] pair).
# Runs of "N" are written as repetitions so their length is visible at a glance;
# the trailing comments keep the concrete sequences noted for each segment.
xtr = {
    # red side
    "bc_r1": "N" * 10,   # "ACCTCCACCT"
    "bc_ri": "CCACC",
    "bc_r2": "N" * 10,   # "TCACTACTCA"

    "GR_evL": "N" * 5,   # "CACTA"
    "GR_evT": "N" * 5,   # "ACCTA"

    # yellow side
    "bc_y1": "N" * 10,   # "CACCTCACCT"
    "bc_yi": "CCACT",
    "bc_y2": "N" * 10,   # "ACTCCCACTC"

    "BY_evL": "N" * 5,   # "TACTT"
    "BY_evT": "N" * 5,   # "TACTA"

    # target
    "bc_tar": "N" * 20,  # "TGTTCGTGTCTTGTTCTTGT"
}


# Global physical parameters; they are passed explicitly to the optimizer calls
# as T=, magnesium= and sodium=.
T = 60  # temperature -- NOTE(review): presumably degrees Celsius, confirm
magnesium = .2  # magnesium concentration (mM, per the USER INPUT notes)
sodium = 1  # sodium concentration (mM): all monovalent Na+ and K+ and tris+
# Not used yet:
# dNTPs = .1
# percentDMSO = 5 #% has the effect of -.75degreesC per percentage dmso


In [3]:
# Re-import sequence_analysis_lib so edits made to the library since the first
# import are picked up without restarting the kernel.
imp.reload(sq)

# Keyword arguments shared by both optimizer passes.
shared_settings = dict(plot_interval=100, T=T, magnesium=magnesium, sodium=sodium)

### Coarse pass: a small reversion probability lets the optimizer escape    ###
### local minima, i.e. a change that worsens the score is sometimes kept.   ###
new_domains_coarse = sq.optimize_domains_quick(
    inp,
    fix,
    iterations=1000,
    algorithm_reversion_probability=0.1,
    file_prefix="coarse",
    **shared_settings
)

### Fine pass: no reversion, so only improvements are accepted.             ###
# NOTE: `inp` is rebound to the optimized domains here, so re-running this
# cell starts the coarse pass from the previously optimized sequences.
inp = sq.optimize_domains_quick(
    new_domains_coarse,
    fix,
    iterations=500,
    algorithm_reversion_probability=0.0,
    file_prefix="fine",
    **shared_settings
)

def _assemble_strands(domains, extras):
    """Build the name -> sequence dictionary of assembled strands.

    ``domains`` maps a domain name to an entry whose first element is the
    sequence (only index 0 is read); ``extras`` maps a name to a plain
    sequence string. Strands are string concatenations of these pieces, with
    ``sq.prime`` applied to individual pieces where noted.

    Repeated sub-assemblies are computed once and reused, which assumes
    ``sq.prime`` returns the same result for the same input.
    """
    def dom(name):
        return domains[name][0]

    def primed_dom(name):
        return sq.prime(domains[name][0])

    def primed_xtr(name):
        return sq.prime(extras[name])

    # Forward arms: primer + barcode segments (+ OE / overlap domain).
    red_arm = dom("p_red") + extras["bc_r1"] + extras["bc_ri"] + extras["bc_r2"] + dom("GR_OE")
    yel_arm = dom("p_yel") + extras["bc_y1"] + extras["bc_yi"] + extras["bc_y2"] + dom("BY_OE")
    tar_arm = dom("p_tar") + extras["bc_tar"] + dom("Tar_overlap")

    # The same arms with every piece primed and the piece order reversed.
    red_arm_primed = (primed_dom("GR_OE") + primed_xtr("bc_r2") + primed_xtr("bc_ri")
                      + primed_xtr("bc_r1") + primed_dom("p_red"))
    yel_arm_primed = (primed_dom("BY_OE") + primed_xtr("bc_y2") + primed_xtr("bc_yi")
                      + primed_xtr("bc_y1") + primed_dom("p_yel"))
    tar_arm_primed = primed_dom("Tar_overlap") + primed_xtr("bc_tar") + primed_dom("p_tar")

    # Arms extended by their ev segment and overlap domain.
    gr_mon_l = red_arm + extras["GR_evL"] + dom("GR_overlap")
    gr_mon_t = red_arm + extras["GR_evT"] + primed_dom("Tar_overlap")
    by_mon_l = yel_arm + extras["BY_evL"] + primed_dom("GR_overlap")
    by_mon_t = yel_arm + extras["BY_evT"] + primed_dom("Tar_overlap")

    return {
        "GR_amp_s": red_arm,
        "GR_OEL_s": primed_dom("GR_overlap") + primed_xtr("GR_evL") + primed_dom("GR_OE"),
        "GR_OET_s": dom("Tar_overlap") + primed_xtr("GR_evT") + primed_dom("GR_OE"),
        "GR_monL_s": gr_mon_l,
        "GR_monT_s": gr_mon_t,
        "BY_amp_s": yel_arm,
        "BY_OEL_s": dom("GR_overlap") + primed_xtr("BY_evL") + primed_dom("BY_OE"),
        "BY_OET_s": dom("Tar_overlap") + primed_xtr("BY_evT") + primed_dom("BY_OE"),
        "BY_monL_s": by_mon_l,
        "BY_monT_s": by_mon_t,
        "Tar_mon_s": tar_arm,
        "full_loc_r_s": gr_mon_l + primed_xtr("BY_evL") + yel_arm_primed,
        "full_loc_y_s": by_mon_l + primed_xtr("GR_evL") + red_arm_primed,
        "full_tar_r_s": red_arm + extras["GR_evT"] + tar_arm_primed,
        "full_tar_y_s": yel_arm + extras["BY_evT"] + tar_arm_primed,
        "full_tar_tarR_s": tar_arm + primed_xtr("GR_evT") + red_arm_primed,
        "full_tar_tarY_s": tar_arm + primed_xtr("BY_evT") + yel_arm_primed,
    }


# Assembled strands, built from the optimized domains (inp) and extra segments (xtr).
strands_dict = _assemble_strands(inp, xtr)

def _assemble_order_strands(domains, extras):
    """Build the name -> sequence dictionary stored in ``strands_order_dict``.

    ``domains`` maps a domain name to an entry whose first element is the
    sequence (only index 0 is read); ``extras`` maps a name to a plain
    sequence string.

    Compared with ``strands_dict``: the two primers are listed on their own,
    the ``*_amp_s`` entries are ``sq.prime`` of the whole forward concatenation,
    and the ``*_mon*_s``, ``Tar_mon_s`` and ``full_*`` strands are not included.

    ``sq.prime`` of each OE domain is computed once and reused, which assumes
    it returns the same result for the same input.
    """
    def dom(name):
        return domains[name][0]

    red_arm = dom("p_red") + extras["bc_r1"] + extras["bc_ri"] + extras["bc_r2"] + dom("GR_OE")
    yel_arm = dom("p_yel") + extras["bc_y1"] + extras["bc_yi"] + extras["bc_y2"] + dom("BY_OE")

    gr_oe_primed = sq.prime(dom("GR_OE"))
    by_oe_primed = sq.prime(dom("BY_OE"))

    return {
        "p_red": dom("p_red"),
        "p_yel": dom("p_yel"),
        # sq.prime is applied once to the full concatenation here.
        "GR_amp_s": sq.prime(red_arm),
        "GR_OEL_s": sq.prime(dom("GR_overlap")) + sq.prime(extras["GR_evL"]) + gr_oe_primed,
        "GR_OET_s": dom("Tar_overlap") + sq.prime(extras["GR_evT"]) + gr_oe_primed,
        "BY_amp_s": sq.prime(yel_arm),
        "BY_OEL_s": dom("GR_overlap") + sq.prime(extras["BY_evL"]) + by_oe_primed,
        "BY_OET_s": dom("Tar_overlap") + sq.prime(extras["BY_evT"]) + by_oe_primed,
    }


strands_order_dict = _assemble_order_strands(inp, xtr)

(0, 1251.1065, 1184.827)
('new variable domains: ', ['TTGTTATTTTTTGATCACCT', 'CGTACATGGGCATTACCATC', 'CTTTTCCAAGGGTCATTATG', 'GTATGTGTGCGATGGGACAC', 'CAACGTTTTTCGGACGCAAC', 'TCGGTTAGCACGTTTCTGCA', 'CTTGCAACCTTTGAACCGCA'])
(1, 588.6858000000001, 1216.3498000000002)
(2, 547.1619999999999, 1551.9626000000003)
(3, 15065.8754, 12358.233999999999)
(4, 624.5452999999999, 2102.7052)
(5, 3759.709, 4015.187999999999)
(6, 1966.8266, 2049.9472)
(7, 1183.6510000000003, 3240.3515000000007)
(8, 588.6858000000001, 1845.9086)
(9, 547.1619999999999, 1750.8809999999996)
(10, 12358.233999999999, 9714.520500000002)
(11, 628.7455999999999, 1129.4612)
(12, 3781.783, 3730.5649999999996)
(13, 1965.4810999999997, 670.8681999999999)
(14, 1193.584, 1195.253)
(15, 588.9378, 1798.9726)
(16, 552.9471999999998, 584.9406)
(17, 9714.520500000002, 7798.872000000001)
(18, 626.9242999999999, 555.2436)
(19, 3748.765, 2248.4741999999997)
(20, 686.3158000000001, 743.6445)
(21, 1169.875, 2648.3268)
(22, 579.8298000000001, 115

In [4]:
### Print the assembled strands: a dictionary of strand name -> sequence, where each ###
### sequence is a concatenation of optimized domains (inp) and extra segments (xtr). ###
print(strands_dict)

{'full_loc_r_s': 'CAGTACAGTTCAGTTCGTCGNNNNNNNNNNCCACCNNNNNNNNNNCTTGGATCAGACGTAGTACCNNNNNTCCTCAATGGTAGCTTGCAGNNNNNGCCCATCGGGTACATCTTAANNNNNNNNNNAGTGGNNNNNNNNNNGACTGATAGTGCATGTACGC', 'full_tar_tarR_s': 'TATCTCAAGCCGAATAGCCGNNNNNNNNNNNNNNNNNNNNCAGTCGTTATACCTCGTGTGNNNNNGGTACTACGTCTGATCCAAGNNNNNNNNNNGGTGGNNNNNNNNNNCGACGAACTGAACTGTACTG', 'GR_OEL_s': 'CTGCAAGCTACCATTGAGGANNNNNGGTACTACGTCTGATCCAAG', 'full_tar_r_s': 'CAGTACAGTTCAGTTCGTCGNNNNNNNNNNCCACCNNNNNNNNNNCTTGGATCAGACGTAGTACCNNNNNCACACGAGGTATAACGACTGNNNNNNNNNNNNNNNNNNNNCGGCTATTCGGCTTGAGATA', 'GR_amp_s': 'CAGTACAGTTCAGTTCGTCGNNNNNNNNNNCCACCNNNNNNNNNNCTTGGATCAGACGTAGTACC', 'BY_monL_s': 'GCGTACATGCACTATCAGTCNNNNNNNNNNCCACTNNNNNNNNNNTTAAGATGTACCCGATGGGCNNNNNCTGCAAGCTACCATTGAGGA', 'BY_OEL_s': 'TCCTCAATGGTAGCTTGCAGNNNNNGCCCATCGGGTACATCTTAA', 'GR_OET_s': 'CAGTCGTTATACCTCGTGTGNNNNNGGTACTACGTCTGATCCAAG', 'full_tar_tarY_s': 'TATCTCAAGCCGAATAGCCGNNNNNNNNNNNNNNNNNNNNCAGTCGTTATACCTCGTGTGNNNNNGCCCATCGGGTACATCTTAANNNNNNNNNNAGTGGNNNNNNNNNNGACTGATAGTGCATG

In [5]:
### Print the domains after optimization: `inp` was rebound to the result of the   ###
### final optimizer pass, so each entry is domain name -> (sequence, number).      ###
print(inp)

{'p_yel': ('GCGTACATGCACTATCAGTC', 60), 'p_red': ('CAGTACAGTTCAGTTCGTCG', 60), 'GR_overlap': ('TCCTCAATGGTAGCTTGCAG', 60), 'Tar_overlap': ('CAGTCGTTATACCTCGTGTG', 60), 'GR_OE': ('CTTGGATCAGACGTAGTACC', 60), 'p_tar': ('TATCTCAAGCCGAATAGCCG', 60), 'BY_OE': ('TTAAGATGTACCCGATGGGC', 60)}


In [6]:
### Show the reverse complements of the optimized domain sequences. ###
# Only the sequence of each (sequence, number) entry is used; the list follows
# the iteration order of inp.values().
domain_sequences = [seq for seq, temp in inp.values()]
sq.get_complements_list(domain_sequences)

['GACTGATAGTGCATGTACGC',
 'CGACGAACTGAACTGTACTG',
 'CTGCAAGCTACCATTGAGGA',
 'CACACGAGGTATAACGACTG',
 'GGTACTACGTCTGATCCAAG',
 'CGGCTATTCGGCTTGAGATA',
 'GCCCATCGGGTACATCTTAA']

In [7]:
from Bio.SeqUtils import MeltingTemp as mt
from Bio.Seq import Seq
### Print the Wallace melting temperature of every optimized domain. ###
# Each value of `inp` is a (sequence, number) entry, so only the sequence string
# (index 0) is handed to Tm_Wallace rather than the whole entry.
for domain_name, domain_entry in inp.items():
    tm_wallace = mt.Tm_Wallace(domain_entry[0])
    print(domain_name, tm_wallace)

('p_yel', 60.0)
('p_red', 60.0)
('GR_overlap', 60.0)
('Tar_overlap', 60.0)
('GR_OE', 60.0)
('p_tar', 60.0)
('BY_OE', 60.0)


In [8]:
# Display the strand name -> sequence dictionary built at the end of the optimizer cell.
strands_order_dict

{'BY_OEL_s': 'TCCTCAATGGTAGCTTGCAGNNNNNGCCCATCGGGTACATCTTAA',
 'BY_OET_s': 'CAGTCGTTATACCTCGTGTGNNNNNGCCCATCGGGTACATCTTAA',
 'BY_amp_s': 'GCCCATCGGGTACATCTTAANNNNNNNNNNAGTGGNNNNNNNNNNGACTGATAGTGCATGTACGC',
 'GR_OEL_s': 'CTGCAAGCTACCATTGAGGANNNNNGGTACTACGTCTGATCCAAG',
 'GR_OET_s': 'CAGTCGTTATACCTCGTGTGNNNNNGGTACTACGTCTGATCCAAG',
 'GR_amp_s': 'GGTACTACGTCTGATCCAAGNNNNNNNNNNGGTGGNNNNNNNNNNCGACGAACTGAACTGTACTG',
 'p_red': 'CAGTACAGTTCAGTTCGTCG',
 'p_yel': 'GCGTACATGCACTATCAGTC'}

In [10]:
# Complements of every sequence in strands_order_dict. The input follows the
# dictionary's iteration order, i.e. the same order as strands_order_dict.keys();
# presumably the returned list keeps that order -- confirm in sequence_analysis_lib.
sq.get_complements_list(strands_order_dict.values())

['GACTGATAGTGCATGTACGC',
 'CTTGGATCAGACGTAGTACCNNNNNTCCTCAATGGTAGCTTGCAG',
 'TTAAGATGTACCCGATGGGCNNNNNCTGCAAGCTACCATTGAGGA',
 'CAGTACAGTTCAGTTCGTCGNNNNNNNNNNCCACCNNNNNNNNNNCTTGGATCAGACGTAGTACC',
 'CGACGAACTGAACTGTACTG',
 'CTTGGATCAGACGTAGTACCNNNNNCACACGAGGTATAACGACTG',
 'TTAAGATGTACCCGATGGGCNNNNNCACACGAGGTATAACGACTG',
 'GCGTACATGCACTATCAGTCNNNNNNNNNNCCACTNNNNNNNNNNTTAAGATGTACCCGATGGGC']

In [11]:
# Strand names in the dictionary's iteration order; this matches the order of
# strands_order_dict.values() as long as the dictionary is not modified in between.
strands_order_dict.keys()

['p_yel',
 'GR_OEL_s',
 'BY_OEL_s',
 'GR_amp_s',
 'p_red',
 'GR_OET_s',
 'BY_OET_s',
 'BY_amp_s']