## Redesigning HIV-1 TAR RNA construct
May 9, 2023

In [93]:
# Import Python NUPACK module
from nupack import *
import pandas as pd
import glob
# NUPACK runtime settings, applied once for the whole notebook.
# NOTE(review): 0 presumably lets NUPACK pick the thread count itself —
# confirm against the NUPACK configuration docs.
config.threads = 0
config.cache = 12.0 # GB

import requests

## Generating candidate designs

In [3]:
def design_fourU(temp=37, trials=3):
    """Run a NUPACK test-tube design for the redesigned TAR construct.

    A single 74-nt strand is assembled from seven domains. Domains b-f have
    fixed sequences and carry every base pair of the target structure;
    domains a (21 nt) and g (24 nt) are mostly degenerate flanks that the
    target structure leaves fully unpaired.

    Args:
        temp: Temperature in degrees Celsius for the physical model.
        trials: Number of independent design trials to run. Defaults to 3,
            the value that used to be hard-coded.

    Returns:
        Whatever ``tube_design(...).run(trials=trials)`` returns; the rest
        of the notebook indexes and iterates it as one result per trial.
    """
    # Define physical model
    my_model = Model(material='rna', celsius=temp)

    # Define sequence domains
    # NOTE(review): the fixed domains are written with 'T' although the model
    # material is 'rna' — presumably NUPACK reads T as U; confirm.
    da = Domain('GGN14SN4', name='a')
    db = Domain('GGCAGA', name='b')
    dc = Domain('TCT', name='c')
    dd = Domain('GAGC', name='d')
    de = Domain('CTGGGA', name='e')
    df = Domain('GCTCTCTGCC', name='f')
    dg = Domain('N4SN19', name='g')

    # Sequence patterns passed to the design as a soft constraint.
    # NOTE(review): 'U4' is listed twice and 'A4' is already implied by 'A3';
    # left untouched because duplicates might affect how the constraint is
    # weighted — confirm and deduplicate if they do not.
    pattern = Pattern(['A3', 'A4', 'U4', 'C4', 'G4', 'U4', 'K6', 'M6', 'R6', 'S6', 'W6', 'Y6'])

    # Define the strand containing these domains
    sa = TargetStrand([da, db, dc, dd, de, df, dg], name='WT')

    # Define a target complex: 21 unpaired nt, the hairpin, 24 unpaired nt
    cstickfigure = TargetComplex(
        [sa],
        '.....................((((((...((((......))))))))))........................',
        name='cstickfigure',
    )

    # One tube holding the target complex at 1e-06, with every complex of up
    # to two strands considered as an off-target
    tstickfigure = TargetTube(
        on_targets={cstickfigure: 1e-06},
        off_targets=SetSpec(max_size=2),
        name='tfigure',
    )

    # Set a stop condition of 1%. No random seed is set, so repeated runs are
    # not expected to reproduce the same sequences.
    my_options = DesignOptions(f_stop=0.01, wobble_mutations=True)

    # Define and run the test tube design job
    my_design = tube_design(
        tubes=[tstickfigure],
        model=my_model,
        options=my_options,
        soft_constraints=[pattern],
    )
    return my_design.run(trials=trials)

In [4]:
# Design at 5, 25 and 37 degrees C. Every trial of each run is written to
# HIV_designs/design_<temp>C_<trial index>.o so it can be reloaded later.
designs_5 = design_fourU(5)
for trial, design in enumerate(designs_5):
    design.save(f'HIV_designs/design_5C_{trial}.o')
print('5 done')

designs_25 = design_fourU(25)
for trial, design in enumerate(designs_25):
    design.save(f'HIV_designs/design_25C_{trial}.o')
print('25 done')

designs_37 = design_fourU(37)
for trial, design in enumerate(designs_37):
    design.save(f'HIV_designs/design_37C_{trial}.o')
print('37 done')

5 done
25 done
37 done


In [None]:
# Module-level copy of the construct, so that `sa` can be handed to analyze()
# and to results.to_analysis() in later cells.
# NOTE(review): this is not identical to what design_fourU() builds: flanks
# a and g are fully degenerate here (no 'S' position), 'A3' is missing from
# the pattern list, and the strand is named '(WT)' instead of 'WT'. Confirm
# that to_analysis(sa) still resolves against designs made by design_fourU().
# Define sequence domains
da = Domain('GGN19', name='a')
db = Domain('GGCAGA', name='b')
dc = Domain('TCT', name='c')
dd = Domain('GAGC', name = 'd')
de = Domain('CTGGGA', name='e')
df = Domain('GCTCTCTGCC', name='f')
dg = Domain('N24', name='g')
# NOTE(review): 'U4' appears twice in this list.
pattern = Pattern(['A4', 'U4', 'C4', 'G4', 'U4', 'K6', 'M6', 'R6', 'S6', 'W6', 'Y6'])

# Define strands containing these domains
sa = TargetStrand([da, db, dc, dd, de, df, dg], name='(WT)')

In [6]:
def analyze(results, sa):
    """Return the MFE structure and energy for one design result.

    Args:
        results: A design result; its ``to_analysis`` is called with ``sa``.
        sa: The design-time strand whose designed sequence is analysed.

    Returns:
        A ``(structure, energy)`` tuple taken from the first MFE entry of the
        complex stored under the key '(WT)': the structure in
        dot-parens-plus notation and its energy.
    """
    # Analysis-side strand carrying the designed sequence
    designed = results.to_analysis(sa)

    tube = Tube(strands={designed: 1e-6}, complexes=SetSpec(max_size=2), name='t1')

    # NOTE(review): Model() is built with library defaults, so the analysis
    # does not use the temperature the design was produced at.
    analysis = tube_analysis(
        tubes=[tube],
        model=Model(),
        compute=['pairs', 'mfe', 'sample', 'ensemble_size', 'subopt'],
        options={'num_sample': 100, 'energy_gap': 0.1},
    )

    # Only the first MFE entry of the monomer complex is reported
    best = analysis['(WT)'].mfe[0]
    return best.structure.dotparensplus(), best.energy


In [5]:
def analyze_o(dot_o, strand):
    """Return the MFE structure and energy for a design saved on disk.

    Args:
        dot_o: Path of a saved design result, loaded with DesignResult.load.
        strand: Name of the strand to analyse; also used to build the
            complex key ``'(<strand>)'`` in the analysis result.

    Returns:
        A ``(structure, energy)`` tuple taken from the first MFE entry of
        that complex: the structure in dot-parens-plus notation and its
        energy.
    """
    loaded = DesignResult.load(dot_o)
    designed = loaded.to_analysis[strand]

    tube = Tube(strands={designed: 1e-6}, complexes=SetSpec(max_size=2), name='t1')

    # NOTE(review): Model() is built with library defaults, so the analysis
    # does not use the temperature the design was produced at.
    analysis = tube_analysis(
        tubes=[tube],
        model=Model(),
        compute=['pairs', 'mfe', 'sample', 'ensemble_size', 'subopt'],
        options={'num_sample': 100, 'energy_gap': 0.1},
    )

    best = analysis[f'({strand})'].mfe[0]
    return best.structure.dotparensplus(), best.energy

In [None]:
# One row per design: [design temperature, ensemble defect, designed
# sequence, MFE structure, MFE energy], sorted by ensemble defect.
results = []
for temp, designs in ((5, designs_5), (25, designs_25), (37, designs_37)):
    for r in designs:
        # analyze() runs a full tube analysis, so call it once per design
        # and unpack both values instead of calling it once per column.
        structure, energy = analyze(r, sa)
        results.append([temp, r.defects.ensemble_defect, str(r.to_analysis(sa)), structure, energy])
df_results = pd.DataFrame(results).sort_values(1)

In [153]:
# One row per saved design: [ensemble defect, designed sequence,
# MFE structure, MFE energy], sorted by ensemble defect.
results = []

for f in glob.glob('HIV_designs/*.o'):
    r = DesignResult.load(f)
    # analyze_o() reloads the file and runs a full tube analysis, so call it
    # once per file and unpack both values.
    structure, energy = analyze_o(f, 'WT')
    results.append([r.defects.ensemble_defect, str(r.to_analysis['WT']), structure, energy])
df_results = pd.DataFrame(results).sort_values(0)

In [23]:
from __future__ import print_function
from base64 import b64encode
import json
from urllib import request, parse


def get_access_token(client_id, client_secret, idt_username, idt_password):
    """
    Request an access token from the IDT identity server.

    Sends a password-grant POST to the token endpoint, authenticating the
    client with HTTP Basic credentials, and parses the JSON response.

    The response body also contains the fields "expires_in", which gives the
    time window the token is valid for (in seconds), and "token_type".

    Args:
        client_id: API client id, used as the Basic-auth user.
        client_secret: API client secret, used as the Basic-auth password.
        idt_username: Account username sent in the form body.
        idt_password: Account password sent in the form body.

    Returns:
        The value of "access_token" in the JSON response.

    Raises:
        RuntimeError: If the endpoint answers with a status other than 200.
        urllib.error.HTTPError: Raised by urlopen itself for HTTP error
            responses, before the status check here is reached.
    """

    # Construct the HTTP request
    authorization_string = b64encode(bytes(client_id + ":" + client_secret, "utf-8")).decode()
    request_headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "Authorization": "Basic " + authorization_string,
    }

    data_dict = {
        "grant_type": "password",
        "scope": "test",
        "username": idt_username,
        "password": idt_password,
    }
    request_data = parse.urlencode(data_dict).encode()

    post_request = request.Request(
        "https://www.idtdna.com/Identityserver/connect/token",
        data=request_data,
        headers=request_headers,
        method="POST",
    )

    # Transmit the request; the context manager closes the connection once
    # the body has been read.
    with request.urlopen(post_request) as response:
        status = response.status
        body = response.read().decode()

    # Error and return the response from the endpoint if there was a problem.
    # The status is an int, so it is formatted rather than concatenated
    # (concatenation raised TypeError and hid the real failure).
    if status != 200:
        raise RuntimeError(f"Request failed with error code:{status}\nBody:\n{body}")

    body_dict = json.loads(body)
    # NOTE(review): this echoes the access token into the notebook output,
    # which is then saved with the notebook — consider removing it.
    print(body_dict)
    return body_dict["access_token"]
    

def oligocalc(access_token, sequence, *, na_conc=50, mg_conc=0,
              dntps_conc=1, oligo_conc=1, nucleotide_type="RNA"):
    """
    Run IDT's OligoAnalyzer "Analyze" endpoint on one sequence.

    Args:
        access_token: Bearer token, as returned by get_access_token.
        sequence: Sequence sent as the "Sequence" form field.
        na_conc: Value sent as "NaConc".
        mg_conc: Value sent as "MgConc".
        dntps_conc: Value sent as "DNTPsConc".
        oligo_conc: Value sent as "OligoConc".
        nucleotide_type: Value sent as "NucleotideType".

    The keyword defaults reproduce the conditions that used to be
    hard-coded. Units are whatever the endpoint expects; they are not
    stated anywhere in this file.

    Returns:
        The decoded JSON response as a dict.

    Raises:
        RuntimeError: If the endpoint answers with a status other than 200.
        urllib.error.HTTPError: Raised by urlopen itself for HTTP error
            responses, before the status check here is reached.
    """
    header = {
        'Authorization': f'Bearer {access_token}'
    }
    data_dict = {
        "Sequence": sequence,
        "NaConc": na_conc,
        "MgConc": mg_conc,
        "DNTPsConc": dntps_conc,
        "OligoConc": oligo_conc,
        "NucleotideType": nucleotide_type,
    }

    request_data = parse.urlencode(data_dict).encode()

    post_request = request.Request(
        'https://www.idtdna.com/restapi/v1/OligoAnalyzer/Analyze',
        data=request_data,
        headers=header,
        method="POST",
    )

    # The context manager closes the connection once the body has been read.
    with request.urlopen(post_request) as response:
        status = response.status
        body = response.read().decode()

    # Error and return the response from the endpoint if there was a problem.
    # The status is an int, so it is formatted rather than concatenated
    # (concatenation raised TypeError and hid the real failure).
    if status != 200:
        raise RuntimeError(f"Request failed with error code:{status}\nBody:\n{body}")

    return json.loads(body)

In [94]:
import os
from getpass import getpass

# IDT API credentials. Secrets must never be stored in the notebook: they are
# read from the environment (IDT_CLIENT_SECRET, IDT_PASSWORD) and, when a
# variable is unset, prompted for without echo. The non-secret identifiers
# keep their previous values as defaults and can be overridden with
# IDT_CLIENT_ID / IDT_USERNAME.
client_id = os.environ.get("IDT_CLIENT_ID", "luckslab_idtapi")
client_secret = os.environ.get("IDT_CLIENT_SECRET") or getpass("IDT client secret: ")
idt_username = os.environ.get("IDT_USERNAME", "lucks-lab-NU")
idt_password = os.environ.get("IDT_PASSWORD") or getpass("IDT password: ")

token = get_access_token(client_id, client_secret, idt_username, idt_password)

def complement(s, token = token):
    """Return the 'Complement' field IDT reports for ``s``, spaces removed.

    The default for ``token`` is captured once, when this definition runs;
    after refreshing the token, pass it explicitly or re-run this cell.
    """
    analysis = oligocalc(token, s)
    return "".join(analysis['Complement'].split(" "))

def idt_tm(s, token = token):
    """Return the 'MeltTemp' field IDT reports for ``s``.

    The default for ``token`` is captured once, when this definition runs;
    after refreshing the token, pass it explicitly or re-run this cell.
    """
    return oligocalc(token, s)['MeltTemp']

def neb_tm(s1):
    """Return the melting temperature NEB's Tm API reports for one primer.

    Queries https://tmapi.neb.com/tm with ``s1`` as seq1, an empty seq2,
    conc=0.5 and prodcode=q5-0.

    Args:
        s1: Primer sequence.

    Returns:
        The value of ``data.tm1`` in the JSON response.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.Timeout: If the service does not answer within 30 seconds.
    """
    response = requests.get(
        'https://tmapi.neb.com/tm',
        # Passing params lets requests URL-encode the values; the resulting
        # query string matches the one that used to be built by hand.
        params={'seq1': s1, 'seq2': '', 'conc': '0.5', 'prodcode': 'q5-0', 'email': ''},
        # Without a timeout a stalled connection blocks the notebook forever.
        timeout=30,
    )
    # Fail on the HTTP status rather than on a confusing JSON/KeyError later.
    response.raise_for_status()
    return response.json()['data']['tm1']

{'access_token': 'f15729d23e18b4c789b2df281094444f', 'expires_in': 3600, 'token_type': 'Bearer'}


In [154]:
# Cut the two primer-binding regions out of column 1: its first 18
# characters ('5_PCR') and its last 24 characters ('RT'), then look up the
# NEB Tm of the 5' region.
# NOTE(review): column 1 is the designed sequence only when df_results was
# built from the saved-design files — confirm which cell produced it.
column_1 = df_results[1]
df_results['5_PCR'] = column_1.str.slice(stop=18)
df_results['RT'] = column_1.str.slice(start=-24)
df_results['5_neb_tm'] = df_results['5_PCR'].apply(neb_tm)

In [155]:
# RT primer candidate: the IDT-reported complement of the 'RT' region with
# its last 5 characters removed, followed by its Tm from both services.
rt_complement = df_results['RT'].apply(complement)
df_results['RT_c'] = rt_complement.str.slice(stop=-5)
df_results['RT_neb_tm'] = df_results['RT_c'].apply(neb_tm)
df_results['RT_idt_tm'] = df_results['RT_c'].apply(idt_tm)

In [156]:
# Keep only rows that pass all three cuts:
#   column 0 below 0.02,
#   NEB Tm of the two primers within 5 of each other,
#   IDT Tm of the RT primer above 47.
low_defect = df_results[0] < 0.02
tm_matched = (df_results['5_neb_tm'] - df_results['RT_neb_tm']).abs() <= 5
rt_tm_ok = df_results['RT_idt_tm'] > 47
df_results = df_results[low_defect & tm_matched & rt_tm_ok]
df_results

Unnamed: 0,0,1,2,3,5_PCR,RT,5_neb_tm,RT_c,RT_neb_tm,RT_idt_tm
26,0.00776,GGCCATCTTTACTTTCATTTCGGCAGATCTGAGCCTGGGAGCTCTC...,.....................((((((...((((......))))))...,-14.975309,GGCCATCTTTACTTTCAT,TTACACTCCACCTCACACTATACA,56,TGTATAGTGTGAGGTGGAG,60,50.9
21,0.007862,GGCCCATCTTAACAATTCAACGGCAGATCTGAGCCTGGGAGCTCTC...,.....................((((((...((((......))))))...,-15.242153,GGCCCATCTTAACAATTC,ACTCTAACCCTCACCTAACTAACC,57,GGTTAGTTAGGTGAGGGTT,61,56.8
13,0.00799,GGCCCTCAATACATACAACTCGGCAGATCTGAGCCTGGGAGCTCTC...,.....................((((((...((((......))))))...,-15.242153,GGCCCTCAATACATACAA,ACCTACACATCCTCATCACTATCC,58,GGATAGTGATGAGGATGTG,59,47.4
15,0.010332,GGCACCTCATAACATAACTAAGGCAGATCTGAGCCTGGGAGCTCTC...,.....................((((((...((((......))))))...,-15.237093,GGCACCTCATAACATAAC,AATCCACTAACCTCACTCACAATC,57,GATTGTGAGTGAGGTTAGT,58,50.8


In [157]:
# specify strands

def nupack_ensemble(s):
    """Return NUPACK's complex concentrations for sequence ``s`` on its own.

    Args:
        s: Sequence used to build a single strand named 'a'.

    Returns:
        The result of ``complex_concentrations`` for all complexes of up to
        three copies of the strand, with the strand at 1e-6, computed with an
        RNA model at 37 degrees C.
    """
    strand = Strand(s, name='a')

    # Every complex made of one, two or three copies of the strand
    complexes = ComplexSet(strands=[strand], complexes=SetSpec(max_size=3))

    # Partition function of each complex in the set
    model = Model(material='rna', celsius=37)
    partition_functions = complex_analysis(complexes=complexes, model=model, compute=['pfunc'])

    # Turn the partition functions into concentrations for the given
    # total strand concentration
    return complex_concentrations(
        tube=complexes,
        data=partition_functions,
        concentrations={strand: 1e-6},
    )

In [158]:
# Print each value of column 1 followed by its complex concentrations.
for sequence in df_results[1]:
    print(sequence)
    print(nupack_ensemble(sequence))

GGCCATCTTTACTTTCATTTCGGCAGATCTGAGCCTGGGAGCTCTCTGCCTTACACTCCACCTCACACTATACA
Concentration results:
Complex  tube (M)  
    (a) 3.022e-07  
  (a+a) 2.067e-07  
(a+a+a) 9.476e-08  
GGCCCATCTTAACAATTCAACGGCAGATCTGAGCCTGGGAGCTCTCTGCCACTCTAACCCTCACCTAACTAACC
Concentration results:
Complex  tube (M)  
    (a) 5.274e-07  
  (a+a) 1.745e-07  
(a+a+a) 4.119e-08  
GGCCCTCAATACATACAACTCGGCAGATCTGAGCCTGGGAGCTCTCTGCCACCTACACATCCTCATCACTATCC
Concentration results:
Complex  tube (M)  
    (a) 5.320e-07  
  (a+a) 1.748e-07  
(a+a+a) 3.948e-08  
GGCACCTCATAACATAACTAAGGCAGATCTGAGCCTGGGAGCTCTCTGCCAATCCACTAACCTCACTCACAATC
Concentration results:
Complex  tube (M)  
    (a) 6.239e-07  
  (a+a) 1.877e-07  
(a+a+a) 2.363e-10  


# Chosen design: GGCCTCAATTACAATCTCACTGGCAGATCTGAGCCTGGGAGCTCTCTGCCTACTTCACTACTTCACACTTACTC (0.007682 — presumably its ensemble defect)