In [1]:
import numpy as np
import math

### Problem Preparation

This section accepts a candidate RNA, finding its stems `(begin_residue, end_residue, length)`, pseudoknots `(stem1, stem2, penalty)`, stem overlaps `(stem1, stem2, penalty)`, and longest stem `mu`. Further, these data are then organized into two dictionaries in preparation for submission to the D-Wave QPU: `L`, which records the linear terms of the QUBO, and `Q`, which records the quadratic terms of the QUBO.

In [2]:
rna = "GGAAGCAAACAUCCCUGU"
matrix = np.zeros((len(rna),len(rna)))
for diag in range(0, len(matrix)):
    for row in range(0, len(matrix)-diag):
        col = row + diag
        base1 = rna[row]
        base2 = rna[col]
        if row != col:
            if (((base1 == "A" and base2 == "U") or (base1 == "U" and base2 == "A")) or ((base1 == "G" and base2 == "C") or (base1 == "C" and base2 == "G")) or ((base1 == "G" and base2 == "U") or (base1 == "U" and base2 == "G"))):
                matrix[row][col] = 1
print(matrix)

[[0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1. 1. 1. 1. 1. 0. 1.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1. 1. 1. 1. 1. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1. 1. 1. 1. 1. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

In [3]:
stems_all = []
mu = 0

for row in range(0, len(matrix)):
    for col in range (row, len(matrix)):
        if row != col:
            if matrix[row][col] == 1:
                temp_row = row
                temp_col = col
                stem = [row+1,col+1,0]
                length = 0
                while (matrix[temp_row][temp_col] != 0) and (temp_row != temp_col):
                    length+=1
                    temp_row+=1
                    temp_col-=1
                if length >= 3:
                    stems_all.append(stem)
                    stem[2] = length
                if length > mu:
                    mu = length
                    
print(stems_all)

[[1, 14, 3], [5, 18, 3], [9, 18, 3]]


In [4]:
def pseudoknots(i, j):
    
    stem1 = stems_all[i]
    stem2 = stems_all[j]
    
    i_a = stem1[0]
    j_a = stem1[1]
    i_b = stem2[0]
    j_b = stem2[1]
    
    pseudoknot = [i,j,1]
    
    if (i_a < i_b and i_b < j_a and j_a < j_b) or (i_b < i_a and i_a < j_b and j_b < j_a):
        
        pseudoknot[2] = pseudoknot_penalty
    
    pseudoknots_all.append(pseudoknot)

pseudoknot_penalty = 0.5
pseudoknots_all = []
for i in range(len(stems_all)):
    for j in range(i + 1, len(stems_all)):
        pseudoknots(i, j)
print(pseudoknots_all)

[[0, 1, 0.5], [0, 2, 0.5], [1, 2, 1]]


In [28]:
def overlaps(i, j):
    
    stem1 = stems_all[i]
    stem2 = stems_all[j]
    
    overlap = [i, j, 0]
    
    stem1_cspan = set(range(stem1[1]-stem1[2]+1, stem1[1]+1))
    stem2_cspan = set(range(stem2[1]-stem2[2]+1, stem2[1]+1))
    
    if len(stem1_cspan & stem2_cspan) != 0:
        
        overlap[2] = overlap_penalty
        
    overlaps_all.append(overlap)
        
overlap_penalty = 1000
overlaps_all = []
for i in range(len(stems_all)):
    for j in range(i+1, len(stems_all)):
        overlaps(i, j)
print(overlaps_all)
        

[[0, 1, 0], [0, 2, 0], [1, 2, 1000]]


In [29]:
print("Stems      :", stems_all)
print("Pseudoknots:", pseudoknots_all)
print("Overlaps   :", overlaps_all)
print("Mu         :", mu)

Stems      : [[1, 14, 3], [5, 18, 3], [9, 18, 3]]
Pseudoknots: [[0, 1, 0.5], [0, 2, 0.5], [1, 2, 1]]
Overlaps   : [[0, 1, 0], [0, 2, 0], [1, 2, 1000]]
Mu         : 3


In [30]:
L = {}
Q = {}
cl = 1
cb = 1
k = 0

for i in range(0, len(stems_all)):
    L[str(i)] = cl*((stems_all[i][2]**2)-2*mu*stems_all[i][2]+mu**2)-cb*(stems_all[i][2]**2)
    for j in range(i+1, len(stems_all)):
        Q[(str(i), str(j))] = cb*stems_all[i][2]*stems_all[j][2]*pseudoknots_all[k][2]+overlaps_all[k][2]
        k += 1
        
print(L)
print(Q)

{'0': -9, '1': -9, '2': -9}
{('0', '1'): 4.5, ('0', '2'): 4.5, ('1', '2'): 1009}


### Problem Execution

This section constructs the problem in a formulation acceptable to the D-Wave QPU and submits the problem to the `Advantage4.1` QPU and/or a hybrid solver (settings: $20\text{ }\mu s$ annealing time, $10$ runs for the quantum solver; 1 run for the hybrid solver). The answer is printed to show the selected stems, the energy of the solution, and the number of occurrences of that particular solution. 

In [51]:
from dwave.cloud import Client
client = Client.from_config(token="DEV-6b38e4697eaa586b361595c629788f595b810a14")
client.get_solvers()

from dwave.system.samplers import DWaveSampler
from dwave.system.samplers import LeapHybridSampler
from dwave.system.composites import EmbeddingComposite

import dimod

In [52]:
model = dimod.BinaryQuadraticModel(L, Q, vartype = 'BINARY', offset = 0.0)

In [64]:
sampler_quantum = DWaveSampler(solver={'topology__type': 'pegasus'})
sampler_hybrid  = LeapHybridSampler()

hamiltonian_quantum = EmbeddingComposite(sampler_quantum).sample(model, num_reads = 10)
hamiltonian_hybrid  = sampler_hybrid.sample(model)

In [65]:
print("Quantum sampler results:\n")
for datum in hamiltonian_quantum.data(['sample', 'energy', 'num_occurrences']):
    print(datum.sample, datum.energy, datum.num_occurrences)

print("\n", "Hybrid sampler results:\n")
for datum in hamiltonian_hybrid.data(['sample', 'energy', 'num_occurrences']):
    print(datum.sample, datum.energy, datum.num_occurrences)

Quantum sampler results:

{'0': 1, '1': 1, '2': 0} -13.5 1
{'0': 1, '1': 0, '2': 1} -13.5 2
{'0': 1, '1': 0, '2': 0} -9.0 1
{'0': 0, '1': 0, '2': 1} -9.0 4
{'0': 0, '1': 0, '2': 0} 0.0 2

 Hybrid sampler results:

{'0': 1, '1': 1, '2': 0} -13.5 1


### Problem Evaluation

This section evaluates the results of the above section using the sensitivity ($\sigma_{SN}$) and specificity ($\sigma_{SP}$) metrics defined therein as:

$$\sigma_{SN} = \frac{C}{C+M}$$

And:

$$\sigma_{SN} = \frac{C}{C+I}$$

Where C is the number of correctly identified base pairs, M is the number of the predicted base pairs missing from the known structure, and I is the number of non-predicted base pairs present in
the known structure.

Tests to conduct:

1. Performance as a function of RNA size/complexity
2. Optimization of pseudoknot parameter
3. Adapting known loop-structure penalties to the energy model (need to find these/talk with Hosna)
4. Compare hybrid- versus fully-quantum solvers???