# Programming Lab 1 

## Handout 8

## Lara Schmalenstroer (s0laschm@uni-bonn.de)

#### Ex1 (8pts) Nussinov folding algorithm 
##### Implement the Nussinov folding algorithm, constructing the dynamic programming matrix N(i,j). Your function should take an RNA sequence as argument and return the completed dynaminc programming matrix.

In [59]:
import numpy as np

def is_comp_base(base1,base2):
    comp=[('A','U'),('C','G')]
    if tuple(sorted(base1+base2)) in comp:
        return 1
    return 0

In [60]:
def nussinov(rna_seq):
    N=np.zeros(shape=(len(rna_seq),len(rna_seq)), dtype=int)
    dim=len(N)
    for s in range(1,dim):
        for j in range(s,dim):
            i=j-s
            b1=rna_seq[i]
            b2=rna_seq[j-1]
            left=N[i][j-1]
            below=N[i+1][j]
            diagonal=N[i+1][j-1]+is_comp_base(b1,b2)
            if j-i+1>=3:
                sub=max([N[i][k]+N[k+1][j] for k in range(i,j)])
                N[i][j]=max(left,below,diagonal,sub)
            else:
                N[i][j]=max(left,below,diagonal)
    return N

In [61]:
rna_seq='AUCGGAGCAUUUUUUGCUCCGACGCAGCCUCAUGCUUUUUU'

In [72]:
pairing=nussinov(rna_seq)

In [73]:
pairing

array([[ 0,  0,  1, ..., 14, 14, 14],
       [ 0,  0,  0, ..., 13, 13, 13],
       [ 0,  0,  0, ..., 12, 12, 12],
       ...,
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]])

#### Ex2.(8pts) Nussinov algorithm: Backtracking
##### Given the RNA sequence and the completed dynamic programming matrix N(i,j) as input, write a function that returns the list of all matched basepairs.

In [115]:
def nussinov_backtracking(rna_seq,N,pairs=None,i=None,j=None):
    #set i and j to initial values
    if i==None and j==None:
        i=0
        j=len(rna_seq)-1
    #initialize the pairs list in the initial call
    if pairs==None:
        pairs=[]
    #if we reach the initial diagonals, the pairs can be returned 
    if (j-i+1)<=2:
        return pairs
    else:
        if N[i][j]==N[i+1][j-1]+is_comp_base(rna_seq[i],rna_seq[j]):
            if is_comp_base(rna_seq[i],rna_seq[j])==1:
                pairs.append((i,j))
            nussinov_backtracking(rna_seq,N,pairs,i+1,j-1)
        else:
            for k in range(i,j):
                if N[i][j]==N[i][k]+N[k+1][j]:
                    nussinov_backtracking(rna_seq,N,pairs,i,k-1)
                    nussinov_backtracking(rna_seq,N,pairs,k+1,j-1)
                    break
    return pairs

In [116]:
pairs=nussinov_backtracking(rna_seq,pairing)

In [117]:
print(pairs)

[(0, 40), (1, 21), (2, 20), (3, 19), (4, 18), (5, 17), (6, 16), (8, 14), (23, 34), (24, 33), (25, 32)]


#### Ex3. (2pts) Display matching basepairs
##### To display the resulting base pairing, use parentheses to produce an output as shown above.

In [41]:
pairs=[(3,5),(4,8),(6,9)]
seq='AUUCAUUAUA'

In [45]:
def nussinov_output(pairs,rna_seq):
    indices=[[],[]]
    output=[]
    for k in range(len(pairs)):
        indices[0].append(pairs[k][0])
        indices[1].append(pairs[k][1])
    for i in range(len(rna_seq)):
        if i in indices[0]:
            output.append('(')
        elif i in indices[1]:
            output.append(')')
        else:
            output.append('.')
    output=''.join(output)
    print(rna_seq)
    print(output)
    return 

In [96]:
nussinov_output(pairs,rna_seq)

AUCGGAGCAUUUUUUGCUCCGACGCAGCCUCAUGCUUUUUU
(((((((.(.....).)))))).(((......))).....)


#### Ex4. (2pts) Modifications of the algorithm
##### In the algorithm as described, the minimum hairpin loop consists of 1 base, e.g GUC where G and C are paired and the loop is formed by the single base U. Modify the algorithm so that a parameter h, indicating the minimum allowed loop length, can be given. The above algorithm corresponds to h=1. In addition to h >1, make sure your algorithm also works for h=0.

In [48]:
def nussinov2(rna_seq,h):
    #h means the number bases a match has to be separated from each other. h=1 corresponds to loop
    #length 0
    N=np.zeros(shape=(len(rna_seq),len(rna_seq)), dtype=int)
    dim=len(N)
    for s in range(h,dim):
        for i in range(dim-s):
            j=i+s
            b1=rna_seq[i]
            b2=rna_seq[j-1]
            left=N[i][j-1]
            below=N[i+1][j]
            diagonal=N[i+1][j-1]+is_comp_base(b1,b2)
            sub=max([N[i][k]+N[k+1][j] for k in range(i,j)])
            N[i][j]=max(left,below,diagonal,sub)
    return N

In [49]:
nussinov2(rna_seq,1)

array([[ 0,  0,  1, ..., 14, 14, 14],
       [ 0,  0,  0, ..., 13, 13, 13],
       [ 0,  0,  0, ..., 12, 12, 12],
       ...,
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]])

In [50]:
nussinov2(rna_seq,2)

array([[ 0,  0,  1, ..., 14, 14, 14],
       [ 0,  0,  0, ..., 13, 13, 13],
       [ 0,  0,  0, ..., 12, 12, 12],
       ...,
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]])

In [51]:
nussinov2(rna_seq,5)

array([[ 0,  0,  0, ..., 13, 13, 13],
       [ 0,  0,  0, ..., 12, 12, 12],
       [ 0,  0,  0, ..., 11, 11, 11],
       ...,
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]])