In [1]:
import pandas as pd
import numpy as np

### Ex1.(8pts)Nussinov folding algorithm

In [2]:
def base_pair(i,j):
    if seq[i] == "A" and seq[j] == "U":
        return 1        
        
    elif seq[i] == "U" and seq[j] == "A":
        return 1
        
    elif seq[i] == "C" and seq[j] == "G":
        return 1
        
    elif seq[i] == "G" and seq[j] == "C":
        return 1
    
    else:
        return 0 

In [3]:
def nussinov_matrix(seq):
    L = len(seq)                 # len of the sequence
    M = np.full([L, L],np.nan)   # initialize everything to nan
    #np.fill_diagonal(M, 0)       # diagonal to 0
    for i in range(1,L): 
        M[i,i-1]=0 
    for i in range(0,L):
        M[i,i]=0
    M = fill_matrix(M, seq)
    
    for i in range(L):
        for j in range(0,i):
            M[i][j] = M[j][i]
    
    return M

In [4]:
def fill_matrix(nm, rna):
    """
    Fill the matrix as per the Nussinov algorithm
    """
    minimal_loop_length = 0
    for k in range(1, len(rna)):
        for i in range(len(rna) - k):
            j = i + k
            if j - i >= minimal_loop_length:
                down = nm[i + 1][j] # 1st rule
                left = nm[i][j - 1] # 2nd rule
                diag = nm[i + 1][j - 1] + base_pair(i,j) # 3rd rule
                rc = max([nm[i][t] + nm[t + 1][j] for t in range(i, j)]) # 4th rule
                nm[i][j] = max(down, left, diag, rc) # max of all
            else:
                nm[i][j] = 0
    return nm

In [5]:
seq='AUCGGAGCAUUUUUUGCUCCGACGCAGCCUCAUGCUUUUUU'
NM=nussinov_matrix(seq)
print(NM)

[[ 0.  1.  1. ... 14. 14. 14.]
 [ 1.  0.  0. ... 13. 13. 13.]
 [ 1.  0.  0. ... 13. 13. 13.]
 ...
 [14. 13. 13. ...  0.  0.  0.]
 [14. 13. 13. ...  0.  0.  0.]
 [14. 13. 13. ...  0.  0.  0.]]


### Ex2.(8pts) Nussinov algorithm: Backtracking

In [6]:
def backtracking(N,seq,i,j,NN_list):
    if i<j:
        delta = base_pair(i,j)
        
        if N[i,j] == N[i+1,j]:
            backtracking(N,seq,i+1,j,NN_list)
        elif N[i,j] == N[i,j-1]:
            backtracking(N,seq,i,j-1,NN_list)
        elif N[i,j] == N[i+1,j-1] + delta:
            NN_list.append([i,j,str(seq[i]),str(seq[j])])
            backtracking(N,seq,i+1,j-1,NN_list)
        else:
            for k in range(i+1,j):
                if N[i,j] == N[i,k] + N[k+1,j]:
                    backtracking(N,seq,i,k,NN_list)
                    backtracking(N,seq,k+1,j,NN_list)
                    break
    return NN_list


In [7]:


NN_list = backtracking(NM,seq,0,len(seq)-1, []) 
i = [x[0] for x in NN_list]
j = [x[1] for x in NN_list]
NN_pair = [(x[2],x[3]) for x in NN_list]

print("Number of matched pairs: ",len(NN_list))
print("i  j  NN")
for x in range(0,len(NN_list)):
     print(i[x],j[x],*NN_pair[x])

Number of matched pairs:  6
i  j  NN
2 33 C G
3 30 G C
4 28 G C
5 10 A U
14 25 U A
17 21 U A


### Ex3.(2pts)Display matching base pairs

In [8]:
def write_structure(seq, pairs):
    closures_ascii = ["." for _ in range(len(seq))]
    for pair in pairs:
        closures_ascii[min(pair)] = "("
        closures_ascii[max(pair)] = ")"
    return "".join(closures_ascii)

write_structure(seq, [[i[x],j[x]] for x in range(len(i))])



'..((((....)...(..(...)...)..).)..).......'

### Ex4.(2pts) Modifications of the algorithm

In [9]:
def nussinov_matrix_modified(seq,minimum_hairpin_loop):             # additon of minimum_hairpin_loop value (h)
    print("Minimum hairpin loop length is",minimum_hairpin_loop)
    L = len(seq)                 
    M = np.full([L, L],np.nan)  
    #np.fill_diagonal(M, 0)       
    for i in range(1,L): 
        M[i,i-1]=0 
    for i in range(0,L):
        M[i,i]=0
    M = fill_matrix_modified(M, seq,minimum_hairpin_loop)
    
    for i in range(L):
        for j in range(0,i):
            M[i][j] = M[j][i]
    
    return M



def fill_matrix_modified(nm, rna,minimum_hairpin_loop):
    """
    Fill the matrix as per the Nussinov algorithm
    """
    minimal_loop_length = minimum_hairpin_loop
    for k in range(1, len(rna)):
        for i in range(len(rna) - k):
            j = i + k
            if j - i >= minimal_loop_length:
                down = nm[i + 1][j] # 1st rule
                left = nm[i][j - 1] # 2nd rule
                diag = nm[i + 1][j - 1] + base_pair(i,j) # 3rd rule
                rc = max([nm[i][t] + nm[t + 1][j] for t in range(i, j)]) # 4th rule
                nm[i][j] = max(down, left, diag, rc) # max of all
            else:
                nm[i][j] = 0
    return nm

modified=nussinov_matrix_modified(seq,minimum_hairpin_loop=7)


Minimum hairpin loop length is 7


In [10]:

NN_list = backtracking(modified,seq,0,len(seq)-1, []) 
i = [x[0] for x in NN_list]
j = [x[1] for x in NN_list]
NN_pair = [(x[2],x[3]) for x in NN_list]

print("Number of matched pairs: ",len(NN_list))
print("i  j  NN")
for x in range(0,len(NN_list)):
     print(i[x],j[x],*NN_pair[x])
        
# printing the structure
        
write_structure(seq, [[i[x],j[x]] for x in range(len(i))])


Number of matched pairs:  11
i  j  NN
0 35 A U
1 21 U A
2 20 C G
3 19 G C
4 18 G C
5 17 A U
6 16 G C
7 15 C G
23 34 G C
24 33 C G
25 32 A U


'((((((((.......))))))).(((......)))).....'