In [17]:
%cd ../..
import pandas as pd
from scripts.features import *
pd.set_option('display.max_columns', None)
from scripts.utils_v2_for_jupyter import *

/run/media/nazif/2F946E411BA61D49


In [28]:
import subprocess

def invoke_rnaduplex(long_sequence: str, short_sequence: str, energy_range: float = 5.0,
                     rnaduplex_location: str = "/usr/bin/RNAduplex") -> tuple:

    # short_sequence = reverse_complement(short_sequence)
    
    input_sequence = f"{long_sequence}\n{short_sequence}".encode()

    rnaduplex_subprocess = subprocess.Popen(
        [rnaduplex_location, "-e", f"{energy_range}", "-s"],
        stdout=subprocess.PIPE,
        stdin=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    output, error = rnaduplex_subprocess.communicate(input=input_sequence)
    rnaduplex_subprocess.wait()

    first_line = output.decode().split("\n")[0].split()

    dot_bracket_long, dot_bracket_short = first_line[0].split("&")
    start_long, end_long = map(int, first_line[1].split(","))
    start_short, end_short = map(int, first_line[3].split(","))
    energy = float(first_line[-1].strip("()"))

    return start_long, end_long, dot_bracket_long, start_short, end_short, dot_bracket_short, energy

invoke_rnaduplex("AAAAATGGCA", "TGCCATTTTT")


(1, 10, '((((((((((', 1, 10, '))))))))))', -10.5)

# case 1

same sequence with its revcomp

In [10]:
results = invoke_rnaduplex("AAAAATGGCA", "TGCCATTTTT")

print("AAAAATGGCA")
print(results[2])
print(results[5])
print("TGCCATTTTT"[::-1])



AAAAATGGCA
((((((((((
))))))))))
TTTTTACCGT


# case 2

same sequence with its revcomp but the last character is different

In [30]:
seq1 = "AAAAATGGCA"
seq2 = "AAAAATGGCA"

results = invoke_rnaduplex(seq1, seq2)

print(seq1)
print(results[2])
print(results[5])
print(seq2)


AAAAATGGCA
.((.((
.)).))
AAAAATGGCA


In [42]:
seq1 = "AAAAATGGCA"
seq2 = "TGGCAG"

results = invoke_rnaduplex(seq1, seq2)


seq1_slice = seq1[results[0]-1:results[1]]
seq2_slice = " 5 " + seq2[results[3]-1:results[4]] + " 3 "
print(results)

print(seq1_slice)
print(results[2])
print(results[5][::-1])
print(seq2_slice[::-1])

(5, 10, '.((.((', 1, 6, ')).)).', -2.2)
ATGGCA
.((.((
.)).))
 3 GACGGT 5 


In [67]:
reverse_complement("UUGGUGUGUUGGAUGAUGGAGU").replace("U", "T")

'TCTCCTTCTTCCTTCTCTCCTT'

In [69]:
seq1 = "TCGTCGA"
seq2 = "TCGTCGA"

results = invoke_rnaduplex(seq1, seq2)

print(results)

seq1_slice = f"  5'  {seq1[results[0] - 1:results[1]]}  3'  "
seq1_dotbracket = f"  5'  {results[2]}  3'  "
seq2_dotbracket = f"  3'  {results[5][::-1]}  5'  "
seq2_slice = f"  3'  {seq2[results[3] - 1:results[4]][::-1]}  5'  "


print(seq1_slice)
print(seq1_dotbracket)
print(seq2_dotbracket)
print(seq2_slice)

len(seq1)

(1, 7, '(((.(((', 1, 7, '))).)))', -5.1)
  5'  TCGTCGA  3'  
  5'  (((.(((  3'  
  3'  ))).)))  5'  
  3'  AGCTGCT  5'  


7

In [26]:
seq1 = "AAAAATGGCA"
seq2 = "AAAAATGGCT"

results = invoke_rnaduplex(seq1, seq2)

print(seq1)
print(results[2])
print(results[5][::-1])
print(seq2)



AAAAATGGCA
(((((((((.
))))))))).
AAAAATGGCT


In [20]:
seq1 = "CTGGAGGTTAAGTACAGTTA"
seq2 = "TGAGGTAGTAGGTTGTATAGTT"

results = invoke_rnaduplex(seq1, seq2)

print(seq1)
print(results[2])
print(results[5][::-1])
print(seq2[::-1])


CTGGAGGTTAAGTACAGTTA
.((((((((..(((.((((.
.))).)))))))).))))
TTGATATGTTGGATGATGGAGT


In [75]:
seq1 = "GCCAACGUUCGAUUUCUACCUCA"
seq2 = "TGAGGTAGTAGGTTGTGTGGTT"



results = invoke_rnaduplex(seq1, seq2)

print(results)

seq1_slice = f"  5'  {seq1[results[0] - 1:results[1]]}  3'  "
seq1_dotbracket = f"  5'  {results[2]}  3'  "
seq2_dotbracket = f"  3'  {results[5][::-1]}  5'  "
seq2_slice = f"  3'  {seq2[results[3] - 1:results[4]][::-1]}  5'  "


print(seq1_slice)
print(seq1_dotbracket)
print(seq2_dotbracket)
print(seq2_slice)

len(seq1)

(1, 23, '((((.....((((((((((((((', 1, 22, ')))))))).))))))..)))).', -19.0)
  5'  GCCAACGUUCGAUUUCUACCUCA  3'  
  5'  ((((.....((((((((((((((  3'  
  3'  .))))..)))))).))))))))  5'  
  3'  TTGGTGTGTTGGATGATGGAGT  5'  


23

# real targetscan test case

mirna sequence:  UGAGGUAGUAGGUUGUGUGGUU or TGAGGTAGTAGGTTGTGTGGTT (same, T:U change)

In [77]:
seq1 = "GCCAACGUUCGAUUUCUACCUCA"
seq2 = "UGAGGUAGUAGGUUGUGUGGUU"



results = invoke_rnaduplex(seq1, seq2)

print(results)

seq1_slice = f"  5'  {seq1[results[0] - 1:results[1]]}  3'  "
seq1_dotbracket = f"  5'  {results[2]}  3'  "
seq2_dotbracket = f"  3'  {results[5][::-1]}  5'  "
seq2_slice = f"  3'  {seq2[results[3] - 1:results[4]][::-1]}  5'  "


print(seq1_slice)
print(seq1_dotbracket)
print(seq2_dotbracket)
print(seq2_slice)

len(seq1)

(1, 23, '((((.....((((((((((((((', 1, 22, ')))))))).))))))..)))).', -19.0)
  5'  GCCAACGUUCGAUUUCUACCUCA  3'  
  5'  ((((.....((((((((((((((  3'  
  3'  .))))..)))))).))))))))  5'  
  3'  UUGGUGUGUUGGAUGAUGGAGU  5'  


23

mRNA'yı normal 5-3 verince, miRNA'yı da bendeki gibi 5-3 urasil yerine timinli verince kendi kendine hallediyor.

rnaduplex'e sekansları 5-3 DNA şeklinde verince sorun çıkmıyor