# Common

In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm  # !pip install tqdm
from tqdm import trange
import multiprocessing as mp
import shutil
import glob
import os
import sys
if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO

In [2]:
def bracket_row(row):    
    s = row['data']
    index = min(s.find('.'), s.find('('))
    data = row['data']
    row['data'] = data[0:index]
    row['bracket'] = data[index:]
    return row

In [3]:
def adjust(text,n=7):
    text = str(text)    
    return " " * (n - len(text)) + text

In [4]:
def bracket_to_ct(tag, data, bracket, deltaG, negative_deltaG=True):    
    deltaG = deltaG.replace('(','').replace(')','')
    deltaG = float(deltaG)
    if(deltaG > 0 and negative_deltaG ): # negetive?!
        deltaG = -1 * deltaG
    stack = []
    index = np.zeros((len(bracket)), dtype = int)
    values = np.zeros((len(bracket)), dtype = int)
    for i in range(len(bracket)):
        index[i] = i + 1
        if(bracket[i] == '.'):
            values[i] = 0
        elif(bracket[i] == '('):
            stack.append(i)
        elif(bracket[i] == ')'):
            if(len(stack) == 0 ):
                print('structure error!')
            values[stack[-1]] = i + 1
            values[i]  = stack[-1] + 1
            stack.pop()
        else:
            print('structure error!')
    if(len(stack) != 0 ):
        print('structure error!')
    # body    
    ct = f"{adjust(len(data),6)} dG ={adjust(deltaG,10)} {tag}\n"   
    for i in range(len(bracket)):
        ct += f"{adjust(index[i],6)} {data[i]} {adjust(i,6)} {adjust((i+2)%(len(data)+1),6)} {adjust(values[i],6)} {adjust(index[i],7)}\n"
    return ct

In [5]:
def fasta_to_df(path):
    with open(path, 'r') as file:
        text = file.read()
    lines = [line for line in text.split('\n') if len(line) > 0]
    s = ''
    tags = []
    data = []
    for l in lines:
        if(l[0]=='>'):
            tags.append(l)        
            data.append(s)
            s = ''
        else:
            s += l    
    data.append(s)
    df = pd.DataFrame(
            {
                'tag': tags,
                'data': data[1:]
            })
    df['tag'] = df['tag'].apply(lambda x: x[1:])    
    return df

In [6]:
def df_to_fasta(df, path):
    lines = []
    df.apply(lambda row: lines.append(f">{row['tag']}\n{row['data']}\n"),axis=1)
    with open(path,'w') as file:
        file.write(''.join(lines))

In [7]:
def reformat(path):
    return path.replace('(','_').replace(')','_').replace('.','').replace(':','_')

In [8]:
def reformatCT(path):
    with open(path, 'r') as file:
        text = file.read()
    text = [l for l in text.split('\n') if len(l) > 0 ] # remove blank lines
    text = '\n'.join(text)
    text = text.replace("\t"," ")
    while("  " in text):
        text = text.replace("  ", " ")
    lines = [l for l in text.split('\n')]
    for i in range(len(lines)):
        if(lines[i][0] == " "):
            lines[i] = lines[i][1:]
        if(lines[i][-1] == " "):
            lines[i] = lines[i][:-1]
    text = '\n'.join(lines)
    return text

In [9]:
def get_ct_data(ct):
    ct = "\n".join(ct.split('\n')[1:])
    df = pd.read_csv(StringIO(ct), sep=" ", header=None)               
    nucleotide = df.iloc[:,1]
    index = df.iloc[:,5]
    values = df.iloc[:,4]
    return [nucleotide, index, values]

In [10]:
def ct2dot_bracket(path):
    [nucleotide, index, values] = get_ct_data(reformatCT(path))
    text = ''.join(nucleotide) + "\n"
    watch = []
    for i, v in zip(index,values):
        if(v == 0):
            text += '.'
        else:
            if( v not in watch):
                text += '('
                watch.append(i)
            if( v in watch):
                text += ')'
    return text

In [11]:
def is_nested(index, values):
    max_value = max(index) + 10 # inf
    for i, v in zip(index, values):
        if(v < max_value and v != 0):
            max_value  = v
        if(i >= max_value):
            max_value = max(index) + 10 # inf
        if(v > max_value):
            return False               
    return True

In [12]:
'''ct = reformatCT('./secondary_structure/spot_rna/AMWY020598281_2832-3256_+_/AMWY020598281_2832-3256_+_.ct')
[nucleotide, index, values] = get_ct_data(ct)
print(is_nested( index,  values))
''';

### rename tag of input genome to new tag id

# Download dataset

In [186]:
from Bio import Entrez
Entrez.email = "abolhasani.eliya@gmail.com"     
with Entrez.esearch(db='nucleotide', term="Arabidopsis thaliana") as handle:
    result = Entrez.read(handle)

print(result)
genome_ids = result['IdList']

for genome_id in genome_ids:
    print(genome_id)
    record = Entrez.efetch(db="nucleotide", id=genome_id, rettype="fasta", retmode="text")        
    with open(f'{genome_id}.fasta', 'w') as f:
        f.write(record.read())
    break

{'Count': '3344178', 'RetMax': '20', 'RetStart': '0', 'IdList': ['2112722214', '2112720223', '2112718526', '2112713974', '2112712776', '2112711465', '2112710136', '2112708929', '2112707731', '2112706611', '2112705454', '2112704446', '2112703491', '2112702625', '2112701442', '2112700588', '2112699487', '2112698892', '2112697886', '2112697471'], 'TranslationSet': [{'From': 'Arabidopsis thaliana', 'To': '"Arabidopsis thaliana"[Organism] OR Arabidopsis thaliana[All Fields]'}], 'TranslationStack': [{'Term': '"Arabidopsis thaliana"[Organism]', 'Field': 'Organism', 'Count': '2700195', 'Explode': 'Y'}, {'Term': 'Arabidopsis thaliana[All Fields]', 'Field': 'All Fields', 'Count': '3344178', 'Explode': 'N'}, 'OR', 'GROUP'], 'QueryTranslation': '"Arabidopsis thaliana"[Organism] OR Arabidopsis thaliana[All Fields]'}
2112722214


KeyboardInterrupt: 

In [68]:
from Bio import Entrez
Entrez.email = "abolhasani.eliya@gmail.com"     
record = Entrez.efetch(db="nucleotide", id="NC_054143.4", rettype="fasta", retmode="text")        
with open(f'data.fasta', 'w') as f:
    f.write(record.read())

In [259]:
!wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/439/995/GCA_000439995.3_AzaInd2.1/GCA_000439995.3_AzaInd2.1_genomic.fna.gz

--2021-10-24 19:04:50--  https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/439/995/GCA_000439995.3_AzaInd2.1/GCA_000439995.3_AzaInd2.1_genomic.fna.gz
Resolving ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)... 165.112.9.230, 130.14.250.11, 2607:f220:41f:250::229, ...
Connecting to ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)|165.112.9.230|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 85647577 (82M) [application/x-gzip]
Saving to: ‘GCA_000439995.3_AzaInd2.1_genomic.fna.gz’


2021-10-24 19:05:01 (7.99 MB/s) - ‘GCA_000439995.3_AzaInd2.1_genomic.fna.gz’ saved [85647577/85647577]



In [263]:
!gzip -d ./GCA_000439995.3_AzaInd2.1_genomic.fna.gz

# Download data from Mirbase

In [112]:
base = "ftp://mirbase.org/pub/mirbase/CURRENT"
!wget {base}/aliases.txt.gz -P ./Data/          ; gzip -d ./Data/aliases.txt.gz 
!wget {base}/hairpin.fa.gz -P ./Data/           ; gzip -d ./Data/hairpin.fa.gz 
!wget {base}/hairpin_high_conf.fa.gz -P ./Data/ ; gzip -d ./Data/hairpin_high_conf.fa.gz 
!wget {base}/mature.fa.gz -P ./Data/            ; gzip -d ./Data/mature.fa.gz 
!wget {base}/mature_high_conf.fa.gz -P ./Data/  ; gzip -d ./Data/mature_high_conf.fa.gz
!wget {base}/miRNA.str.gz -P ./Data/            ; gzip -d ./Data/miRNA.str.gz 
!wget {base}/miRNA.xls.gz -P ./Data/            ; gzip -d ./Data/miRNA.xls.gz 
!wget {base}/organisms.txt.gz -P ./Data/        ; gzip -d ./Data/organisms.txt.gz

--2021-10-21 15:12:53--  ftp://mirbase.org/pub/mirbase/CURRENT/aliases.txt.gz
           => ‘./Data/aliases.txt.gz’
Resolving mirbase.org (mirbase.org)... 130.88.97.249
Connecting to mirbase.org (mirbase.org)|130.88.97.249|:21... connected.
Logging in as anonymous ... Logged in!
==> SYST ... done.    ==> PWD ... done.
==> TYPE I ... done.  ==> CWD (1) /pub/mirbase/CURRENT ... done.
==> SIZE aliases.txt.gz ... 480536
==> PASV ... done.    ==> RETR aliases.txt.gz ... done.
Length: 480536 (469K) (unauthoritative)


2021-10-21 15:12:55 (415 KB/s) - ‘./Data/aliases.txt.gz’ saved [480536]

--2021-10-21 15:12:55--  ftp://mirbase.org/pub/mirbase/CURRENT/hairpin.fa.gz
           => ‘./Data/hairpin.fa.gz’
Resolving mirbase.org (mirbase.org)... 130.88.97.249
Connecting to mirbase.org (mirbase.org)|130.88.97.249|:21... connected.
Logging in as anonymous ... Logged in!
==> SYST ... done.    ==> PWD ... done.
==> TYPE I ... done.  ==> CWD (1) /pub/mirbase/CURRENT ... done.
==> SIZE hairpin.fa.gz ...

In [3]:
df = fasta_to_df('./Data/mature.fa')
#df = fasta_to_df('./Data/mature_high_conf.fa')
#df = fasta_to_df('./Data/hairpin_high_conf.fa')
df['organism'] = df['tag'].apply(lambda x: x[:3])
print(df.shape)
df.head(2)

(48885, 3)


Unnamed: 0,tag,data,organism
0,cel-let-7-5p MIMAT0000001 Caenorhabditis elega...,UGAGGUAGUAGGUUGUAUAGUU,cel
1,cel-let-7-3p MIMAT0015091 Caenorhabditis elega...,CUAUGCAAUUUUCUACCUUACC,cel


In [5]:
organism = pd.read_csv('./Data/organisms.txt',sep='\t')
organism.columns = [c.replace('#','') for c in organism.columns] # remove sharp from columns
print(organism.shape)
organism.head(2)

(285, 5)


Unnamed: 0,organism,division,name,tree,NCBI-taxid
0,aqu,AQU,Amphimedon queenslandica,Metazoa;Porifera;,400682
1,nve,NVE,Nematostella vectensis,Metazoa;Cnidaria;,45351


In [6]:
items = list(organism['tree'].unique())
items.sort(key=len)
items

['Viruses;',
 'Mycetozoa;',
 'Alveolata;',
 'Metazoa;Porifera;',
 'Metazoa;Cnidaria;',
 'Viridiplantae;Chlorophyta;',
 'Viridiplantae;Embryophyta;',
 'Viridiplantae;Coniferophyta;',
 'Viridiplantae;Magnoliophyta;',
 'Metazoa;Bilateria;Deuterostoma;',
 'Chromalveolata;Heterokontophyta;',
 'Metazoa;Bilateria;Ecdysozoa;Nematoda;',
 'Metazoa;Bilateria;Lophotrochozoa;Annelida;',
 'Metazoa;Bilateria;Lophotrochozoa;Nemertea;',
 'Metazoa;Bilateria;Lophotrochozoa;Mollusca;',
 'Viridiplantae;Magnoliophyta;monocotyledons;',
 'Metazoa;Bilateria;Deuterostoma;Hemichordata;',
 'Metazoa;Bilateria;Deuterostoma;Echinodermata;',
 'Metazoa;Bilateria;Lophotrochozoa;Brachiopoda;',
 'Metazoa;Bilateria;Ecdysozoa;Arthropoda;Hexapoda;',
 'Metazoa;Bilateria;Ecdysozoa;Arthropoda;Crustacea;',
 'Metazoa;Bilateria;Lophotrochozoa;Platyhelminthes;',
 'Metazoa;Bilateria;Ecdysozoa;Arthropoda;Chelicerata;',
 'Metazoa;Bilateria;Ecdysozoa;Arthropoda;Mandibulata;',
 'Viridiplantae;Magnoliophyta;eudicotyledons;Poaceae;',
 'M

In [7]:
selectedTree = organism[organism['tree'].apply(lambda x: "Viridiplantae;" in x)]
print(selectedTree.shape)
selectedTree.head(5)

(86, 5)


Unnamed: 0,organism,division,name,tree,NCBI-taxid
66,cre,CRE,Chlamydomonas reinhardtii,Viridiplantae;Chlorophyta;,3055
67,pta,PTA,Pinus taeda,Viridiplantae;Coniferophyta;,3352
68,ppt,PPT,Physcomitrella patens,Viridiplantae;Embryophyta;,3218
69,smo,SMO,Selaginella moellendorffii,Viridiplantae;Embryophyta;,88036
70,ath,ATH,Arabidopsis thaliana,Viridiplantae;Magnoliophyta;eudicotyledons;Bra...,3702


In [8]:
selected = df[df['organism'].isin(selectedTree['organism'])]
print(selected.shape)
selected.head()

(10414, 3)


Unnamed: 0,tag,data,organism
316,ath-miR156a-5p MIMAT0000166 Arabidopsis thalia...,UGACAGAAGAGAGUGAGCAC,ath
317,ath-miR156a-3p MIMAT0031865 Arabidopsis thalia...,GCUCACUGCUCUUUCUGUCAGA,ath
318,ath-miR156b-5p MIMAT0000167 Arabidopsis thalia...,UGACAGAAGAGAGUGAGCAC,ath
319,ath-miR156b-3p MIMAT0031866 Arabidopsis thalia...,UGCUCACCUCUCUUUCUGUCAGU,ath
320,ath-miR156c-5p MIMAT0000168 Arabidopsis thalia...,UGACAGAAGAGAGUGAGCAC,ath


In [9]:
df_to_fasta(selected,'./Temp/selected.fasta')

# Remove redundant

## cdhit-est

In [13]:
!cdhit/cd-hit-est -i ./Temp/selected.fasta  -o ./Temp/out.fasta \
    -c 1 -r 0 -G 1 -g 1 -b 30 -l 10 -aL 0 -AL 99999999 -aS 0 \
    -AS 99999999 -s 0 -S 0 

Program: CD-HIT, V4.8.1 (+OpenMP), Oct 23 2021, 21:45:39
Command: cdhit/cd-hit-est -i ./Temp/selected.fasta -o
         ./Temp/out.fasta -c 1 -r 0 -G 1 -g 1 -b 30 -l 10 -aL 0
         -AL 99999999 -aS 0 -AS 99999999 -s 0 -S 0

Started: Wed Oct 27 20:00:13 2021
                            Output                              
----------------------------------------------------------------
total seq: 10414
longest and shortest : 28 and 17
Total letters: 222978
Sequences have been sorted

Approximated minimal memory consumption:
Sequence        : 1M
Buffer          : 1 X 12M = 12M
Table           : 1 X 16M = 16M
Miscellaneous   : 0M
Total           : 30M

Table limit with the given memory limit:
Max number of representatives: 4000000
Max number of word counting entries: 96149440

comparing sequences from          0  to      10414
..........    10000  finished       5817  clusters

    10414  finished       6028  clusters

Approximated maximum memory consumption: 31M
writing new database
w

## reformat

In [14]:
with open('./Temp/out.fasta.clstr','r') as file:
    text = file.read()
lines = [line for line in text.split('\n') if len(line) > 0]
cluster = []
seqid = []
last_cluster = ""
for l in lines:
    if(l[0]=='>'):        
        last_cluster = l.replace('>Cluster ',"C")
    else:        
        cluster.append(last_cluster)
        seqid.append(l.split(', >')[1].split('...')[0])                
seq2cluster = pd.DataFrame({'seqid': seqid,'cluster': cluster})
print(seq2cluster.shape)
seq2cluster.head(2)    

(10414, 2)


Unnamed: 0,seqid,cluster
0,cst-miR11332,C0
1,stu-miR7994b-5p,C1


In [15]:
df = fasta_to_df("./Temp/selected.fasta")
df['accession'] = df['tag'].apply(lambda x : x.split(' ')[0])
seq2cluster = pd.merge(df,seq2cluster,how="inner",left_on='accession',right_on="seqid")[['cluster','seqid','tag']]
print(seq2cluster.shape)
display(seq2cluster.head(2))
seq2cluster.to_csv('./Temp/seq2cluster.csv',index=False)

(10414, 3)


Unnamed: 0,cluster,seqid,tag
0,C5495,ath-miR156a-5p,ath-miR156a-5p MIMAT0000166 Arabidopsis thalia...
1,C1199,ath-miR156a-3p,ath-miR156a-3p MIMAT0031865 Arabidopsis thalia...


In [16]:
# todo: sorted first by cluster then by seqid
seq2cluster.sort_values("cluster").head(2)

Unnamed: 0,cluster,seqid,tag
9422,C0,cst-miR11332,cst-miR11332 MIMAT0044622 Cucumis sativus miR1...
7002,C1,stu-miR7994b-5p,stu-miR7994b-5p MIMAT0031188 Solanum tuberosum...


In [17]:
df = fasta_to_df("./Temp/out.fasta")
df['tag'] = df['tag'].apply(lambda x : x.split(' ')[0])
df = pd.merge(df,seq2cluster,how="inner",left_on='tag',right_on="seqid")[['cluster','data']]

lines = []
df.apply(lambda row: lines.append(f">{row['cluster']}\n{row['data']}\n"),axis=1)
print(df.shape)
with open('./Temp/selected_clustrerd.fasta','w') as file:
    file.write(''.join(lines))

(6028, 2)


# BlastN

!sudo apt-get install ncbi-blast+


In [18]:
!makeblastdb -in input_genome.fna \
             -dbtype nucl \
             -out ./Temp/blastn_database



Building a new DB, current time: 10/27/2021 20:01:13
New DB name:   /home/jupyter/plant_microRNA_prediction/Temp/blastn_database
New DB title:  input_genome.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 126142 sequences in 9.91275 seconds.


In [63]:
header = "qseqid sseqid qstart qend sstart send qseq sseq evalue bitscore score length pident nident mismatch positive gapopen gaps ppos frames qframe sframe sstrand qcovs qcovhsp qlen slen"

In [29]:
blast_header = "6 " + header
!blastn -query ./Temp/selected_clustrerd.fasta \
        -out ./Temp/out_blastn \
        -num_threads {mp.cpu_count()} \
        -db ./Temp/blastn_database \
        -word_size 7 \
        -penalty -3 \
        -reward 2 \
        -gapopen 5 \
        -gapextend 2 \
        -outfmt {blast_header}

In [64]:
df_blastn = pd.read_csv('./Temp/out_blastn', sep='\t',header=None)
df_blastn.columns = header.replace("  "," ").split(" ")
print(df_blastn.shape)
df_blastn.head(2)

(326849, 27)


Unnamed: 0,qseqid,sseqid,qstart,qend,sstart,send,qseq,sseq,evalue,bitscore,...,gaps,ppos,frames,qframe,sframe,sstrand,qcovs,qcovhsp,qlen,slen
0,C5495,AMWY02099822.1,1,20,1769,1750,TGACAGAAGAGAGTGAGCAC,TGACAGAAGAGAGTGAGCAC,0.004,37.4,...,0,100.0,1/-1,1,-1,minus,100,100,20,3308
1,C5495,AMWY02082313.1,1,20,5954,5973,TGACAGAAGAGAGTGAGCAC,TGACAGAAGAGAGTGAGCAC,0.004,37.4,...,0,100.0,1/1,1,1,plus,100,100,20,8471


In [65]:
threshold = 4
df_blastn['Nonconformity'] = df_blastn['qlen'] - (abs(df_blastn['qend'] - df_blastn['qstart']) + 1) + df_blastn['gaps'] + df_blastn['mismatch']
df_blastn = df_blastn[df_blastn['Nonconformity'] <= threshold]
print(df_blastn.shape)
df_blastn.head(2)

(80217, 28)


Unnamed: 0,qseqid,sseqid,qstart,qend,sstart,send,qseq,sseq,evalue,bitscore,...,ppos,frames,qframe,sframe,sstrand,qcovs,qcovhsp,qlen,slen,Nonconformity
0,C5495,AMWY02099822.1,1,20,1769,1750,TGACAGAAGAGAGTGAGCAC,TGACAGAAGAGAGTGAGCAC,0.004,37.4,...,100.0,1/-1,1,-1,minus,100,100,20,3308,0
1,C5495,AMWY02082313.1,1,20,5954,5973,TGACAGAAGAGAGTGAGCAC,TGACAGAAGAGAGTGAGCAC,0.004,37.4,...,100.0,1/1,1,1,plus,100,100,20,8471,0


In [66]:
# remore redundancy and hold best one base of Nonconformity value
df_blastn = df_blastn.sort_values(["Nonconformity", "evalue"], ascending = (True, True))
df_blastn = df_blastn.drop_duplicates(subset=['sseqid','sstart', 'send','sstrand'], keep='first')
df_blastn.to_csv('./Temp/filtered_out_blastn.csv')
print(df_blastn.shape)

(66445, 28)


# Result of the blastn to bed file

In [67]:
flanking_value = 200
df = df_blastn[['sseqid', 'sstart', 'send', 'sstrand','slen']]
df['ones'] = 1
df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ones'] = 1


Unnamed: 0,sseqid,sstart,send,sstrand,slen,ones
132836,AMWY02059828.1,3033,3056,plus,4662,1
300170,AMWY02004761.1,2054,2077,plus,3675,1
320840,AMWY02033394.1,693,670,minus,1874,1
39433,AMWY02098706.1,207,185,minus,2315,1
39436,AMWY02003241.1,2198,2220,plus,4402,1


In [68]:
def switch(row):
    if(row['sstart'] > row['send']):        
        temp = row['sstart']
        row['sstart'] = row['send']
        row['send'] = temp
    return row
df = df.apply(lambda row: switch(row), axis=1)
df.head()

Unnamed: 0,sseqid,sstart,send,sstrand,slen,ones
132836,AMWY02059828.1,3033,3056,plus,4662,1
300170,AMWY02004761.1,2054,2077,plus,3675,1
320840,AMWY02033394.1,670,693,minus,1874,1
39433,AMWY02098706.1,185,207,minus,2315,1
39436,AMWY02003241.1,2198,2220,plus,4402,1


In [69]:
def convert(inp):
    if(inp == "plus"):
        return "forward"
    if(inp == "minus"):
        return "reverse"
    raise Exception('Error, sstrand contains illegal word! only "plus" and "minus" are allowed')
df['strand'] = df['sstrand'].apply(lambda x: convert(x))
df.head()

Unnamed: 0,sseqid,sstart,send,sstrand,slen,ones,strand
132836,AMWY02059828.1,3033,3056,plus,4662,1,forward
300170,AMWY02004761.1,2054,2077,plus,3675,1,forward
320840,AMWY02033394.1,670,693,minus,1874,1,reverse
39433,AMWY02098706.1,185,207,minus,2315,1,reverse
39436,AMWY02003241.1,2198,2220,plus,4402,1,forward


In [70]:
def convert2sign(inp):
    if(inp == "plus"):
        return "+"
    if(inp == "minus"):
        return "-"
    raise Exception('Error, sstrand contains illegal word! only "plus" and "minus" are allowed')
df['sign'] = df['sstrand'].apply(lambda x: convert2sign(x))
df.head()

Unnamed: 0,sseqid,sstart,send,sstrand,slen,ones,strand,sign
132836,AMWY02059828.1,3033,3056,plus,4662,1,forward,+
300170,AMWY02004761.1,2054,2077,plus,3675,1,forward,+
320840,AMWY02033394.1,670,693,minus,1874,1,reverse,-
39433,AMWY02098706.1,185,207,minus,2315,1,reverse,-
39436,AMWY02003241.1,2198,2220,plus,4402,1,forward,+


In [71]:
df['hit_length'] = df.apply(lambda row: abs(row['send'] - row['sstart']) + 1 ,axis=1)
df.head()

Unnamed: 0,sseqid,sstart,send,sstrand,slen,ones,strand,sign,hit_length
132836,AMWY02059828.1,3033,3056,plus,4662,1,forward,+,24
300170,AMWY02004761.1,2054,2077,plus,3675,1,forward,+,24
320840,AMWY02033394.1,670,693,minus,1874,1,reverse,-,24
39433,AMWY02098706.1,185,207,minus,2315,1,reverse,-,23
39436,AMWY02003241.1,2198,2220,plus,4402,1,forward,+,23


## convert sstart and send from location to index (range)

In [72]:
df['sstart'] = df['sstart'].apply(lambda x: x - 1)
df.head()

Unnamed: 0,sseqid,sstart,send,sstrand,slen,ones,strand,sign,hit_length
132836,AMWY02059828.1,3032,3056,plus,4662,1,forward,+,24
300170,AMWY02004761.1,2053,2077,plus,3675,1,forward,+,24
320840,AMWY02033394.1,669,693,minus,1874,1,reverse,-,24
39433,AMWY02098706.1,184,207,minus,2315,1,reverse,-,23
39436,AMWY02003241.1,2197,2220,plus,4402,1,forward,+,23


In [73]:
df['downstream_flanking'] = df['sstart'].apply(lambda x:  flanking_value if x > flanking_value else x)
df.head(2)

Unnamed: 0,sseqid,sstart,send,sstrand,slen,ones,strand,sign,hit_length,downstream_flanking
132836,AMWY02059828.1,3032,3056,plus,4662,1,forward,+,24,200
300170,AMWY02004761.1,2053,2077,plus,3675,1,forward,+,24,200


In [74]:
df['upstream_flanking'] = df.apply(lambda row:  flanking_value if (row['send']+flanking_value) <= row['slen'] else row['slen'] - row['send'],axis=1)
df.head()

Unnamed: 0,sseqid,sstart,send,sstrand,slen,ones,strand,sign,hit_length,downstream_flanking,upstream_flanking
132836,AMWY02059828.1,3032,3056,plus,4662,1,forward,+,24,200,200
300170,AMWY02004761.1,2053,2077,plus,3675,1,forward,+,24,200,200
320840,AMWY02033394.1,669,693,minus,1874,1,reverse,-,24,200,200
39433,AMWY02098706.1,184,207,minus,2315,1,reverse,-,23,184,200
39436,AMWY02003241.1,2197,2220,plus,4402,1,forward,+,23,200,200


In [75]:
df['hit_start'] = df.apply(lambda row: row['downstream_flanking'] if row['sign'] == "+" else row['upstream_flanking'],axis=1)
df.head(2)

Unnamed: 0,sseqid,sstart,send,sstrand,slen,ones,strand,sign,hit_length,downstream_flanking,upstream_flanking,hit_start
132836,AMWY02059828.1,3032,3056,plus,4662,1,forward,+,24,200,200,200
300170,AMWY02004761.1,2053,2077,plus,3675,1,forward,+,24,200,200,200


In [76]:
df['hit_end'] = df.apply(lambda row: row['downstream_flanking'] + row['hit_length'] if row['sign'] == "+" else row['upstream_flanking'] + row['hit_length'],axis=1)
df.head(2)

Unnamed: 0,sseqid,sstart,send,sstrand,slen,ones,strand,sign,hit_length,downstream_flanking,upstream_flanking,hit_start,hit_end
132836,AMWY02059828.1,3032,3056,plus,4662,1,forward,+,24,200,200,200,224
300170,AMWY02004761.1,2053,2077,plus,3675,1,forward,+,24,200,200,200,224


In [77]:
df['sstart'] = df['sstart'].apply(lambda x: max(x - flanking_value, 0))
df['send'] = df.apply(lambda row: min(row['send'] + flanking_value , row['slen']),axis=1)
df.head(2)

Unnamed: 0,sseqid,sstart,send,sstrand,slen,ones,strand,sign,hit_length,downstream_flanking,upstream_flanking,hit_start,hit_end
132836,AMWY02059828.1,2832,3256,plus,4662,1,forward,+,24,200,200,200,224
300170,AMWY02004761.1,1853,2277,plus,3675,1,forward,+,24,200,200,200,224


In [78]:
df['tag'] = df.apply(lambda row: f">{row['sseqid']}:{row['sstart']}-{row['send']}({row['sign']})",axis=1)
df['reformated_tag'] = df['tag'].apply(lambda t: reformat(t))
df[['tag', 'reformated_tag', 'hit_start', 'hit_end']].to_csv('./Temp/hit_index_info.csv')#, index=False)

In [79]:
df['location_tag'] = df.apply(lambda row: f">{row['sseqid']}|{row['sign']}|{row['sstart'] + 1}-{row['send']}|{row['hit_start']+1}-{row['hit_end']}",axis=1)
df[['location_tag']].to_csv('./Temp/pipe_seprated_location_list.csv',index=False)
df[['location_tag']].head(10)

Unnamed: 0,location_tag
132836,>AMWY02059828.1|+|2833-3256|201-224
300170,>AMWY02004761.1|+|1854-2277|201-224
320840,>AMWY02033394.1|-|470-893|201-224
39433,>AMWY02098706.1|-|1-407|201-223
39436,>AMWY02003241.1|+|1998-2420|201-223
40450,>AMWY02103996.1|-|38-460|201-223
40452,>AMWY02089009.1|+|1705-2127|201-223
40454,>AMWY02083127.1|+|3091-3455|201-223
40456,>AMWY02043062.1|-|415-837|201-223
40459,>AMWY02038760.1|+|1825-2247|201-223


In [124]:
df_blastn[df_blastn.index==39433]

Unnamed: 0,qseqid,sseqid,qstart,qend,sstart,send,qseq,sseq,evalue,bitscore,...,frames,qframe,sframe,sstrand,qcovs,qcovhsp,qlen,slen,Nonconformity,hit
39433,C1015,AMWY02098706.1,1,23,207,185,TGGAGTGGAGTGGAGTGGAGTGG,TGGAGTGGAGTGGAGTGGAGTGG,0.000104,42.8,...,1/-1,1,-1,minus,100,100,23,2315,0,TGGAGTGGAGTGGAGTGGAGTGG


In [80]:
df[['sseqid','sstart','send','strand','ones', 'sign']].to_csv('./Temp/extension_index.bed', 
        index=False, header=False, sep="\t")

# Extention


## !sudo apt-get install bedtools

In [81]:
!bedtools getfasta -fi ./input_genome.fna -fo ./Temp/extended.txt -s -bed ./Temp/extension_index.bed
!rm input_genome.fna.fai

index file ./input_genome.fna.fai not found, generating...


In [82]:
# todo: remove duplicated
df = fasta_to_df("./Temp/extended.txt")
df = df.drop_duplicates(subset=['tag'], keep='first')
df_to_fasta(df,"./Temp/extended.txt")
len(df['tag'].unique()) 

65798

# Seq Validation


In [144]:
df_blastn['hit'] = df_blastn['sseq'].apply(lambda x: x.replace('-', ''))
df_blastn.head(2)

Unnamed: 0,qseqid,sseqid,qstart,qend,sstart,send,qseq,sseq,evalue,bitscore,...,frames,qframe,sframe,sstrand,qcovs,qcovhsp,qlen,slen,Nonconformity,hit
132836,C286,AMWY02059828.1,1,24,3033,3056,ATTCAGTTGATGCAAGGCGGGATC,ATTCAGTTGATGCAAGGCGGGATC,3e-05,44.6,...,1/1,1,1,plus,100,100,24,4662,0,ATTCAGTTGATGCAAGGCGGGATC
300170,C953,AMWY02004761.1,1,24,2054,2077,ATTTCGGACCAGGCTTCATTCCCC,ATTTCGGACCAGGCTTCATTCCCC,3e-05,44.6,...,1/1,1,1,plus,100,100,24,3675,0,ATTTCGGACCAGGCTTCATTCCCC


In [126]:
info = pd.read_csv('./Temp/hit_index_info.csv')
info.head(2)

Unnamed: 0.1,Unnamed: 0,tag,reformated_tag,hit_start,hit_end
0,132836,>AMWY02059828.1:2832-3256(+),>AMWY020598281_2832-3256_+_,200,224
1,300170,>AMWY02004761.1:1853-2277(+),>AMWY020047611_1853-2277_+_,200,224


In [145]:
info = pd.read_csv('./Temp/hit_index_info.csv')
info.head(2)

Unnamed: 0.1,Unnamed: 0,tag,reformated_tag,hit_start,hit_end
0,132836,>AMWY02059828.1:2832-3256(+),>AMWY020598281_2832-3256_+_,200,224
1,300170,>AMWY02004761.1:1853-2277(+),>AMWY020047611_1853-2277_+_,200,224


In [87]:
ext = fasta_to_df('./Temp/extended.txt')
ext.head(2)

Unnamed: 0,tag,data
0,AMWY02059828.1:2832-3256(+),AAAGAATCAGCAATGGAAAAATAACCGGTTCTTAATTCAGcataac...
1,AMWY02004761.1:1853-2277(+),actaataatgCATGGCCATATATATCAAATCTACCATATgccattt...


In [114]:
counter = 0
for index in df_blastn.index:
    hit = df_blastn['hit'][index]
    row = info[info['Unnamed: 0']== index].reset_index()
    tag = row['tag'][0][1:]
    hs = row['hit_start'][0]
    he = row['hit_end'][0]
    seq = ext[ext['tag']==tag]['data'].iloc[0]
    if(seq[hs:he].upper() != hit.upper()):
        print(tag, df_blastn['slen'][index])
        print(seq[hs:he])
        print(hit)
        print('\n\n')
        counter += 1                

# RNA 2d prediction

## Mfold

In [11]:
'''
# installation
!wget http://www.unafold.org/download/mfold-3.6.tar.gz
!tar -xvf ./mfold-3.6.tar.gz; rm ./mfold-3.6.tar.gz
%cd ./mfold-3.6
!./configure
!make
!make install
%cd ..
!sudo apt install texlive-font-utils
''';

In [12]:
#todo : add all hyperparameter(options) to GUI

In [19]:
counter = 0
base = "./secondary_structure/mfold/"
!rm -r {base}
!mkdir -p {base}
df = fasta_to_df('./extended.txt')

for index, row in df.iterrows():    
    tag = reformat(row['tag'])
    if(not os.path.exists(base + tag)):
        os.makedirs(base + tag)            
    with open(base + f"{tag}/SEQ.FASTA",'w') as file:
        file.write(f">{row['tag']}\n{row['data']}")
    counter += 1    
    #if(counter >= 1):
    #    break

In [20]:
def run_mfold(tag):
    tag = reformat(tag)
    %cd {base + tag}
    !mfold  SEQ="SEQ.FASTA" T=20 
    #!find . -type f ! -name "*.ct" ! -name "*.pdf" -exec rm {} \;
    %cd ../../..

if __name__ == '__main__':        
    pool = mp.Pool(mp.cpu_count())  
    pool.map(run_mfold, df['tag'])

/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020598281_2832-3256_+_/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020501341_2471-2892_+_/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020898121_0-419_-_/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY021038501_42-463_+_/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020679141_985-1406_-_/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020252691_2832-3253_-_/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020390581_1030-1451_-_/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020538411_3049-3470_+_/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020925661_83-504_+_/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020713941_544-966_+_/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020024871_2518-2939_+_/

6	6	Minimum folding energy is -138.50 kcal/mol.
Energy increment is 6.92 kcal/mol.
2	170,80,90,100,15	15	/home/jupyter/plant_microRNA_prediction
180,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020998221_1548-1969_-_110,H-num file created from plot file.
1,
7	14	
Structure plots generated.
All done.
190,8	120,130,16	4	2,3,4,5,6,7,mfold version 3.6
5	18	140,200,REUSE= NO

3	5	/home/jupyter/plant_microRNA_prediction8,9,10,150,160,16	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020333961_16-439_-_
8	210, 
17	16	SEQ.pnt created.
9	170,180,7	mfold version 3.6
220,Sequence length is 421
3	190,REUSE= NO
16	7	230,240,16	RNA free energy files (version 2.3) at 20 degrees created.
200,250,
Structure plots generated.
All done.
10,20,30,40,50,8	15	210,SEQ.pnt created.
Sequence length is 423
17	260,60,70,80,90,Suboptimal foldings created.
Energy dot plot created.
220,19	5	/home/jupyter/plant_microRNA_prediction/home/jupyter/plant_microRNA_prediction/

270,210,Suboptimal foldings created.
H-num file created from plot file.
4,5,6,7,120,130,240,7	220,280,290,1,2,3,4,140,8,9,10,Energy dot plot created.
10	3	RNA free energy files (version 2.3) at 20 degrees created.
10,6	6	230,250,5,6,7,8,300,310,4	150,160,20,30,40,11,12, 
9,10,11,12,240,250,320,260,270,170,11	22	50,60,260,280,13,14,330,340,350,9	180,8	1	9	270,280,290,360,370,380,5	70,80,90,190,2	15,16,17,18,19,5	4	290, 
100,200,300,390,400,410,420, 
End of Fill
1	210,110,120,310,320,330,300,310,11	Save file created using nafold.
Minimum folding energy is -111.00 kcal/mol.
Energy increment is 5.55 kcal/mol.
8	130,Suboptimal foldings created.
Energy dot plot created.
10	220,320,330,5	4	340,350,7	7	140,230,
Structure plots generated.
All done.
340,H-num file created from plot file.
23	360,370,380,240,250,150,9	350,360,2	1,2,3,12	/home/jupyter/plant_microRNA_prediction
390,400,410,420,160,260,370,380,390,400,410,420,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY02098

5	10	/home/jupyter/plant_microRNA_predictionMinimum folding energy is -117.50 kcal/mol.
Energy increment is 5.88 kcal/mol.
100,110,60,70,5,6,7,8,
340,350,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020037851_11206-11628_+_9	
170,180,1	80,90,100,9,10,11,120,130,360,370,Suboptimal foldings created.
Energy dot plot created.
5	Suboptimal foldings created.
Energy dot plot created.
H-num file created from plot file.
mfold version 3.6
13	H-num file created from plot file.
110,140,150,9	10	380,390,400,190,200,12,13,14,REUSE= NO
17	120,130,6	3	1,2,3,4,5,1	12	160,1,2,3,4,410,420, 
End of Fill
Save file created using nafold.
15,16, 
210,220,230,7	140,150,6,7,SEQ.pnt created.
Sequence length is 422
5,6,7,8,170,180,Minimum folding energy is -140.00 kcal/mol.
Energy increment is 7.00 kcal/mol.
240,250,260,270,280, 
160,8,9,10,11, 
3	H-num file created from plot file.
30	RNA free energy files (version 2.3) at 20 degrees created.

Structure plots generated.
All done.
1	End of

5	380,390,400,410,Suboptimal foldings created.
Energy dot plot created.
/home/jupyter/plant_microRNA_prediction
130,140,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020987061_0-407_-_
180,190,2	mfold version 3.6
RNA free energy files (version 2.3) at 20 degrees created.
12	
Structure plots generated.
420, 
End of Fill
Save file created using nafold.
All done.
200,1	150,5	4	mfold version 3.6
Minimum folding energy is -175.00 kcal/mol.
Energy increment is 8.75 kcal/mol.
210,220,9	10,20,30,40,REUSE= NO
160,170,REUSE= NO
/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020834341_1627-2049_-_
230,240,250,8	50,60,70,7	SEQ.pnt created.
Sequence length is 421
9	180,190,H-num file created from plot file.
12	260,270,280,mfold version 3.6
80,90,100,RNA free energy files (version 2.3) at 20 degrees created.
SEQ.pnt created.
Sequence length is 407
200,12	12	REUSE= NO
290,300,310, 
End of Fill
Save file created usi

40,50,60,RNA free energy files (version 2.3) at 20 degrees created.
10,180,340,350,390,400,410,420, 
End of Fill
Save file created using nafold.
Minimum folding energy is -145.90 kcal/mol.
Energy increment is 7.29 kcal/mol.
370,380,390,400,410,Suboptimal foldings created.
Energy dot plot created.
1	230,110,120,20,30,40,50,5	70,80,90,190,360,370,380,420, 
End of Fill
Save file created using nafold.
Minimum folding energy is -165.90 kcal/mol.
Energy increment is 8.29 kcal/mol.
8	19	3	130,140,150,100,110,240,60,70,H-num file created from plot file.
4	H-num file created from plot file.
11	390,400,410,420, 
End of Fill
160,200,120,80,90,H-num file created from plot file.
250,1	1,2,3,4,5	8	1,2,3,4,170,180,8	Save file created using nafold.
Minimum folding energy is -120.20 kcal/mol.
Energy increment is 6.01 kcal/mol.
4	210,220,130,140,150,260,270,280,1,2,3,4,190,100,110,5,6,7,8,9,10,11,160,5,6,7,8,9,10,11,5,6,7,H-num file created from plot file.
1,230,3	120,130,200,11	290,12,13,5	12,13,14,15,

120,130,210,220,6	230, 
190,250,280,14	Sequence length is 421
RNA free energy files (version 2.3) at 20 degrees created.
5	230,240,H-num file created from plot file.
200,140,150,10,20,30,/home/jupyter/plant_microRNA_prediction290,260,270,
240,250,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020399811_1215-1637_-_210,
1,2,3,4,5,160,40,50,60,280,300,250,260,270,220,Suboptimal foldings created.
Energy dot plot created.
10	
Structure plots generated.
All done.
13	170,180,mfold version 3.6
6,7,8,9,10,11,12,3	6	290,300,260,310,320,330,280,70,80,90,100,190,13,14, 

Structure plots generated.
All done.
230,REUSE= NO
5	310,270,280,/home/jupyter/plant_microRNA_prediction290,110,120,340,350,360,Suboptimal foldings created.
Energy dot plot created.

240,250,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020332491_0-289_-_
4	9	/home/jupyter/plant_microRNA_prediction300,310,130,1	290,3	10	
SEQ.pnt created.
Sequence length is 422
200,320,330,340,/home/j

/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020080191_2185-2606_+_
1,2,3,4,140,
Structure plots generated.
All done.
350,360,370,/home/jupyter/plant_microRNA_prediction
390,400,410,420, 
End of Fill
Save file created using nafold.
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020148281_6365-6787_-_2	270,180,190,
200,5,6,7,22	mfold version 3.6
150,160,
Structure plots generated.
All done.
380,390,400,410,6	Minimum folding energy is -162.10 kcal/mol.
Energy increment is 8.11 kcal/mol.
/home/jupyter/plant_microRNA_prediction
8	200,210,280,8,9, 
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY021218601_0-393_+_
/home/jupyter/plant_microRNA_prediction
mfold version 3.6
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020081831_1162-1583_+_REUSE= NO
420, 
End of Fill
Save file created using nafold.
Minimum folding energy is -178.80 kcal/mol.

170,
Structure plots generated.
All done.
18	220,230,290,2

1	110,120,360,370,230,9	Suboptimal foldings created.
Energy dot plot created.
10	320,2	H-num file created from plot file.
13	130,380,390,400,410,240,250,260,140,150,330,340,350,2	
Structure plots generated.
All done.
1,2,3,420, 
End of Fill
Save file created using nafold.
270,280,290,300, 
End of Fill
7	Suboptimal foldings created.
160,360,370,380,11	Minimum folding energy is -156.20 kcal/mol.
Energy increment is 7.81 kcal/mol.
3	4,5, 
14	Save file created using nafold.
Minimum folding energy is -54.80 kcal/mol.
Energy increment is 2.74 kcal/mol.
Energy dot plot created.
10	/home/jupyter/plant_microRNA_prediction390,400,410,420, 
End of Fill
Save file created using nafold.

5	170,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020148281_6366-6787_-_1	5	
H-num file created from plot file.

Structure plots generated.
All done.
180,H-num file created from plot file.
Minimum folding energy is -144.60 kcal/mol.
Energy increment is 7.23 kcal/mol.
15	1,2,3,4,mfold versio

1	380,390,400,410,420, 
End of Fill

Structure plots generated.
All done.
250,mfold version 3.6
H-num file created from plot file.
190,REUSE= NO
380,390,400,410,3	130,220,H-num file created from plot file.
7	Save file created using nafold.
Minimum folding energy is -130.50 kcal/mol.
Energy increment is 6.53 kcal/mol.
REUSE= NO
200,1,2,3,8	420, 
End of Fill
Save file created using nafold.
Minimum folding energy is -150.20 kcal/mol.
Energy increment is 7.51 kcal/mol.
140,150,230,260,1,2,3,4,5,/home/jupyter/plant_microRNA_prediction
210,4,5,6,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020643721_833-1254_-_SEQ.pnt created.
Sequence length is 421
SEQ.pnt created.
240,8	
270,280,160,6,7,8,9,7,8,H-num file created from plot file.
1,220,19	Sequence length is 421
250,H-num file created from plot file.
10,11,12,RNA free energy files (version 2.3) at 20 degrees created.
1,2,3,6	9,10, 
9	260,270,mfold version 3.6
2,3,4,5,170,180,230,290,300,4	4,5,6,7,10,20,30,40,50,RNA f

14	3	3	340,350,8	

Structure plots generated.
All done.
240,330,340,350,11	17	360,370,380,mfold version 3.6
3	H-num file created from plot file.
250,260,H-num file created from plot file.
360,370,380,390,250,REUSE= NO
390,400,410,420, 
End of Fill
Save file created using nafold.
6	1,2,3,/home/jupyter/plant_microRNA_prediction270,400,410,420, 
End of Fill

SEQ.pnt created.
260,270,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020662941_3811-4232_-_1,2,3,4,12	
Minimum folding energy is -168.70 kcal/mol.
Energy increment is 8.44 kcal/mol.
Suboptimal foldings created.
Energy dot plot created.
4,5,6,2	280,290,Sequence length is 422
14	Save file created using nafold.
Minimum folding energy is -120.10 kcal/mol.
Energy increment is 6.00 kcal/mol.
280,10	7,8, 
mfold version 3.6
5,6,7,RNA free energy files (version 2.3) at 20 degrees created.
7	300,310,290,H-num file created from plot file.
6	2	8,9,10,11,15	H-num file created from plot file.
320,11	10,20,30,40,REUSE= NO
4

7	/home/jupyter/plant_microRNA_prediction7	Minimum folding energy is -131.00 kcal/mol.
Energy increment is 6.55 kcal/mol.
310,10,20,60,70,350,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020136111_3258-3679_-_
4	320,330,11	30,40,50,Suboptimal foldings created.
6	360,370,380,390,400,H-num file created from plot file.

Structure plots generated.
All done.
340,350,11	80,90,Energy dot plot created.
10	mfold version 3.6
12	60,70,80,90,410,420, 
End of Fill
Save file created using nafold.
Minimum folding energy is -155.20 kcal/mol.
1,2,3,360,370,380,100,110,120,4	2	/home/jupyter/plant_microRNA_predictionREUSE= NO
4,5,6,7,8,
100,110,3	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020713941_325-746_-_5	
Energy increment is 7.76 kcal/mol.
130,140,5	390,400,410,420, 
End of Fill
Save file created using nafold.
Suboptimal foldings created.
Energy dot plot created.
6	SEQ.pnt created.
Sequence length is 421
H-num file created from plot file.
120,130

210,290,300,mfold version 3.6
1,2,3,4,150,160,Minimum folding energy is -130.30 kcal/mol.
Energy increment is 6.51 kcal/mol.
11,12,310,320,12	170,180,H-num file created from plot file.
5,6,7,8,220,230,5	5,6,7,8,9,REUSE= NO
7	13, 
H-num file created from plot file.
330,340,2	190,7	10,11,12,13,14,15,1,2,SEQ.pnt created.
Sequence length is 422
240,250,9, 
1,2,3,4,5,350,360,370,6	6	1	200, 
6,7,8,RNA free energy files (version 2.3) at 20 degrees created.
380,390,6	3,4,5,6,7,8,210,260,270,10,20,30,40,50,60,11	9, 
9	400,410,420, 
End of Fill
Save file created using nafold.
Minimum folding energy is -138.20 kcal/mol.
Energy increment is 6.91 kcal/mol.
2	14	Suboptimal foldings created.
Energy dot plot created.
1	Suboptimal foldings created.
9,10,11,12,13,14,15,220,9	280,70,80,90,100,230,240,Energy dot plot created.
10	16,17,18,19,13	6	H-num file created from plot file.
110,120,130,290,300,310,250,6	8	20,21,22,140,81,2,3,4,Suboptimal foldings created.
Energy dot plot created.
1	260,270,23,24,25,

8	210,10,20,30,40,50,8	
Minimum folding energy is -80.50 kcal/mol.
Energy increment is 4.03 kcal/mol.
300,310,400,410,420, 
End of Fill
16	H-num file created from plot file.
6	60,70,80,220,11	21	5	1,2,3,4,320,330,340,Save file created using nafold.
Minimum folding energy is -164.50 kcal/mol.
Energy increment is 8.22 kcal/mol.
23	230,240,11	H-num file created from plot file.
90,5,6,7,8,9,10,350,360,370,8	18	1,2,3,4,5,6,250,100,110,120,11,12,13,14, 
4	7,380,390,400,H-num file created from plot file.
9	1	3	8	410,420, 
End of Fill
Save file created using nafold.
130,260,Suboptimal foldings created.
Energy dot plot created.
10	8, 
1,2,3,4,5,12	Minimum folding energy is -121.70 kcal/mol.
Energy increment is 6.08 kcal/mol.
270,280,140,150,160,
Structure plots generated.
All done.
6,7,8,1	7	9	12	170, 
290,2	Suboptimal foldings created.
Energy dot plot created.
1	H-num file created from plot file.
/home/jupyter/plant_microRNA_prediction
180,300,310,/home/jupyter/plant_microRNA_prediction/second

360,370,380,390,400,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020125221_5544-5965_+_
/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020301671_571-992_-_350,360,370,SEQ.pnt created.
Sequence length is 345
5	SEQ.pnt created.
Sequence length is 422

410,420, 
End of Fill
Save file created using nafold.
9	/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY021218601_0-394_+_mfold version 3.6

/home/jupyter/plant_microRNA_prediction5	Minimum folding energy is -151.60 kcal/mol.
Energy increment is 7.58 kcal/mol.
2	RNA free energy files (version 2.3) at 20 degrees created.

RNA free energy files (version 2.3) at 20 degrees created.
380,390,400,410,420, 
End of Fill
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020284261_331-752_+_mfold version 3.6
REUSE= NO
9	
9	10,20,30,40,50,10,20,30,40,50,Save file created using nafold

140,1,2,3,1,10	11	4	Suboptimal foldings created.
Energy dot plot created.
190,210,1,2,3,4,160,170,150,160,8	4,5,6,7,120,130,140,190,200,10	2,3,4,5,6,7,15	200,210,5,6,7,
Structure plots generated.
All done.
220,180,190,200,3	150,160,170,220,230,210,170,180,8,9,10,11,8,9,10,11,12,13,8,9,10,11,230,240,
Structure plots generated.
210,240,7	180,190,All done.
220,/home/jupyter/plant_microRNA_prediction12,13,14,15,190,200,14,15,16,

Structure plots generated.
All done.
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020781141_1194-1616_-_
Structure plots generated.
All done.
250,11	8	
12,13,14,15,250,200,210,230,210,220,16, 
220,17, 
 
260,9	/home/jupyter/plant_microRNA_prediction
mfold version 3.6
260,270,220,/home/jupyter/plant_microRNA_prediction/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020047611_1788-2209_+_240,230,240,250,11	270,/home/jupyter/plant_microRNA_prediction230,


5	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfo

/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY021034711_2890-3311_+_mfold version 3.6
18	SEQ.pnt created.
230,420, 
End of Fill
Save file created using nafold.
Minimum folding energy is -135.10 kcal/mol.
Energy increment is 6.75 kcal/mol.

80,90,1,2,3,Suboptimal foldings created.
4	240,250,Sequence length is 421
RNA free energy files (version 2.3) at 20 degrees created.
2	
Structure plots generated.
All done.
100,110,120,4,5,6,REUSE= NO
Energy dot plot created.
1	mfold version 3.6
1	Suboptimal foldings created.
Energy dot plot created.
10	10,20,130,
Structure plots generated.
All done.
260,7,8,9,10,REUSE= NO
30,40,50,60,/home/jupyter/plant_microRNA_predictionH-num file created from plot file.
1,2,140,150,2	270,

Structure plots generated.
All done.
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020621741_139-560_+_SEQ.pnt created.
Sequence length is 421
11,12,13,14, 

/home/jupyter/plant_microRNA_prediction160,SEQ.pnt created.

70,80,90,3,

3	290,300,130,11, 
12	12	8,9,10,11,12,5	10,20,30,40,50,60,70,4	/home/jupyter/plant_microRNA_prediction310,320,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020125221_5489-5910_-_13,
140,150,9	330,340,7	80,90,100,mfold version 3.6
9	160,170,350,360,110,120,14, 

Structure plots generated.
All done.
19	12	REUSE= NO
180,370,380,390,11	15	130,1	1	12	4	190,400,410,420, 
End of Fill
8	SEQ.pnt created.
Sequence length is 421
/home/jupyter/plant_microRNA_prediction
Suboptimal foldings created.
Energy dot plot created.
10	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020024871_2516-2939_+_140,150,200,Save file created using nafold.

160,170,6	4	13	13	RNA free energy files (version 2.3) at 20 degrees created.
Minimum folding energy is -194.40 kcal/mol.
Energy increment is 9.72 kcal/mol.
210,180,10,20,30,40,50,H-num file created from plot file.
Suboptimal foldings created.
Energy dot plot created.
mfold version 3.6
5	
Structure plots generated.
All

170,180,1,2,20,30,40,50,60,70,7,8,9,10,11,280,13, 
80,90,190,8,9,10,1,2,3,4,5,12,13,14,15,16,240,80,90,3,4,5,Suboptimal foldings created.
Energy dot plot created.
10	20	200,100,290,300,17,9	6,7,8,11, 
 
100,110,3	2	310,250,260,6	9	110,120,130,210,
Structure plots generated.
All done.
120,130,9,10,11,12,13,15	270,280, 
11	140,150,Suboptimal foldings created.
Energy dot plot created.
320,330,14	140,150,220,230, 
17	/home/jupyter/plant_microRNA_predictionSuboptimal foldings created.
Energy dot plot created.

1	160,290,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020268181_607-1028_+_340,350,240,250,160,18	6	
Suboptimal foldings created.
Energy dot plot created.
170,180,300,310,170,260,10	360,370,380,390,180,10	mfold version 3.6
320,
Structure plots generated.
All done.
190,200,21	400,410,420, 
End of Fill
270,280,1	4	REUSE= NO
190,210,Save file created using nafold.
Minimum folding energy is -153.60 kcal/mol.
330,340,350,
Structure plots generated.
All done.
290,3

10,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020950091_1185-1607_+_
SEQ.pnt created.
Sequence length is 398
7	3	210,1,2,3,4,5,22, 
300,20,30,40,50,9	/home/jupyter/plant_microRNA_predictionREUSE= NO
2	
3	220,6,7,8,9,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020028821_33638-34059_-_mfold version 3.6
RNA free energy files (version 2.3) at 20 degrees created.
10,15	60,70,80,310,320,230,
REUSE= NO
10,11,SEQ.pnt created.
6	20,30,40,50,90,100,110,
Structure plots generated.
All done.

Structure plots generated.
All done.
330,340,350,360,370,7	240,250,
Structure plots generated.
All done.
8	12,13,14,15, 
Sequence length is 420
RNA free energy files (version 2.3) at 20 degrees created.
60,70,80,120,
Structure plots generated.
All done.
380,390,400,410, 
End of Fill
mfold version 3.6
SEQ.pnt created.
Sequence length is 422
90,100,10,20,30,40,260,130,140,/home/jupyter/plant_microRNA_prediction
4	/home/jupyter/plant_microRNA_prediction/second

220,REUSE= NO
150,9	Minimum folding energy is -146.70 kcal/mol.
Energy increment is 7.33 kcal/mol.
10,20,30,40,50,240,2,3,4,5,6,7,8,9,200,1,2,3,4,5,Suboptimal foldings created.
Energy dot plot created.
10	160,230,H-num file created from plot file.
60,70,80,12	250,5,6,7,8,9,SEQ.pnt created.
Sequence length is 420
10,11,12,210,6,7,8,9,170,180,Suboptimal foldings created.
Energy dot plot created.
10	240,1,2,3,4,90,10,11,12,H-num file created from plot file.
10,11,260,270,Suboptimal foldings created.
Energy dot plot created.
13, 
220,RNA free energy files (version 2.3) at 20 degrees created.
250,9	190,100,110,20	13,14,230,1,2,3,4,5,6,12,5,6,7,8,9,10,9	280,290,300,10,20,30,40,260,15	120,130,140,10	15,7,8, 
11,12, 
310,240,Suboptimal foldings created.
200,15	Suboptimal foldings created.
Energy dot plot created.

Structure plots generated.
50,60,70,270,280,150,9,10,11,All done.
320,330,250,Energy dot plot created.
10	16, 
2	80,90,210,220,230,10	 
160,290,340,260,Suboptimal foldings created.
E

280,290,/home/jupyter/plant_microRNA_prediction160,170,Suboptimal foldings created.
Energy dot plot created.
10	
320,330,Suboptimal foldings created.
Energy dot plot created.
1	Suboptimal foldings created.
Energy dot plot created.
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020740331_1781-2201_+_160,
240,250,410, 
End of Fill
Save file created using nafold.
Minimum folding energy is -154.50 kcal/mol.
Energy increment is 7.72 kcal/mol.
10	300,310,210,380,390,400,410, 
End of Fill
Save file created using nafold.
180,13	170,180,340,350,360,Suboptimal foldings created.
260,190,220,230,3	320,Minimum folding energy is -159.30 kcal/mol.
Energy increment is 7.96 kcal/mol.
mfold version 3.6
370,380,390,8	H-num file created from plot file.
190,200,270,280,Energy dot plot created.
10	H-num file created from plot file.
2	200,400,410, 
End of Fill
Save file created using nafold.
Suboptimal foldings created.
Energy dot plot created.
10	9	330,340,350,240,250,REUSE= NO
210,22

9	SEQ.pnt created.
mfold version 3.6
140,300,310,10,20,100,110,120,Sequence length is 421
150,REUSE= NO
3	12	30,40,50,130,H-num file created from plot file.
320,330,RNA free energy files (version 2.3) at 20 degrees created.
160,140,150,6	60,70,14	SEQ.pnt created.
Sequence length is 420
9	9	340,350,360,1,2,3,7	10,20,30,40,170,160,17	80,90,100,370,380,7	4,5,6,50,60,180,RNA free energy files (version 2.3) at 20 degrees created.
390,400,410, 
End of Fill
170,
Structure plots generated.
All done.
110,120,12	7,8,70,80,90,190,Save file created using nafold.
Minimum folding energy is -150.30 kcal/mol.
Energy increment is 7.51 kcal/mol.
16	10,20,30,40,50,180,190,130,100,15	5	60,70, 
200,4	140,150,200,Suboptimal foldings created.
Energy dot plot created.
10	
Structure plots generated.
All done.
/home/jupyter/plant_microRNA_prediction
110,120,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020024871_2691-3112_+_
80,90,210,220,210,160,130,140,7	100,110,H-num file created from

5,6,7,8,3	SEQ.pnt created.
Sequence length is 420
110,5	10	290,300,7	370,380,390, 
120,RNA free energy files (version 2.3) at 20 degrees created.
400,410, 
End of Fill
310,4	10,20,8	15	130,140,H-num file created from plot file.
5	320,330,6	Save file created using nafold.
Minimum folding energy is -170.80 kcal/mol.
Energy increment is 8.54 kcal/mol.
6	30,40,50,18	340,350,360,150,160,1	1,2,12	H-num file created from plot file.
60,70,80,Suboptimal foldings created.
3	370,380,3,4,5,6,11	1,2,90,170,180,Energy dot plot created.
1	390,400,410, 
End of Fill
Save file created using nafold.
9	22	11	7,8,9,3,4,5,6,100,2	190,200,Minimum folding energy is -148.80 kcal/mol.
Energy increment is 7.44 kcal/mol.
4	110,120,10,11,12,7,8,8	210,220,130,6	
Structure plots generated.
All done.
5	 
9,10,16	H-num file created from plot file.
6	230, 
7	140,150,1,2,3,7	/home/jupyter/plant_microRNA_prediction240,250,
2	13	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020022121_7867-8287_+_16

H-num file created from plot file.
270,280,9,10,/home/jupyter/plant_microRNA_predictionSuboptimal foldings created.
Energy dot plot created.
10	150,
REUSE= NO
70,80,Suboptimal foldings created.
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020781141_1197-1618_-_Save file created using nafold.
Minimum folding energy is -119.90 kcal/mol.
Energy increment is 6.00 kcal/mol.
1,2,3,
11,12, 
160,170,290,300,4	2	90,100,110,Energy dot plot created.
10	20	mfold version 3.6
310,320,330,180,H-num file created from plot file.
4,5,SEQ.pnt created.
Sequence length is 420
Suboptimal foldings created.
Energy dot plot created.
2	120,9	30	7	8	1,2,3,4,1	190,340,350,360,REUSE= NO
6,7,8,130,RNA free energy files (version 2.3) at 20 degrees created.
5,6,200,SEQ.pnt created.
370,380,390,400,410,420, 
End of Fill
Save file created using nafold.
9	9, 
2	6	7,8,9,10,11,140,150,10,20,30,Suboptimal foldings created.
Minimum folding energy is -141.70 kcal/mol.
Energy increment is 7.08 kcal/mo

10,20,30,40,330,340,4	10	 
21	/home/jupyter/plant_microRNA_predictionSuboptimal foldings created.
Energy dot plot created.
250,270,280,50,60,70,5	H-num file created from plot file.

1	350,360,370,3	
Structure plots generated.
All done.
H-num file created from plot file.
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020823131_5752-6172_+_290,300,310,
260,270,1	8	1,2,3,4,80,90,100,380,390,400,410, 
End of Fill
Save file created using nafold.
12	1,2,3,4,5,6,11	320,330,280,5,6,7,8,11	Suboptimal foldings created.
Energy dot plot created.
110,120,/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020645111_8294-8715_+_Minimum folding energy is -145.30 kcal/mol.
Energy increment is 7.26 kcal/mol.

7,8,9,1	340,350,9,10,11,mfold version 3.6
290,300,130,140,1	10,11,12,13,14,15,mfold version 3.6
REUSE= NO
360,370,6	H-num file created from plot file.
12,13,150,310,320,16,2	380,390,400,410, 
End of Fill
Save file cre

1	
220,230,360,370,380,390,400,1	1	270,280,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020228321_1667-2087_-_230,200,
mfold version 3.6
4	10	RNA free energy files (version 2.3) at 20 degrees created.
240,220,230,240,410, 
End of Fill
Save file created using nafold.
Minimum folding energy is -154.50 kcal/mol.
H-num file created from plot file.
16	290,300,10,20,30,40,50,60,210,220,17	250,mfold version 3.6
REUSE= NO
30	250,Energy increment is 7.72 kcal/mol.
310,1,2,3,4,240,70,80,230,260,
Structure plots generated.
6	SEQ.pnt created.
12	260,H-num file created from plot file.
320,330,All done.
REUSE= NO
250,260,5,6,7,8,20	90,100,110,270,270,240,1,2,3,Sequence length is 421
RNA free energy files (version 2.3) at 20 degrees created.
10,270,250,340,350,360,280,290,9,10,11, 
/home/jupyter/plant_microRNA_prediction120,130,140,150,280,
4,5, 
5	SEQ.pnt created.
Sequence length is 420
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020227101_82-502_-_

330,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020779351_5799-6219_-_SEQ.pnt created.
Sequence length is 420

150,390,400,410, 
End of Fill
160,RNA free energy files (version 2.3) at 20 degrees created.
110,11	9	210,310,320,H-num file created from plot file.
400,410,420, 
End of Fill
Energy increment is 7.71 kcal/mol.
5	340,160,10,20,30,40,50,220,120,130,350,360,170,180,1,2,3,4,Suboptimal foldings created.
Energy dot plot created.
10	8	mfold version 3.6
11	RNA free energy files (version 2.3) at 20 degrees created.
Save file created using nafold.
Minimum folding energy is -162.10 kcal/mol.
Energy increment is 8.11 kcal/mol.
Save file created using nafold.
Minimum folding energy is -151.20 kcal/mol.
Energy increment is 7.56 kcal/mol.
330,340,350,H-num file created from plot file.
60,70,80,90,100,5	190,200,10,20,30,40,50,170,H-num file created from plot file.
140,150,370,380,390,360,370,380,390,400,REUSE= NO
110,120,H-num file created from plot file.
5,6,7,8,60,

340,350,360,80,90,10,20,30,40,50,REUSE= NO
Minimum folding energy is -168.10 kcal/mol.
Energy increment is 8.40 kcal/mol.
250,200,50,60,70,120,130,280,290,10,20,30,100,110,120,370,380,390,11	250,260,210,220,60,70,80,H-num file created from plot file.
80,40,50,60,140,300,310,SEQ.pnt created.
130,400,410,420, 
End of Fill
Save file created using nafold.
260,270,8	90,100,16	150,Sequence length is 420
230,140,150,90,100,1,2,3,4,320,330,4	7	8	110,120,70,80,280,270,280,5	Minimum folding energy is -148.60 kcal/mol.
Energy increment is 7.43 kcal/mol.
160,170,160,170,RNA free energy files (version 2.3) at 20 degrees created.
240,9	130,90,100,110,120,110,120,130,340,350,360,5,6,7,8,9,10,H-num file created from plot file.
290,300,310,180,290,300,6	180,10,20,30,40,140,150,250,260,130,140,140,11,12,13,19	370,380,390,400,410,420,320,330,310,320,3	190,50,60,70,80,190,1,2,3,160,270,150,4	150,330,340, 
End of Fill
Save file created using nafold.
Minimum folding energy is -161.60 kcal/mol.
Energy increm

21	1	7	H-num file created from plot file.
240,Sequence length is 420
1	1,2,3,4,150,160,18	mfold version 3.6
9	1,2,6	RNA free energy files (version 2.3) at 20 degrees created.
10,20,250,4	5,6, 
REUSE= NO
170,180,
Structure plots generated.
All done.
Suboptimal foldings created.
Energy dot plot created.
260,9	3,4,5,6,16	30,40,50,60,70,80,10	190,270,SEQ.pnt created.
Sequence length is 420
7,8,/home/jupyter/plant_microRNA_prediction1	
Structure plots generated.
All done.

200,280,90,100,110,
Structure plots generated.
All done.
7	RNA free energy files (version 2.3) at 20 degrees created.
Suboptimal foldings created.
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020835681_5790-6209_-_6	 

24	210,220,120,130,290,10,20,30,40,Energy dot plot created.
1	/home/jupyter/plant_microRNA_prediction
22	230,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020632401_624-1044_-_50,60,70,140,150,mfold version 3.6
/home/jupyter/plant_microRNA_prediction
2	300,31

70,80,90,290,340,350,H-num file created from plot file.
1,9	140,150,4	Energy increment is 5.41 kcal/mol.
360,370,380,15	100,110,120,300,310,320,31	2,3,4,5,6,5	Suboptimal foldings created.
Energy dot plot created.
10	390,400,410, 
End of Fill
8	H-num file created from plot file.
130,140,11	
Structure plots generated.
All done.
7,330,340,350,
Structure plots generated.
All done.
160,12	
Structure plots generated.
All done.
1,2,150,Save file created using nafold.
Minimum folding energy is -176.30 kcal/mol.
Energy increment is 8.81 kcal/mol.
360,370,380,390, 
170,180,26	3,4,5,6,/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY021039961_38-458_-_160,170,400,410, 
End of Fill
Save file created using nafold.


/home/jupyter/plant_microRNA_prediction4	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020755761_1062-1483_+_
190,
H-num file created from plot file.
7,8,9,10,11,6

250,260,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020823131_5753-6173_+_180,230,210,220,80,90,REUSE= NO

320,
Structure plots generated.
240,240,3	260,190,230,
Structure plots generated.
All done.

All done.
270,280,290,8	100,110,120,330,340,350,360,SEQ.pnt created.
mfold version 3.6
250,1	200,270,280,240,300,310,320,330,340, 
End of Fill
Save file created using nafold.
370,380,390,130,140,Sequence length is 418
mfold version 3.6
260,11	210,250,REUSE= NO
3	290,
Structure plots generated.
All done.
250,
/home/jupyter/plant_microRNA_predictionMinimum folding energy is -111.10 kcal/mol.
Energy increment is 5.55 kcal/mol.
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020024871_2689-3109_-_
/home/jupyter/plant_microRNA_prediction150,REUSE= NO

RNA free energy files (version 2.3) at 20 degrees created.
10,400,410, 
End of Fill
Save file created using nafold.
270,280,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY02114

9	Save file created using nafold.
Minimum folding energy is -119.00 kcal/mol.
Energy increment is 5.95 kcal/mol.
REUSE= NO
11	11,12, 
4	10,20,6	SEQ.pnt created.
6	22	3	
Structure plots generated.
All done.
30,40,50,60,H-num file created from plot file.
12	7	Sequence length is 420
2	15	70,80,90,/home/jupyter/plant_microRNA_prediction1,2,3,4,13	RNA free energy files (version 2.3) at 20 degrees created.
10,20,5	
100,110,5	5,6, 
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020539211_1499-1919_+_
30,40,50,60,120,1	11	12	12	Suboptimal foldings created.
16	130,140,70,80,mfold version 3.6
Energy dot plot created.
10	
Structure plots generated.
All done.
REUSE= NO
12	7	5	90,100,110,150,Suboptimal foldings created.
Energy dot plot created.
1	4	23	SEQ.pnt created.
Sequence length is 420
/home/jupyter/plant_microRNA_prediction160,120,130,7	8	
16	13	170,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020097671_736-1160_-_
140,3	14	RNA free energy files

8	RNA free energy files (version 2.3) at 20 degrees created.
290,280,290,8,9,10,160,170,2,3,4,5,6,7	12	
Structure plots generated.
All done.
H-num file created from plot file.
180,8	300,310,300,310,10,20,30,40,50,11,7,8,9,10,11,12,13,4	1,2,3,2	190,11	320,330,340,3	14,15,16,17,5	320,330,60,70,80, 
/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020068341_835-1255_-_200,
Structure plots generated.
All done.
4,5,6,7,8,
18,19,7	350,360,370,340,350,360,
Structure plots generated.
All done.
90,100,110,120,210,220,9,10,11,12,13,14, 
380,390,400,410, 
End of Fill
11	370,380,390,400,410, 
End of Fill
130,140,/home/jupyter/plant_microRNA_predictionmfold version 3.6

15,16,17, 
Save file created using nafold.
Minimum folding energy is -155.80 kcal/mol.
Energy increment is 7.79 kcal/mol.
/home/jupyter/plant_microRNA_prediction/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020679181_1888-2306_+_Save file created us

260,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020781141_1197-1616_-_/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020227111_453-873_-_Suboptimal foldings created.
Energy dot plot created.
10	170,

/home/jupyter/plant_microRNA_predictionSEQ.pnt created.
Sequence length is 420
4	1	
/home/jupyter/plant_microRNA_prediction/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020632931_2313-2681_-_20	

RNA free energy files (version 2.3) at 20 degrees created.
10,REUSE= NO
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020028821_33639-34058_-_270,180,190,8	15	
5	mfold version 3.6
RNA free energy files (version 2.3) at 20 degrees created.
mfold version 3.6
2	200,SEQ.pnt created.
Sequence length is 420
280,20,30,40,50,60,210,REUSE= NO
REUSE= NO
mfold version 3.6
mfold version 3.6
RNA free energy files (version 2.3) at 20 degrees created.
10,20,30,40,50,8	70,80,90,290,
Structure plots generated.
All done

10	8	/home/jupyter/plant_microRNA_prediction310,
15,16,17,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020242731_4923-5343_-_13	10,20,30,
/home/jupyter/plant_microRNA_prediction320,330,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY021032231_150-412_-_
40,50,60,18,19,2	mfold version 3.6
340,350,7	15	1	Suboptimal foldings created.
Energy dot plot created.

Structure plots generated.
All done.
5	REUSE= NO
70,80,mfold version 3.6
5	360,370,380,20,21,22,23,24,
Structure plots generated.
8	1	All done.
6	390,400,410, 
End of Fill
Save file created using nafold.
7	REUSE= NO
90,100,110,SEQ.pnt created.
 
/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020781141_1359-1778_-_
Sequence length is 420
RNA free energy files (version 2.3) at 20 degrees created.
17	Minimum folding energy is -150.80 kcal/mol.
Energy increment is 7.54 kcal/mol.
SEQ.pnt created.
Sequence length is 262
120,130,/h

1	/home/jupyter/plant_microRNA_prediction
21	50,60,70,80,8	3,4,5,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020314451_2886-3306_-_
Suboptimal foldings created.
5	9	20,30,40,50,260,mfold version 3.6
90,6,7,8,Energy dot plot created.
10	4	mfold version 3.6
60,70,80,270,280,9,10, 
100,110,90,100,6	REUSE= NO
3	290,REUSE= NO
120,130,110,300,SEQ.pnt created.
7	16	18	17	140,120,130,3	310,320,4	Sequence length is 419
SEQ.pnt created.
Sequence length is 420
140,
Structure plots generated.
330,340,150,160,All done.
Suboptimal foldings created.
Energy dot plot created.
RNA free energy files (version 2.3) at 20 degrees created.
10,20,
Structure plots generated.
22	All done.
3	9	6	10	150,160,350,360,170,2	30,40,50,60,RNA free energy files (version 2.3) at 20 degrees created.
/home/jupyter/plant_microRNA_prediction1	5	
370,380,390,400,410, 
/home/jupyter/plant_microRNA_prediction180,170,180,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020242731_53

/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY021068931_846-1265_-_
220,230,Energy increment is 8.35 kcal/mol.
REUSE= NO
13	8	180,240,H-num file created from plot file.
Suboptimal foldings created.
Energy dot plot created.
10	Suboptimal foldings created.
3	mfold version 3.6

Structure plots generated.
All done.
SEQ.pnt created.
Sequence length is 420
2	19	190,200,7	14	250,1,2,3,Suboptimal foldings created.
Energy dot plot created.
10	REUSE= NO
14	25	Energy dot plot created.
10	4	6	210,4,5,6,7,260,270,RNA free energy files (version 2.3) at 20 degrees created.
/home/jupyter/plant_microRNA_prediction8,9,
220,230,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020729951_8589-9009_+_10,20,30,40,SEQ.pnt created.
Sequence length is 419
19	280,5	
RNA free energy files (version 2.3) at 20 degrees created.
10,11,50,60,10,20,30,290,300,240,Suboptimal foldings created.
Energy dot plot created.
10	 
70,80,mfold version 3.6
Suboptimal foldings created.
E

1	mfold version 3.6
4	390,400,410, 
End of Fill
Save file created using nafold.
6	/home/jupyter/plant_microRNA_prediction4	10,20,2	
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020016741_0-398_-_REUSE= NO
270,280,14	
22,23,24,25,30,40,50,Minimum folding energy is -171.70 kcal/mol.
Energy increment is 8.59 kcal/mol.
36	26,27,
Structure plots generated.
All done.
290,300,
Structure plots generated.
All done.
mfold version 3.6
60,70,80,2	2	SEQ.pnt created.
Sequence length is 419
28,29, 
310,H-num file created from plot file.
1,15	REUSE= NO
90,100,RNA free energy files (version 2.3) at 20 degrees created.
/home/jupyter/plant_microRNA_prediction
320,330,/home/jupyter/plant_microRNA_prediction2,3,4,5,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020136111_3258-3678_-_/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020911361_538-871_-_

10,20,110,120,340,8	6,7,SEQ.pnt created.
Sequence length is 398
350,360,370,mfold vers

16,17,18,19,20,Energy increment is 6.65 kcal/mol.
310,10,20,30,40,50,60,70,280,290,13	210,140,17	H-num file created from plot file.
5	320,330,REUSE= NO
300,310,SEQ.pnt created.
Sequence length is 424
RNA free energy files (version 2.3) at 20 degrees created.
80,90,100,9	21,22,23,24,/home/jupyter/plant_microRNA_prediction150,12	
220,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020390581_1030-1450_-_
340,350,SEQ.pnt created.
4	10,20,30,40,50,RNA free energy files (version 2.3) at 20 degrees created.
110,120,
Structure plots generated.
All done.
9	1,2,3,4,5,6,320,330,25,26,27, 
230,160,170,Sequence length is 420
10,20,30,40,130,360,370,380,60,70,mfold version 3.6
7,8,9,10,11,Suboptimal foldings created.
Energy dot plot created.
10	240,340,350,360,370,50,60,180,390,400,410, 
End of Fill
Save file created using nafold.
140,150,/home/jupyter/plant_microRNA_prediction

Structure plots generated.
All done.
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfo

14,15,16,17,18,310,/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020024871_2518-2937_+_150,160,9	SEQ.pnt created.
Sequence length is 423

280,290,1,2,3,4,5,19,20,21, 
2	40,50,60,70,80,320,330,/home/jupyter/plant_microRNA_prediction170,300,310,3	
RNA free energy files (version 2.3) at 20 degrees created.

2	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020515531_857-1275_+_6,7,8,9,10,11,12,mfold version 3.6
90,100,180,190,340,350,360,10,20,30,40,1	320,330,110,120,130,13,14, 
REUSE= NO

Structure plots generated.
All done.
9	4	370,380,390,400,410, 
End of Fill
50,60,200,mfold version 3.6
340,350,360,140,150,SEQ.pnt created.
Sequence length is 419
70,80,210,24	Save file created using nafold.
Minimum folding energy is -161.40 kcal/mol.
Energy increment is 8.07 kcal/mol.
/home/jupyter/plant_microRNA_prediction3	
21	160,RNA free energy files (version 2.3) at 20 degrees created.
REUSE= NO
/home/jupyter/pla

5,6,7,8,9,10,11,7	190,4,5,6,5,6,12	12, 
RNA free energy files (version 2.3) at 20 degrees created.
REUSE= NO
5	7,8,9,10,200,5	Suboptimal foldings created.
Energy dot plot created.
8	20	7,8, 
10,20,13	11,12,210,10	1	11	SEQ.pnt created.
Sequence length is 420
15	30,40,50,60,15	220, 
RNA free energy files (version 2.3) at 20 degrees created.
70,80,90,Suboptimal foldings created.
Energy dot plot created.
230,Suboptimal foldings created.
Energy dot plot created.
9	14	10,20,30,Suboptimal foldings created.
100,110,10	6	15	240,3	10	40,50,60,Energy dot plot created.
1	120,130,250,8	70,80,140,6	13	9	260,Suboptimal foldings created.
Energy dot plot created.
6	150,160,90,100,110,21	11	16	10	20	14	170,120,270,280,16	180,12	290,300,
Structure plots generated.
All done.
130,15	16	11	310,190,200,11	140,150,4	/home/jupyter/plant_microRNA_prediction2	
320,330,160,170,7	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020390581_1031-1451_-_
210,7	180,9	1	340,350,
Structure plots gene

H-num file created from plot file.
2	13	5	80,90,100,300,310,3	7	REUSE= NO
SEQ.pnt created.
Sequence length is 420
1,2,3,4,5,/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020190751_4526-4945_+_
320,330,SEQ.pnt created.
16	110,120,/home/jupyter/plant_microRNA_prediction
11	17	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020072891_2620-2840_-_6,7,8,

Structure plots generated.
All done.
Sequence length is 420
340,350,130,140,
Structure plots generated.
All done.
5	mfold version 3.6
RNA free energy files (version 2.3) at 20 degrees created.
9,10,11,12,13,11	360,370,380,390,mfold version 3.6
RNA free energy files (version 2.3) at 20 degrees created.
10,20,30,40,150,160,REUSE= NO
10,20,30,40,50,60,22	/home/jupyter/plant_microRNA_prediction4	3	
23	/home/jupyter/plant_microRNA_prediction14,15,16,17,18,REUSE= NO
170,180,190,200,210, 
End of Fill
6	/home/jupyter/plant_microRNA_prediction/secondary_structure/

Suboptimal foldings created.
Energy dot plot created.
10	SEQ.pnt created.
Sequence length is 420
8	SEQ.pnt created.
Sequence length is 420
13,14,180,190,16	400,410, 
End of Fill
Save file created using nafold.
10,20,30,40,50,RNA free energy files (version 2.3) at 20 degrees created.
15,16,17,RNA free energy files (version 2.3) at 20 degrees created.
200,Minimum folding energy is -124.70 kcal/mol.
Energy increment is 6.24 kcal/mol.
17	60,70,80,90,10,20,30,40,210,5	10,20,30,40,50, 
8	H-num file created from plot file.
11	100,110,3	50,60,70,80,Suboptimal foldings created.
Energy dot plot created.
1,60,70,80,220,120,H-num file created from plot file.
1,10	16	1	22	
Structure plots generated.
All done.
90,100,230,3	2,3,4,2,3,4,5,6,16	130,140,90,100,110,110,120,
Structure plots generated.
240,All done.
17	7,8,9,5,6,7,150,160,/home/jupyter/plant_microRNA_prediction120,130,130,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY021256121_1409-1828_-_10,11,12,
250,11	8,9,10,11

140,/home/jupyter/plant_microRNA_prediction/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020389531_1712-2136_+_RNA free energy files (version 2.3) at 20 degrees created.

1,2,3,
Sequence length is 420
150,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020245601_873-1293_-_13	4,5,
Structure plots generated.
All done.

310,10,mfold version 3.6
RNA free energy files (version 2.3) at 20 degrees created.
10,20,6,7, 
160,16	4	19	20,30,40,50,60,320,330,4	REUSE= NO
2	/home/jupyter/plant_microRNA_prediction8	30,40,50,mfold version 3.6
27	170,180,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020501341_2472-2892_+_70,80,90,
13	60,70,340,350,360,4	REUSE= NO
190,SEQ.pnt created.
100,21	370,380,390,400,80,90,15	200,210,220,110,120,mfold version 3.6
8	Sequence length is 424
14	Suboptimal foldings created.
Energy dot plot created.
100,110,SEQ.pnt created.
Sequence length is 420
2	230,240,130

10,20,30,140,150,290,300,310,320,330, 16	180,190,7	100,220,SEQ.pnt created.
40,50,60,70,6	160,
End of Fill
Save file created using nafold.
Minimum folding energy is -101.50 kcal/mol.
Energy increment is 5.08 kcal/mol.
110,120,200,9	Sequence length is 364
80,210,130,230,90,100,170,13	13	22	220,230,RNA free energy files (version 2.3) at 20 degrees created.
140,240,
Structure plots generated.
All done.
110,10,20,30,40,50,23	4	120,180,240,Suboptimal foldings created.
Energy dot plot created.
150,160,Suboptimal foldings created.
Energy dot plot created.
10	60,70,
/home/jupyter/plant_microRNA_prediction190,H-num file created from plot file.
250,130,140,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY021039961_112-532_+_250,
10	6	8	14	80,14	150,200,170,25	1,2,260,260,90,100,180,190,mfold version 3.6
160,270,280,3,4,5,6,7,210,270,280,110,REUSE= NO
200,170,220,290,17	8	290, 
7	210,180,190,120,130,230,300,310,
Structure plots generated.
All done.
300,23	14	200,210,14	2	320,

1	9	2	22	7	15	160,170,11	
Structure plots generated.
All done.
9	420, 
End of Fill
Save file created using nafold.
Minimum folding energy is -116.30 kcal/mol.
Energy increment is 5.82 kcal/mol.
11, 
8	SEQ.pnt created.
Sequence length is 423
9	4	180,13	H-num file created from plot file.
8	/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY021258501_0-238_-_190,200,1,16	RNA free energy files (version 2.3) at 20 degrees created.
10,
2,3,4,16	210,20,30,40,50,mfold version 3.6
5,6,8	8	60,70,Suboptimal foldings created.
Energy dot plot created.
220,230,Suboptimal foldings created.
Energy dot plot created.

Structure plots generated.
All done.
7,8, 
REUSE= NO
3	80,90,100,2	240,
Structure plots generated.
All done.
10	10	
Structure plots generated.
All done.
23	12	8	110,120,250,16	/home/jupyter/plant_microRNA_prediction
Structure plots generated.
All done.

SEQ.pnt created.
Sequence length is 238
/home/jupyter/plant_microRNA_prediction

5	2,3,4,H-num file created from plot file.
30,40,50,60,360,370,380,10	25,26,Suboptimal foldings created.
Energy dot plot created.
5,6,7,8,7	SEQ.pnt created.
Sequence length is 421
1,2,3,4,5,6,70,80,90,27,28,29, 
390,400,410,420, 
End of Fill
8	Suboptimal foldings created.
Energy dot plot created.
9, 
10	100,4	6	7,8,9,
Structure plots generated.
All done.
17	1	Save file created using nafold.
Minimum folding energy is -146.40 kcal/mol.
Energy increment is 7.32 kcal/mol.
110,RNA free energy files (version 2.3) at 20 degrees created.
10,10,11,12,22	20	2	120,4/home/jupyter/plant_microRNA_prediction13,14,15,20,30,40,50,60,H-num file created from plot file.
8	
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020430621_489-909_+_130,Suboptimal foldings created.
Energy dot plot created.
10	14	
6		70,80,90,100,1, 
140,5	Suboptimal foldings created.
Energy dot plot created.
1	mfold version 3.6
11	2,3,4,5,6,110,120,130,150,160,11	REUSE= NO
8	9	7,8,9,10,140,5	SEQ.pnt created.
1

290,300,17,18,19,20,120,130,200,Suboptimal foldings created.280,Save file created using nafold.
Minimum folding energy is -125.70 kcal/mol.
Energy increment is 6.29 kcal/mol.
310,320,14	140,150,10,20,30,40,50,210,310,320,2	28	26	17	21,22,23,/home/jupyter/plant_microRNA_prediction290,300,1	

Energy dot plot created.
10	160,20	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020567971_4071-4490_+_330,340,350,220,60,70,80,
330,340,24,25,310,170,Suboptimal foldings created.
90,100,360,370,9	26,350,360,370,380,H-num file created from plot file.
230,8	320,330,mfold version 3.6
Energy dot plot created.
10	 
110,120,7	380,390,400,410, 
End of Fill
Save file created using nafold.
180,190,340,350,360,1,2,3,4,5,6,390,400,410, 
End of Fill
Save file created using nafold.
15	240,250,130,140,REUSE= NO
Minimum folding energy is -118.30 kcal/mol.
Energy increment is 5.92 kcal/mol.
200,Minimum folding energy is -126.70 kcal/mol.
Energy increment is 6.33 kcal/mol.
370,380,7,8,9,4	3	

40,50,8	12	Sequence length is 419
360,370,380,150,8	220,60,70,80,H-num file created from plot file.

Structure plots generated.
All done.
Sequence length is 420
20	mfold version 3.6
19	15	160,170,390,400,410,420,RNA free energy files (version 2.3) at 20 degrees created.
10,20,90,RNA free energy files (version 2.3) at 20 degrees created.
REUSE= NO
230,240,1,2,3,4,5,6,18	
Structure plots generated.
All done.
/home/jupyter/plant_microRNA_prediction180, 
End of Fill
Save file created using nafold.
Minimum folding energy is -141.10 kcal/mol.
30,40,50,250,10,20,30,100,110,
7,8,9,10,11,12,13,8	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY021097971_486-905_-_Energy increment is 7.05 kcal/mol.
SEQ.pnt created.
Sequence length is 420
60,70,7	260,
190,40,50,60,120,130,/home/jupyter/plant_microRNA_prediction14,15,16,17,
80,90,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020389531_1710-2133_+_270,mfold version 3.6
140,
19	
Structure plots generated.

H-num file created from plot file.
10	250,330,340,400,410, 
End of Fill
Save file created using nafold.
16	3	1,2,3,4,5,6,7,1	H-num file created from plot file.
260,Suboptimal foldings created.
Energy dot plot created.
10	350,360,370,380,390,Minimum folding energy is -161.20 kcal/mol.
Energy increment is 8.06 kcal/mol.
8,9,10,6	270,1,2,3,14	7	5	14	11,12,3	400,410, 
End of Fill
Save file created using nafold.
280,1	H-num file created from plot file.
1	4	4,5,6,7,11	13,14,15,Suboptimal foldings created.
290,13	Minimum folding energy is -162.40 kcal/mol.
Energy increment is 8.12 kcal/mol.
1,2,3,4,5,26	8,9,10,11,12,6,7,300,Energy dot plot created.
10	16,17, 
H-num file created from plot file.
8	17	8,9,13,14,15,16,1	1,2,3,310,320, 
 
1	330,340,350,360,4	4,5,6,7,8,7	2	6	8	9,10,15	370,380,390,400,5	15	11, 
2	4	410,420, 
End of Fill
Save file created using nafold.
Minimum folding energy is -179.00 kcal/mol.
Energy increment is 8.95 kcal/mol.
2	12	Suboptimal foldings created.
Energy dot plot crea

10,20,30,40,130,140,H-num file created from plot file.
2	320,330,340,REUSE= NO
30,40,50,60,130,140,150,130,140,4,5,6,160,170,5,6,7,5	10	50,60,70,80,90,100,1,2,3,350,150,160,SEQ.pnt created.
Sequence length is 232
70,80,90,/home/jupyter/plant_microRNA_prediction9	160,170,
7,8,9,180,190,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY021068511_448-868_+_110,
8,9,10,11,12,150,160,360,370,380,390,400,410,4,5,6,7	100,110,200,170,180,RNA free energy files (version 2.3) at 20 degrees created.
10,11,12, 
13,14,1	120,130,170,10,20,30,40,50,60, 
End of Fill
Save file created using nafold.
Minimum folding energy is -152.90 kcal/mol.
180,190,200,mfold version 3.6
7,8,9,10,120,190,200,210,21	140,180,15,16, 
9	REUSE= NO
130,140,150,220,230,11,12,13,14,15,210,70,80,150,160,3	210,190,Energy increment is 7.64 kcal/mol.

Structure plots generated.
All done.
160, 
4	170,90,100,110,220,11	200,220,H-num file created from plot file.
SEQ.pnt created.
Sequence length is 420
3	11	170,240,

200,210,10,20,30,40,50,1,2,3,4,130,1	9	410,420, 
End of Fill
Save file created using nafold.
SEQ.pnt created.
60,70,11	2	 
140,150,220,9	Minimum folding energy is -96.40 kcal/mol.
Energy increment is 4.82 kcal/mol.
Sequence length is 419

Structure plots generated.
All done.
80,90,5	Suboptimal foldings created.
Energy dot plot created.
1	8	160,170,180,230,RNA free energy files (version 2.3) at 20 degrees created.
2	5	13	100,110,H-num file created from plot file.
3	240,250,190,200,210,/home/jupyter/plant_microRNA_prediction10,20,30,40,50,1	
26	
Structure plots generated.
All done.
1,2,120,130,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020389531_1615-2035_-_
260,12	220,230,240, 
End of Fill
Save file created using nafold.
Minimum folding energy is -96.00 kcal/mol.
60,70,80,3,4,5,6,7,8,140,150,8	/home/jupyter/plant_microRNA_predictionmfold version 3.6
Energy increment is 4.80 kcal/mol.
270,2	
9,10,11,
Structure plots generated.
All done.
90,100,11	/home/jupyter/

2	9	90,100,80,90,100,130,240,6	110,120,11	370,80,90,140,150,10,20,30,40,50,60,70,80,250,260,
Structure plots generated.
All done.
110,130,140,	100,110,5	160,1	120,130,140,90,100,110,270,150,4	1	120,130,7	/home/jupyter/plant_microRNA_prediction280,290,150,4	170,120,130,140,
12	140,160,4	6	160,170,8	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020501341_2472-2891_+_
150,160,300,310,150,160,180,190,4	3	
Structure plots generated.
All done.
180,190,170,180,320,330,170,180,mfold version 3.6
200,170,180,190,200,/home/jupyter/plant_microRNA_prediction190,200,340,350,
REUSE= NO
7	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY021261181_0-310_+_210,
200,190,12	4	210,360,370,380,210,220,210,
Structure plots generated.
200,220,mfold version 3.6
220,All done.
2	390,400,410,SEQ.pnt created.
210,REUSE= NO
220,230, 
End of Fill
Save file created using nafold.
5	2	230,Sequence length is 419
230,8	Minimum folding energy is -169.80 kcal/mol.
Energy increme

110,120,11	5,6,7,180,12	15	SEQ.pnt created.
Sequence length is 419
150,160,8,9,10,130,140,190,15	RNA free energy files (version 2.3) at 20 degrees created.
/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020151371_2324-2743_-_11,12,13,
150,170,200,20	10,20,30,40,5	14,4	
Structure plots generated.
All done.
160,210,180,190,mfold version 3.6
50,60,6	18	170,15,16, 
220,200,3	70,80,90,REUSE= NO
/home/jupyter/plant_microRNA_prediction230,
180,15	210,8	6	1SEQ.pnt created.
100,110,190,200,240,	Sequence length is 419
220,16	13	3	210,120,130,250,12	230,240,RNA free energy files (version 2.3) at 20 degrees created.
220,140,260,250,21	16	10,20,30,230,150,160,270,260,5	40,50,60,70,170,7	240,6	280,270,Suboptimal foldings created.
Energy dot plot created.
10	80,90,Suboptimal foldings created.
Energy dot plot created.
250,19	180,190,280,290,100,110,
Structure plots generated.
4	260,10	All done.
200,290,300,300,310,1	270,120,130,310,210,1	9

1	11	H-num file created from plot file.
6	10,20,30,40,50,60,Suboptimal foldings created.
Energy dot plot created.
10	410, 
End of Fill
Save file created using nafold.
6	/home/jupyter/plant_microRNA_prediction
8	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020148281_6138-6560_-_
1,2,3,4,4	70,80,Minimum folding energy is -151.10 kcal/mol.
Energy increment is 7.55 kcal/mol.
15	
Structure plots generated.
All done.
5,6,mfold version 3.6
90,100,110,4	30	7, 
22	REUSE= NO
H-num file created from plot file.
120,9	/home/jupyter/plant_microRNA_prediction
8	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020412431_1839-2258_+_6	
SEQ.pnt created.
Sequence length is 422
1,2,3,130,140,8	RNA free energy files (version 2.3) at 20 degrees created.
4,5,6,7,mfold version 3.6
150,160,1219	8	10,8,9,10,Suboptimal foldings created.
Energy dot plot created.
1	REUSE= NO
	170,180,2	20,30,40,50,60,7	11,12,13,SEQ.pnt created.
9	190,11	5	7	70,80,90, 
Sequence length i

19,20, 
11	150,160,290,190,End of Fill
Save file created using nafold.
Minimum folding energy is -133.60 kcal/mol.
Energy increment is 6.68 kcal/mol.
3	mfold version 3.6
/home/jupyter/plant_microRNA_predictionREUSE= NO

7	340,350,360,170,300,310,320,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY021035691_247-604_+_11	
200,REUSE= NO
7	SEQ.pnt created.
Sequence length is 419
180,330,340,370,380,390,400,410,420, 
End of Fill
H-num file created from plot file.
210,mfold version 3.6
RNA free energy files (version 2.3) at 20 degrees created.
2	190,350,360,Save file created using nafold.
Minimum folding energy is -109.40 kcal/mol.
Energy increment is 5.47 kcal/mol.
1,2,3,4,SEQ.pnt created.
Sequence length is 422
220,REUSE= NO
10,20,30,40,50,200,370,380,390,5,6,RNA free energy files (version 2.3) at 20 degrees created.
SEQ.pnt created.
Sequence length is 357
4	230,60,70,80,H-num file created from plot file.
400,410, 
End of Fill
Save file created using nafold.
 
210,220

/home/jupyter/plant_microRNA_prediction
160,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020136111_3260-3679_-_
5	10,20,30,40,50,60,15	Suboptimal foldings created.
Energy dot plot created.
10	170,24	5	mfold version 3.6
REUSE= NO
7	70,80,90,180,8	SEQ.pnt created.
100,110,190,22	7	Sequence length is 419
120,130,7	200,12	3	4	RNA free energy files (version 2.3) at 20 degrees created.
10,
Structure plots generated.
All done.
140,150,8	11	210,20,30,40,50,60,70,160,7	220,230,/home/jupyter/plant_microRNA_prediction
80,90,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020713941_391-810_-_
170,240,100,110,6	180,mfold version 3.6
REUSE= NO
16	250,120,2	190,200,8	6	11	260,SEQ.pnt created.
130,140,9	23	270,280,Sequence length is 419
150,210,220,8	290,RNA free energy files (version 2.3) at 20 degrees created.
10,20,30,4	1	230,160,170,5	300,310,40,50,60,8	12	9	240,180,320,330,70,80,90,190,250,8	340,100,200,7	260,270,350,360,370,110,120,210,12	17	280,290

90,100,110,160,10,20,30,40,230,100,110,7	120,130,/home/jupyter/plant_microRNA_prediction170,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020029101_704-1126_-_50,60,70,80,mfold version 3.6
REUSE= NO

240,3	120,130,140,90,100,SEQ.pnt created.
Sequence length is 422
250,mfold version 3.6
REUSE= NO
9	140,150,5	11	150,160,170,180,190,110,120,RNA free energy files (version 2.3) at 20 degrees created.
260,270,SEQ.pnt created.
Sequence length is 422
160,180,190,200,130,140,10,20,30,40,50,280,RNA free energy files (version 2.3) at 20 degrees created.
170,180,200,16	150,60,70,80,290,5	10,20,30,40,50,12	190,200,210,210,160,170,90,100,24	300,310,320,60,70,80,90,210,220,220,180,110,120,8	330,340,100,110,230,220,230,240,190,130,140,4	350,360,370,380,120,130,230,240,240,250,250,200,210,
Structure plots generated.
All done.
150,390,400,410, 
End of Fill
6	12	140,150,250,260,260,220,160,Save file created using nafold.
Minimum folding energy is -109.90 kcal/mol.
Energy incremen

190,290,300,7	3	/home/jupyter/plant_microRNA_prediction
200,310,320,330,mfold version 3.6
REUSE= NO
7	12	210,340,350,360,370,22	SEQ.pnt created.
Sequence length is 423
RNA free energy files (version 2.3) at 20 degrees created.
18	19	220,230,380,390,400,410,420, 
End of Fill
10,20,30,40,240,Save file created using nafold.
Minimum folding energy is -109.60 kcal/mol.
Energy increment is 5.48 kcal/mol.
13	11	50,60,70,80,H-num file created from plot file.
7	250,260,90,100,1,2,3,4,5,4	7	270,280,110,120,130,8	6,7,8,9,290,140,8	10,11, 
19	300,150,160,23	13	1	310,320,170,330,340,350,180,14	12	360,370,380,390,190,8	Suboptimal foldings created.
Energy dot plot created.
400,410, 
End of Fill
Save file created using nafold.
Minimum folding energy is -191.00 kcal/mol.
Energy increment is 9.55 kcal/mol.
200,5	10	8	210,9	9	1	H-num file created from plot file.
220,1,2,3,4,230,2	14	20	5,6,7,8,240,250,9,10,11,12,260,13	15	9	13,14, 
270,9	280,290,6	11	
Structure plots generated.
All done.

Structure plots

220,2	
Structure plots generated.
All done.
RNA free energy files (version 2.3) at 20 degrees created.
230,12	2	20	10,20,30,40,50,60,240,250,/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020959931_3810-4232_+_
70,80,260,270,6	90,100,
Structure plots generated.
All done.
280,mfold version 3.6
REUSE= NO
20	110,120,290,300,SEQ.pnt created.
Sequence length is 422
130,140,/home/jupyter/plant_microRNA_prediction13	
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020169141_901-1320_+_310,320,
RNA free energy files (version 2.3) at 20 degrees created.
10,20,30,40,3	150,3	330,340,350,50,60,70,80,90,mfold version 3.6
REUSE= NO
160,170,360,370,380,390,100,110,120,SEQ.pnt created.
Sequence length is 419
21	180,400,410, 
End of Fill
Save file created using nafold.
130,140,RNA free energy files (version 2.3) at 20 degrees created.
10,20,30,7	190,150,160,Minimum folding energy is -169.80 kcal/mol.
Energy increment i

In [61]:
'''
base = "secondary_structure/mfold/"
for directory in glob.glob(f"{base}*"):    
    tag = directory[len(base):]
    ct_files = glob.glob(f'{directory}/*.ct')        
    try:
        ct_files.remove(f'{base}{tag}/SEQ.ct')
    except:
        print(directory)
        print(ct_files)
        print("*****************")
    for file in ct_files:        
        shutil.copy(file, './1.ct')
        #dot = ct2dot_bracket('./1.ct')
        #dot = dot.split('\n')
        #with open('./2.ct', 'w') as stream:
            #stream.write(bracket_to_ct(tag, dot[0] , dot[1] , "(0)"))        
        #ct1 = '\n'.join(reformatCT('./1.ct').split('\n')[1:])
        #ct2 = '\n'.join(reformatCT('./2.ct').split('\n')[1:])
        #if(ct1 != ct2):
            #print(file)
        ct = reformatCT('./1.ct')
        [nucleotide, index, values] = get_ct_data(ct)        
        #print(is_nested( index,  values))
        if(not is_nested( index,  values)):
            print("************")             
'''

## Mxfold2

In [57]:
#!wget https://github.com/keio-bioinformatics/mxfold2/releases/download/v0.1.1/mxfold2-0.1.1.tar.gz
#!pip3 install mxfold2-0.1.1.tar.gz
#!rm mxfold2-0.1.1.tar.gz

In [58]:
!mxfold2 predict ./extended.txt > secondary_structure/mxfold2_result.txt

>AMWY02059828.1:2832-3256(+)
AAAGAATCAGCAATGGAAAAATAACCGGTTCTTAATTCAGcataacaaattattcaattataatatagcTGTAAAAGAAATCTAAGTCTATTTGATATAGATCGGAATTTacgcaaattaaaaatttccaaataaGCAGTTCCGACCTGAGATCTGAACCGAAAACGCAAGATCCATCTAAACTCTCACCTCGGTCTCCGATTCAGTTGATGCAAGGCGGGATCCAATTCGCCTTTTCATTCAATTACATTCACCAATAACAGCTCGCCATCtggcttttaataaaaagttgcCAATCGGTTCCCGACCTGCACCAAGCGAATTAGAGACCGCCGGTAACTGAATCATTCTACATTAATCCCCGACTCCTCCTTTTACACATAGCAACTTCGCCCAAGAagactaaaaagaaaaggaagctAAC
........................((((...........((..........................))..........................................................................................................................(((...(((...............((((..........)))).................................(((....))).....................)))...)))..........................))))........................................................................................ (35.0)
>AMWY02004761.1:1853-2277(+)
actaataatgCATGGCCATATATATCAAATCTACCATATgccatttaataattttccttttttcttcttctttctttttctctct

>AMWY02002487.1:2516-2939(+)
tctctcttctttttcttccttaacTTTTCATCCAGCTTCAACCTCCATTTAGATCaaagttattgaatttttttttcatcttatttatgtaaatatatattgtttccTGCGGAAACGAATCCATGAACAACAGTCAATCAGTCATTGTTTGCTGATGCAGCGTCATCAAGATTCGCATGCTGATGGGTCGAGCAAAGCAGTGAGAATCTTGATGATGCTGCATCGGCCATAATTGACTATAtctcgtcatcatcatcatcatcatccagtTTCAACCTCCATGTAAATCaagttattgaattatttggtAAATAGATACTGATTCCCGCAGAATTGAATCAATGAACAACAGTCAATCAGTCATTGTTTGCTGATGCAGCATCATCAAGATTCACATGCGAATGGGTCGAACCAAAGCAGTGA
............................................................................................................((((....(....)............................((....)).(((...........))).........((.((.((...((..................)).))..)).)).................................................................................................))))........................................((....))....................((....((......))...))..... (30.2)
>AMWY02089812.1:0-419(-)
TCtgtctatatttattttcttctcattcACTGTAGTAATTTAAGCCTATACAGTTCTGAGTTGACCNatttctttatataaagtTNATTTC

>AMWY02001968.1:1243-1665(+)
CTACCTAAACTCCATGCATGGCTCGTGCTAGCTTTctggtttcttcttttttctttaagggcttattataaatttgcaGCAAGCCTAAACCCTTCTTAATTTCAAGATCTCTCTTcatttgattctttctttctttttctagggattcttcttcttcttcttcttcttgtttgctGCTGGTGTATGTTGGTTTGAGAGATTGAAGCTGCCAGCATGATCTGGTAATATGGAAcctaatattatacatatacatatctatatctatatatatagatagatagttttagatttactctttaattatattaattcctCCACCTATAGTTTTAGAGttactctttaattatattaattcttccACCTCTTTGTATAGATAGATAGAAACATATAGAAGGTCTTAGATTTCCTTTGCTTTTAGATCC
...((....(.((..((...((....))...............................(((....................)))..........................................................................................)).)).)......))........................................................................................................................................................................................................................................ (27.0)
>AMWY02039981.1:1214-1636(-)
atttatgttttctattttataattaaaaaataaaaaaaaaataataaggtaatctctctctctttttcttttatattcatatgaGGTCT

In [143]:
df = fasta_to_df('secondary_structure/mxfold2_result.txt')
df = df.apply(lambda row: bracket_row(row) , axis=1)
df.head(2)

Unnamed: 0,tag,data,bracket
0,AMWY02059828.1:2832-3256(+),AAAGAATCAGCAATGGAAAAATAACCGGTTCTTAATTCAGcataac...,........................((((...........((........
1,AMWY02004761.1:1853-2277(+),actaataatgCATGGCCATATATATCAAATCTACCATATgccattt...,.................................................


In [145]:
base = "./secondary_structure/mxfold2/"
!rm -r {base}
!mkdir -p {base}
for index, row in df.iterrows():    
    if(not os.path.exists(base + reformat(row['tag']))):
        os.makedirs(base + reformat(row['tag']))        
    tag = reformat(row['tag'])
    with open(base + f"{tag}/{tag}.ct",'w') as file:
        bracket = row['bracket'].split(' ')[0]
        deltaG = row['bracket'].split(' ')[1]
        ct = bracket_to_ct(row['tag'], row['data'], bracket, deltaG)
        file.write(ct)    

## SPOT-RNA

In [8]:
#!git clone https://github.com/jaswindersingh2/SPOT-RNA.git
#%cd SPOT-RNA
#!wget 'https://www.dropbox.com/s/dsrcf460nbjqpxa/SPOT-RNA-models.tar.gz' || wget -O SPOT-RNA-models.tar.gz 'https://app.nihaocloud.com/f/fbf3315a91d542c0bdc2/?dl=1'
#!tar -xvzf SPOT-RNA-models.tar.gz && rm SPOT-RNA-models.tar.gz
#!sudo apt-get install python3.6
#!python3.6 -m pip install tensorflow==1.14.0 # or for gpu: tensorflow-gpu==1.14.0
#! python3.6 -m pip install -r requirements.txt

In [23]:
base = "./secondary_structure/spot_rna/"
!rm -r {base}
!mkdir -p {base}

In [29]:
!python3.6 ./SPOT-RNA/SPOT-RNA.py  --inputs ./extended.txt  --outputs '{base}'  --cpu 32 --plots True

>> Opening FASTA file...
>> Converting FASTA file from multiline to single line and writing to file.
>> Done!

Preparing tfr records file for SPOT-RNA:
100%|█████████████████████████████████████████████| 1/1 [00:04<00:00,  4.21s/it]

Predicting for SPOT-RNA model 0
100%|█████████████████████████████████████████████| 1/1 [00:05<00:00,  5.23s/it]

Predicting for SPOT-RNA model 1
100%|█████████████████████████████████████████████| 1/1 [00:09<00:00,  9.51s/it]

Predicting for SPOT-RNA model 2
100%|█████████████████████████████████████████████| 1/1 [00:13<00:00, 13.10s/it]

Predicting for SPOT-RNA model 3
100%|█████████████████████████████████████████████| 1/1 [00:16<00:00, 16.44s/it]

Predicting for SPOT-RNA model 4
100%|█████████████████████████████████████████████| 1/1 [00:18<00:00, 18.18s/it]

Post Processing and Saving Output

Finished!

Processsing Time 203.32813096046448 seconds


In [48]:
!rm {base}/*.bpseq
!rm {base}/*.prob
for file in glob.glob(f"{base}*.ct"):    
    f = file[len(base):-3] # .ct        
    f = reformat(f)        
    if(not os.path.exists(base + f)):
        os.makedirs(base + f)  
    header = reformatCT(file).split("\n")[0]    
    with open(f"{base}{f}.dot", 'w') as stream:        
        stream.write(ct2dot_bracket(file))
    !RNAeval "{base}{f}.dot" -T 20 -v 
    #shutil.move(file, f"{base}{f}/{f}.ct")    

rm: cannot remove './RNA_secondary_structure/spot_rna//*.bpseq': No such file or directory
rm: cannot remove './RNA_secondary_structure/spot_rna//*.prob': No such file or directory
[36mExternal loop[0m                           : [32m -371[0m
[36mInterior loop[0m (  3, 32) [1mAU[0m; (  4, 31) [1mGC[0m: [32m -256[0m
[36mInterior loop[0m (  4, 31) [1mGC[0m; (  5, 30) [1mAU[0m: [32m -294[0m
[36mInterior loop[0m (  5, 30) [1mAU[0m; (  6, 29) [1mAU[0m: [32m -122[0m
[36mHairpin  loop[0m (  6, 29) [1mAU[0m              : [32m  716[0m
[36mInterior loop[0m ( 38, 71) [1mCG[0m; ( 39, 70) [1mAU[0m: [32m -255[0m
[36mInterior loop[0m ( 39, 70) [1mAU[0m; ( 40, 69) [1mGC[0m: [32m -256[0m
[36mInterior loop[0m ( 40, 69) [1mGC[0m; ( 41, 68) [1mCG[0m: [32m -403[0m
[36mInterior loop[0m ( 41, 68) [1mCG[0m; ( 49, 64) [1mAU[0m: [32m  618[0m
[36mInterior loop[0m ( 49, 64) [1mAU[0m; ( 50, 63) [1mUA[0m: [32m -155[0m
[36mInterior loop[0m 

In [68]:
df = fasta_to_df('./secondary_structure/spot_rna/AMWY02059828.1:2832-3256(+).dot')
df = df.apply(lambda row: bracket_row(row) , axis=1)
bracket = df['bracket'][0].split(' ')[0]
ct = bracket_to_ct(df['tag'][0], df['data'][0], bracket, "(0)")
print(ct)

   424 dG =       0.0 AMWY02059828.1:2832-3256(+)
     1 A      0      2      0       1
     2 A      1      3      0       2
     3 A      2      4     32       3
     4 G      3      5     31       4
     5 A      4      6     30       5
     6 A      5      7     29       6
     7 U      6      8      0       7
     8 C      7      9      0       8
     9 A      8     10      0       9
    10 G      9     11      0      10
    11 C     10     12      0      11
    12 A     11     13      0      12
    13 A     12     14      0      13
    14 U     13     15      0      14
    15 G     14     16      0      15
    16 G     15     17      0      16
    17 A     16     18      0      17
    18 A     17     19      0      18
    19 A     18     20      0      19
    20 A     19     21      0      20
    21 A     20     22      0      21
    22 U     21     23      0      22
    23 A     22     24      0      23
    24 A     23     25      0      24
    25 C     24     26      0      25


## Vienna package

In [69]:
#!wget https://www.tbi.univie.ac.at/RNA/download/ubuntu/ubuntu_20_04/viennarna_2.4.18-1_amd64.deb -O viennarna.deb
#!sudo dpkg -i ./viennarna.deb
#!sudo apt-get -f install
#!rm viennarna.deb

In [70]:
base = "./secondary_structure/viennarna/"
!rm -r {base}
!mkdir -p {base}

In [71]:
%cd {base}
!RNAfold --jobs=0 --infile ../../extended.txt  -T 20 > ../viennarna_result.txt
%cd ../../

/home/jupyter/plant_microRNA_prediction/RNA_secondary_structure/viennarna
/home/jupyter/plant_microRNA_prediction


In [124]:
df = fasta_to_df('secondary_structure/viennarna_result.txt')
df = df.apply(lambda row: bracket_row(row) , axis=1)
df.head(2)

Unnamed: 0,tag,data,bracket
0,AMWY02059828.1:2832-3256(+),AAAGAAUCAGCAAUGGAAAAAUAACCGGUUCUUAAUUCAGcauaac...,...((.((((....((........))..(((((....(((((((.....
1,AMWY02004761.1:1853-2277(+),acuaauaaugCAUGGCCAUAUAUAUCAAAUCUACCAUAUgccauuu...,...........(((((.((((..............))))))))).....


In [125]:
for index, row in df.iterrows():    
    tag = reformat(row['tag'])
    if(not os.path.exists(base + tag)):
        os.makedirs(base + tag)      
    with open(base + f"{tag}/{tag}.ct",'w') as file:
        bracket = row['bracket'].split(' ')[0]
        deltaG = row['bracket'].split(' ')[1]
        ct = bracket_to_ct(row['tag'], row['data'], bracket, deltaG, False)
        file.write(ct)    

In [126]:
import glob
for file in glob.glob(f"{base}*.ps"):    
    f = file[len(base):-6] # _ss.ps 
    f = reformat(f)        
    shutil.move(file, f"{base}{f}/{f}.ps")    

## ContraFold

In [39]:
#!wget http://contra.stanford.edu/contrafold/contrafold_v2_02.tar.gz
#!tar -xvzf contrafold_v2_02.tar.gz && rm contrafold_v2_02.tar.gz
#%cd contrafold/src
#!make clean
#!make 
# to file must changed to be complieable # utility.hpp and optimization.c++ files

In [42]:
counter = 0
base = "./secondary_structure/contrafold/"
!rm -r {base}
!mkdir -p {base}
df = fasta_to_df('./Temp/extended.txt')

for index, row in df.iterrows():    
    tag = reformat(row['tag'])
    if(not os.path.exists(base + tag)):
        os.makedirs(base + tag)            
    with open(base + f"{tag}/{tag}.FASTA",'w') as file:
        file.write(f">{row['tag']}\n{row['data']}")
    counter += 1    
    if(counter >= 10):
        break

In [43]:
def run_contrafold(tag):
    tag = reformat(tag)    
    %cd contrafold/src
    !./contrafold predict ../..{base[1:]}{tag}/{tag}.FASTA > ../..{base[1:]}{tag}/{tag}.dot
    with open(f"../..{base[1:]}{tag}/{tag}.dot", 'r') as file:
        text = file.read()
    text = [l for l in text.split("\n") if l[:len(">structure")] != ">structure"]    
    header = text[0]
    with open(f"../..{base[1:]}{tag}/{tag}.dot", 'w') as file:
        file.write('\n'.join(text[1:]))    
    !RNAeval  ../..{base[1:]}{tag}/{tag}.dot -T 20 > ../..{base[1:]}{tag}/{tag}.dotdg    
    with open(f"../..{base[1:]}{tag}/{tag}.dotdg", 'r') as file:
        text = file.read()
    with open(f"../..{base[1:]}{tag}/{tag}.dot", 'w') as file:
        file.write(header + "\n" + text)    
    
    df = fasta_to_df(f'../..{base[1:]}{tag}/{tag}.dot')
    df = df.apply(lambda row: bracket_row(row) , axis=1)        
    tag = reformat(df['tag'][0])
    with open(f'../..{base[1:]}{tag}/{tag}.ct','w') as file:
        bracket = df['bracket'][0].split(' ')[0]        
        deltaG = df['bracket'][0].split(' ')[1]
        ct = bracket_to_ct(df['tag'][0], df['data'][0], bracket, deltaG, False)
        file.write(ct)    
    #!rm ../..{base[1:]}{tag}/{tag}.dot
    #!rm ../..{base[1:]}{tag}/{tag}.dotdg
    !rm ../..{base[1:]}{tag}/{tag}.FASTA
    %cd ../../        

if __name__ == '__main__':        
    pool = mp.Pool(mp.cpu_count() - 1)  
    pool.map(run_contrafold, df['tag'].iloc[:10])

In [56]:
s = 'CUCCCCUUGUCUACCAUCCCCAACUAGCGAGAGAGACAUUACCUACCUGAAUAGAAGAUCUCUCUCGAGCUCUCGagcucucucuuuuucuauaUCUCUGUCUCUUUGUGUCUCUGGAGCUUGUACUAACAUUAAUAUCGUGCACCAGCAGCAGUUGAAGCUGCCAGCAUGAUCUAAACUUCCUUCUCUGUAAAGGAUAGAUCGGAUCAUGUGGUAGCUUCACCUGUUGAUGGGAUCACGAAAGCGCCCCUCUUACUACUCUACAUUAAUUCUUUCUCGUUAUACAACCUCCCAGUAAGCAUGCUUUCAAAACCAACUUGAGuaaguuaauuuguuuagcuuuuguuuuuggcucuuccuuuacuuuaaauuuucucaucuggguuuuuguuauauauauguacuguuuuauauauguauuccu'
d = '............................((((((((..(...(((......))).)..))))))))(((((....)))))...................................((((.((((...(((.......(((((..(((.((((((.((((((((((.(((((((((.(.(((((((.......))))).)).).))))))))))))))))))).)))))).)))...))))).....................................)))..)))).))))....((((()))))..((((((....((((.(((((((.....)))(((.........)))................)))).))))....))))))....(((((((((((......)))))))))))....'
print(s[300],s[301])
print(d[300],d[301])

A U
( )


In [55]:
'''path = 'secondary_structure/contrafold/AMWY020333941_469-893_-_/AMWY020333941_469-893_-_.dot'
!RNAeval  {path} -T 20 -v'''; 

[36mExternal loop[0m                           : [32m -364[0m
[36mInterior loop[0m ( 29, 66) [1mGC[0m; ( 30, 65) [1mAU[0m: [32m -294[0m
[36mInterior loop[0m ( 30, 65) [1mAU[0m; ( 31, 64) [1mGC[0m: [32m -256[0m
[36mInterior loop[0m ( 31, 64) [1mGC[0m; ( 32, 63) [1mAU[0m: [32m -294[0m
[36mInterior loop[0m ( 32, 63) [1mAU[0m; ( 33, 62) [1mGC[0m: [32m -256[0m
[36mInterior loop[0m ( 33, 62) [1mGC[0m; ( 34, 61) [1mAU[0m: [32m -294[0m
[36mInterior loop[0m ( 34, 61) [1mAU[0m; ( 35, 60) [1mGC[0m: [32m -256[0m
[36mInterior loop[0m ( 35, 60) [1mGC[0m; ( 36, 59) [1mAU[0m: [32m -294[0m
[36mInterior loop[0m ( 36, 59) [1mAU[0m; ( 39, 56) [1mUA[0m: [32m  103[0m
[36mInterior loop[0m ( 39, 56) [1mUA[0m; ( 43, 54) [1mCG[0m: [32m  305[0m
[36mInterior loop[0m ( 43, 54) [1mCG[0m; ( 44, 53) [1mUA[0m: [32m -256[0m
[36mInterior loop[0m ( 44, 53) [1mUA[0m; ( 45, 52) [1mAU[0m: [32m -165[0m
[36mHairpin  loop[

# Visualization

In [25]:
#https://github.com/ViennaRNA/forna
#http://varna.lri.fr/

# CT Analizer

In [160]:
base = "./secondary_structure/mfold/"
for directory in glob.glob(f"{base}/*"):
    df = fasta_to_df(f"{directory}/SEQ.FASTA")
    tag = df['tag'][0] 
    display(df.head())
    tag
    print(tag)
    for ct_file in glob.glob(f"{directory}/*.ct"):
        #print(ct_file)
        pass
    break

Unnamed: 0,tag,data
0,AMWY02022710.1:81-502(-),attCTATAGATaatggaaagaaagaaagaaagaaagaaagaaacca...


AMWY02022710.1:81-502(-)


In [141]:
path = "./AMWY02090868.1_+_870-1290_201-221_7.ct"
hit_start = 201
hit_end = 221
dG = -166.62
sign = "+"
[nucleotide, index, values] = get_ct_data(reformatCT(path))
hit = values[hit_start-1:hit_end] 
hit_len = hit_end - hit_start + 1

In [142]:
# self Hit_self_complementarity
hit_self_complementarity = ((hit < hit_start) | (hit > hit_end)).all()
hit_self_complementarity

True

In [143]:
if(sum(hit == 0) == hit_len):
    complementarity_in_hit_region = "no"
elif(sum(hit != 0) == hit_len):
    complementarity_in_hit_region = "fully_connected"
else:    
    complementarity_in_hit_region = "yes"
print(complementarity_in_hit_region)

yes


In [96]:
# detecthit_end hit type: 3p or 5p
hit_star = values[hit_start-1 - 2:hit_end - 2]
hit_star

198    308
199    307
200    306
201    305
202    304
203    303
204    302
205    301
206    300
207    299
208    298
209    297
210    296
211    295
212    293
213    292
214    291
215    290
216    289
217      0
218    287
Name: 4, dtype: int64

# BLASTX or DIMOND

# DIAMOND

https://github.com/bbuchfink/diamond

In [None]:
'''
import hashlib
import os
parallel = [os.path.join(dp, f) for dp, dn, filenames in os.walk("./PRNA_secondary_structure") for f in filenames ]
series = [os.path.join(dp, f) for dp, dn, filenames in os.walk("./SRNA_secondary_structure") for f in filenames ]
for i in range(len(parallel)):        
    md5_hash = hashlib.md5()
    with open(parallel[i],"rb") as file:        
        md5_hash.update(file.read())
        digest1 = md5_hash.hexdigest()                
    md5_hash = hashlib.md5()
    with open(series[i],"rb") as file:        
        md5_hash.update(file.read())
        digest2 = md5_hash.hexdigest()                
    if(digest1 != digest2):
        print(parallel[i])
        print(series[i])
        print("***********")     
'''