# Common

In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm  # !pip install tqdm
from tqdm import trange
import multiprocessing as mp
import shutil
import glob
import os
import sys
if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO

In [2]:
!mkdir -p Temp

In [3]:
def bracket_row(row):    
    s = row['data']
    index = min(s.find('.'), s.find('('))
    data = row['data']
    row['data'] = data[0:index]
    row['bracket'] = data[index:]
    return row

In [4]:
def adjust(text,n=7):
    text = str(text)    
    return " " * (n - len(text)) + text

In [5]:
def bracket_to_ct(tag, data, bracket, deltaG, negative_deltaG=True):    
    deltaG = deltaG.replace('(','').replace(')','')
    deltaG = float(deltaG)
    if(deltaG > 0 and negative_deltaG ): # negetive?!
        deltaG = -1 * deltaG
    stack = []
    index = np.zeros((len(bracket)), dtype = int)
    values = np.zeros((len(bracket)), dtype = int)
    for i in range(len(bracket)):
        index[i] = i + 1
        if(bracket[i] == '.'):
            values[i] = 0
        elif(bracket[i] == '('):
            stack.append(i)
        elif(bracket[i] == ')'):
            if(len(stack) == 0 ):
                print('structure error!')
            values[stack[-1]] = i + 1
            values[i]  = stack[-1] + 1
            stack.pop()
        else:
            print('structure error!')
    if(len(stack) != 0 ):
        print('structure error!')
    # body    
    ct = f"{adjust(len(data),6)} dG ={adjust(deltaG,10)} {tag}\n"   
    for i in range(len(bracket)):
        ct += f"{adjust(index[i],6)} {data[i]} {adjust(i,6)} {adjust((i+2)%(len(data)+1),6)} {adjust(values[i],6)} {adjust(index[i],7)}\n"
    return ct

In [6]:
def fasta_to_df(path):
    with open(path, 'r') as file:
        text = file.read()
    lines = [line for line in text.split('\n') if len(line) > 0]
    s = ''
    tags = []
    data = []
    for l in lines:
        if(l[0]=='>'):
            tags.append(l)        
            data.append(s)
            s = ''
        else:
            s += l    
    data.append(s)
    df = pd.DataFrame(
            {
                'tag': tags,
                'data': data[1:]
            })
    df['tag'] = df['tag'].apply(lambda x: x[1:])    
    return df

In [7]:
def df_to_fasta(df, path):
    lines = []
    df.apply(lambda row: lines.append(f">{row['tag']}\n{row['data']}\n"),axis=1)
    with open(path,'w') as file:
        file.write(''.join(lines))

In [8]:
def reformat(path):
    return path.replace('(','_').replace(')','_').replace('.','').replace(':','_')

In [9]:
def reformatCT(path):
    with open(path, 'r') as file:
        text = file.read()
    text = [l for l in text.split('\n') if len(l) > 0 ] # remove blank lines
    text = '\n'.join(text)
    text = text.replace("\t"," ")
    while("  " in text):
        text = text.replace("  ", " ")
    lines = [l for l in text.split('\n')]
    for i in range(len(lines)):
        if(lines[i][0] == " "):
            lines[i] = lines[i][1:]
        if(lines[i][-1] == " "):
            lines[i] = lines[i][:-1]
    text = '\n'.join(lines)
    return text

In [10]:
def get_ct_data(ct):
    ct = "\n".join(ct.split('\n')[1:])
    df = pd.read_csv(StringIO(ct), sep=" ", header=None)               
    nucleotide = df.iloc[:,1]
    index = df.iloc[:,5]
    values = df.iloc[:,4]
    return [nucleotide, index, values]

In [11]:
def ct2dot_bracket(path):
    [nucleotide, index, values] = get_ct_data(reformatCT(path))
    text = ''.join(nucleotide) + "\n"
    watch = []
    for i, v in zip(index,values):
        if(v == 0):
            text += '.'
        else:
            if( v not in watch):
                text += '('
                watch.append(i)
            if( v in watch):
                text += ')'
    return text

In [12]:
def is_nested(index, values):
    max_value = max(index) + 10 # inf
    for i, v in zip(index, values):
        if(v < max_value and v != 0):
            max_value  = v
        if(i >= max_value):
            max_value = max(index) + 10 # inf
        if(v > max_value):
            return False               
    return True

In [13]:
'''ct = reformatCT('./secondary_structure/spot_rna/AMWY020598281_2832-3256_+_/AMWY020598281_2832-3256_+_.ct')
[nucleotide, index, values] = get_ct_data(ct)
print(is_nested( index,  values))
''';

### rename tag of input genome to new tag id

# Download dataset

In [13]:
'''
from Bio import Entrez
Entrez.email = "abolhasani.eliya@gmail.com"     
with Entrez.esearch(db='nucleotide', term="Arabidopsis thaliana") as handle:
    result = Entrez.read(handle)

print(result)
genome_ids = result['IdList']

for genome_id in genome_ids:
    print(genome_id)
    record = Entrez.efetch(db="nucleotide", id=genome_id, rettype="fasta", retmode="text")        
    with open(f'{genome_id}.fasta', 'w') as f:
        f.write(record.read())
    break
''';
'''
from Bio import Entrez
Entrez.email = "abolhasani.eliya@gmail.com"     
record = Entrez.efetch(db="nucleotide", id="NC_054143.4", rettype="fasta", retmode="text")        
with open(f'data.fasta', 'w') as f:
    f.write(record.read())
''';

In [14]:
!wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/439/995/GCA_000439995.3_AzaInd2.1/GCA_000439995.3_AzaInd2.1_genomic.fna.gz

--2021-11-14 18:10:04--  https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/439/995/GCA_000439995.3_AzaInd2.1/GCA_000439995.3_AzaInd2.1_genomic.fna.gz
Resolving ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)... 130.14.250.10, 130.14.250.7, 2607:f220:41e:250::11, ...
Connecting to ftp.ncbi.nlm.nih.gov (ftp.ncbi.nlm.nih.gov)|130.14.250.10|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 85647577 (82M) [application/x-gzip]
Saving to: ‘GCA_000439995.3_AzaInd2.1_genomic.fna.gz’

CA_000439995.3_AzaI  27%[====>               ]  22.82M  7.26MB/s    eta 8s     ^C


In [263]:
!gzip -d ./GCA_000439995.3_AzaInd2.1_genomic.fna.gz

# Download data from Mirbase

In [28]:
directory = 'miRBase_driven_data'

In [18]:
base = "https://www.mirbase.org/ftp/CURRENT"        
!rm -r {directory}
!mkdir -p {directory}
!wget {base}/aliases.txt.gz -P ./{directory}/       ; gzip -d ./{directory}/aliases.txt.gz 
!wget {base}/hairpin.fa.gz -P ./{directory}/           ; gzip -d ./{directory}/hairpin.fa.gz 
!wget {base}/hairpin_high_conf.fa.gz -P ./{directory}/ ; gzip -d ./{directory}/hairpin_high_conf.fa.gz 
!wget {base}/mature.fa.gz -P ./{directory}/            ; gzip -d ./{directory}/mature.fa.gz 
!wget {base}/mature_high_conf.fa.gz -P ./{directory}/  ; gzip -d ./{directory}/mature_high_conf.fa.gz
!wget {base}/miRNA.str.gz -P ./{directory}/            ; gzip -d ./{directory}/miRNA.str.gz 
!wget {base}/miRNA.xls.gz -P ./{directory}/            ; gzip -d ./{directory}/miRNA.xls.gz 
!wget {base}/organisms.txt.gz -P ./{directory}/        ; gzip -d ./{directory}/organisms.txt.gz

--2021-11-14 19:08:31--  https://www.mirbase.org/ftp/CURRENT/aliases.txt.gz
Resolving www.mirbase.org (www.mirbase.org)... 130.88.97.249
Connecting to www.mirbase.org (www.mirbase.org)|130.88.97.249|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 480536 (469K) [application/x-gzip]
Saving to: ‘./miRBase_driven_data/aliases.txt.gz’


2021-11-14 19:08:32 (469 KB/s) - ‘./miRBase_driven_data/aliases.txt.gz’ saved [480536/480536]

--2021-11-14 19:08:33--  https://www.mirbase.org/ftp/CURRENT/hairpin.fa.gz
Resolving www.mirbase.org (www.mirbase.org)... 130.88.97.249
Connecting to www.mirbase.org (www.mirbase.org)|130.88.97.249|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1547350 (1.5M) [application/x-gzip]
Saving to: ‘./miRBase_driven_data/hairpin.fa.gz’


2021-11-14 19:08:34 (995 KB/s) - ‘./miRBase_driven_data/hairpin.fa.gz’ saved [1547350/1547350]

--2021-11-14 19:08:35--  https://www.mirbase.org/ftp/CURRENT/hairpin_high_conf.fa.gz
Resol

In [29]:
df = fasta_to_df(f'./{directory}/mature.fa')
#df = fasta_to_df('./Data/mature_high_conf.fa')
df['organism'] = df['tag'].apply(lambda x: x[:3])
print(df.shape)
df.head(2)

(48885, 3)


Unnamed: 0,tag,data,organism
0,cel-let-7-5p MIMAT0000001 Caenorhabditis elega...,UGAGGUAGUAGGUUGUAUAGUU,cel
1,cel-let-7-3p MIMAT0015091 Caenorhabditis elega...,CUAUGCAAUUUUCUACCUUACC,cel


In [30]:
organism = pd.read_csv(f'./{directory}/organisms.txt',sep='\t')
organism.columns = [c.replace('#','') for c in organism.columns] # remove sharp from columns
print(organism.shape)
organism.head(2)

(285, 5)


Unnamed: 0,organism,division,name,tree,NCBI-taxid
0,aqu,AQU,Amphimedon queenslandica,Metazoa;Porifera;,400682
1,nve,NVE,Nematostella vectensis,Metazoa;Cnidaria;,45351


In [31]:
items = list(organism['tree'].unique())
items.sort(key=len)
items

['Viruses;',
 'Mycetozoa;',
 'Alveolata;',
 'Metazoa;Porifera;',
 'Metazoa;Cnidaria;',
 'Viridiplantae;Chlorophyta;',
 'Viridiplantae;Embryophyta;',
 'Viridiplantae;Coniferophyta;',
 'Viridiplantae;Magnoliophyta;',
 'Metazoa;Bilateria;Deuterostoma;',
 'Chromalveolata;Heterokontophyta;',
 'Metazoa;Bilateria;Ecdysozoa;Nematoda;',
 'Metazoa;Bilateria;Lophotrochozoa;Annelida;',
 'Metazoa;Bilateria;Lophotrochozoa;Nemertea;',
 'Metazoa;Bilateria;Lophotrochozoa;Mollusca;',
 'Viridiplantae;Magnoliophyta;monocotyledons;',
 'Metazoa;Bilateria;Deuterostoma;Hemichordata;',
 'Metazoa;Bilateria;Deuterostoma;Echinodermata;',
 'Metazoa;Bilateria;Lophotrochozoa;Brachiopoda;',
 'Metazoa;Bilateria;Ecdysozoa;Arthropoda;Hexapoda;',
 'Metazoa;Bilateria;Ecdysozoa;Arthropoda;Crustacea;',
 'Metazoa;Bilateria;Lophotrochozoa;Platyhelminthes;',
 'Metazoa;Bilateria;Ecdysozoa;Arthropoda;Chelicerata;',
 'Metazoa;Bilateria;Ecdysozoa;Arthropoda;Mandibulata;',
 'Viridiplantae;Magnoliophyta;eudicotyledons;Poaceae;',
 'M

In [32]:
selectedTree = organism[organism['tree'].apply(lambda x: "Viridiplantae;" in x)]
print(selectedTree.shape)
selectedTree.head(5)

(86, 5)


Unnamed: 0,organism,division,name,tree,NCBI-taxid
66,cre,CRE,Chlamydomonas reinhardtii,Viridiplantae;Chlorophyta;,3055
67,pta,PTA,Pinus taeda,Viridiplantae;Coniferophyta;,3352
68,ppt,PPT,Physcomitrella patens,Viridiplantae;Embryophyta;,3218
69,smo,SMO,Selaginella moellendorffii,Viridiplantae;Embryophyta;,88036
70,ath,ATH,Arabidopsis thaliana,Viridiplantae;Magnoliophyta;eudicotyledons;Bra...,3702


In [33]:
selected = df[df['organism'].isin(selectedTree['organism'])]
print(selected.shape)
selected.head()

(10414, 3)


Unnamed: 0,tag,data,organism
316,ath-miR156a-5p MIMAT0000166 Arabidopsis thalia...,UGACAGAAGAGAGUGAGCAC,ath
317,ath-miR156a-3p MIMAT0031865 Arabidopsis thalia...,GCUCACUGCUCUUUCUGUCAGA,ath
318,ath-miR156b-5p MIMAT0000167 Arabidopsis thalia...,UGACAGAAGAGAGUGAGCAC,ath
319,ath-miR156b-3p MIMAT0031866 Arabidopsis thalia...,UGCUCACCUCUCUUUCUGUCAGU,ath
320,ath-miR156c-5p MIMAT0000168 Arabidopsis thalia...,UGACAGAAGAGAGUGAGCAC,ath


In [34]:
df_to_fasta(selected,'./Temp/mature_microRNA_queries.fasta')

# Remove redundant

## cdhit-est

In [41]:
!cdhit/cd-hit-est -i ./Temp/mature_microRNA_queries.fasta  -o ./Temp/NR_mature_microRNA_queries.fasta \
    -c 1 -r 0 -G 1 -g 1 -b 30 -l 10 -aL 0 -AL 99999999 -aS 0 \
    -AS 99999999 -s 0 -S 0 

Program: CD-HIT, V4.8.1 (+OpenMP), Oct 23 2021, 21:45:39
Command: cdhit/cd-hit-est -i
         ./Temp/mature_microRNA_queries.fasta -o
         ./Temp/NR_mature_microRNA_queries.fasta -c 1 -r 0 -G 1
         -g 1 -b 30 -l 10 -aL 0 -AL 99999999 -aS 0 -AS 99999999
         -s 0 -S 0

Started: Sun Nov 14 19:23:14 2021
                            Output                              
----------------------------------------------------------------
total seq: 10414
longest and shortest : 28 and 17
Total letters: 222978
Sequences have been sorted

Approximated minimal memory consumption:
Sequence        : 1M
Buffer          : 1 X 12M = 12M
Table           : 1 X 16M = 16M
Miscellaneous   : 0M
Total           : 30M

Table limit with the given memory limit:
Max number of representatives: 4000000
Max number of word counting entries: 96149440

comparing sequences from          0  to      10414
..........    10000  finished       5817  clusters

    10414  finished       6028  clusters

Approximate

## reformat

In [42]:
with open('./Temp/NR_mature_microRNA_queries.fasta.clstr','r') as file:
    text = file.read()
lines = [line for line in text.split('\n') if len(line) > 0]
cluster = []
seqid = []
last_cluster = ""
for l in lines:
    if(l[0]=='>'):        
        last_cluster = l.replace('>Cluster ',"C")
    else:        
        cluster.append(last_cluster)
        seqid.append(l.split(', >')[1].split('...')[0])                
seq2cluster = pd.DataFrame({'seqid': seqid,'cluster': cluster})
print(seq2cluster.shape)
seq2cluster.head(2)    

(10414, 2)


Unnamed: 0,seqid,cluster
0,cst-miR11332,C0
1,stu-miR7994b-5p,C1


In [43]:
df = fasta_to_df("./Temp/mature_microRNA_queries.fasta")
df['accession'] = df['tag'].apply(lambda x : x.split(' ')[0])
seq2cluster = pd.merge(df,seq2cluster,how="inner",left_on='accession',right_on="seqid")[['cluster','seqid','tag']]
print(seq2cluster.shape)
display(seq2cluster.head(2))
seq2cluster.to_csv('./Temp/seq2cluster.csv',index=False)

(10414, 3)


Unnamed: 0,cluster,seqid,tag
0,C5495,ath-miR156a-5p,ath-miR156a-5p MIMAT0000166 Arabidopsis thalia...
1,C1199,ath-miR156a-3p,ath-miR156a-3p MIMAT0031865 Arabidopsis thalia...


In [44]:
# todo: sorted first by cluster then by seqid
seq2cluster.sort_values("cluster").head(2)

Unnamed: 0,cluster,seqid,tag
9422,C0,cst-miR11332,cst-miR11332 MIMAT0044622 Cucumis sativus miR1...
7002,C1,stu-miR7994b-5p,stu-miR7994b-5p MIMAT0031188 Solanum tuberosum...


In [45]:
df = fasta_to_df("./Temp/NR_mature_microRNA_queries.fasta")
df['tag'] = df['tag'].apply(lambda x : x.split(' ')[0])
df = pd.merge(df,seq2cluster,how="inner",left_on='tag',right_on="seqid")[['cluster','data']]

lines = []
df.apply(lambda row: lines.append(f">{row['cluster']}\n{row['data']}\n"),axis=1)
print(df.shape)
with open('./Temp/BLASTn_queries.fasta','w') as file:
    file.write(''.join(lines))

(6028, 2)


# BlastN

!sudo apt-get install ncbi-blast+


In [46]:
!makeblastdb -in input_genome.fna \
             -dbtype nucl \
             -out ./Temp/blastn_database



Building a new DB, current time: 11/14/2021 19:23:46
New DB name:   /home/jupyter/plant_microRNA_prediction/Temp/blastn_database
New DB title:  input_genome.fna
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 126142 sequences in 9.84965 seconds.


In [17]:
header = 'qseqid sseqid qstart qend sstart send qseq sseq evalue bitscore score length pident nident mismatch positive gapopen gaps ppos frames qframe sframe sstrand qcovs qcovhsp qlen slen'

In [18]:
!blastn -query ./Temp/BLASTn_queries.fasta \
        -out ./Temp/BLASTn_result \
        -num_threads {mp.cpu_count()} \
        -db ./Temp/blastn_database \
        -word_size 7 \
        -penalty -3 \
        -reward 2 \
        -gapopen 5 \
        -gapextend 2 \
        -outfmt '6 qseqid sseqid qstart qend sstart send qseq sseq evalue bitscore score length pident nident mismatch positive gapopen gaps ppos frames qframe sframe sstrand qcovs qcovhsp qlen slen'       

In [19]:
df_blastn = pd.read_csv('./Temp/BLASTn_result', sep='\t',header=None)
df_blastn.columns = header.replace("  "," ").split(" ")
print(df_blastn.shape)
df_blastn.head(2)

(326849, 27)


Unnamed: 0,qseqid,sseqid,qstart,qend,sstart,send,qseq,sseq,evalue,bitscore,...,gaps,ppos,frames,qframe,sframe,sstrand,qcovs,qcovhsp,qlen,slen
0,C5495,AMWY02099822.1,1,20,1769,1750,TGACAGAAGAGAGTGAGCAC,TGACAGAAGAGAGTGAGCAC,0.004,37.4,...,0,100.0,1/-1,1,-1,minus,100,100,20,3308
1,C5495,AMWY02082313.1,1,20,5954,5973,TGACAGAAGAGAGTGAGCAC,TGACAGAAGAGAGTGAGCAC,0.004,37.4,...,0,100.0,1/1,1,1,plus,100,100,20,8471


In [20]:
threshold = 4
df_blastn['Nonconformity'] = df_blastn['qlen'] - (abs(df_blastn['qend'] - df_blastn['qstart']) + 1) + df_blastn['gaps'] + df_blastn['mismatch']
df_blastn = df_blastn[df_blastn['Nonconformity'] <= threshold]
print(df_blastn.shape)
df_blastn.head(2)

(80217, 28)


Unnamed: 0,qseqid,sseqid,qstart,qend,sstart,send,qseq,sseq,evalue,bitscore,...,ppos,frames,qframe,sframe,sstrand,qcovs,qcovhsp,qlen,slen,Nonconformity
0,C5495,AMWY02099822.1,1,20,1769,1750,TGACAGAAGAGAGTGAGCAC,TGACAGAAGAGAGTGAGCAC,0.004,37.4,...,100.0,1/-1,1,-1,minus,100,100,20,3308,0
1,C5495,AMWY02082313.1,1,20,5954,5973,TGACAGAAGAGAGTGAGCAC,TGACAGAAGAGAGTGAGCAC,0.004,37.4,...,100.0,1/1,1,1,plus,100,100,20,8471,0


In [21]:
# remore redundancy and hold best one base of Nonconformity value
df_blastn = df_blastn.sort_values(["Nonconformity", "evalue"], ascending = (True, True))
df_blastn = df_blastn.drop_duplicates(subset=['sseqid','sstart', 'send','sstrand'], keep='first')
df_blastn.to_csv('./Temp/filtered_out_blastn.csv')
print(df_blastn.shape)

(66445, 28)


# Result of the blastn to bed file

In [22]:
flanking_value = 200
df = df_blastn[['sseqid', 'sstart', 'send', 'sstrand','slen']]
df['ones'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ones'] = 1


In [23]:
def switch(row):
    if(row['sstart'] > row['send']):        
        temp = row['sstart']
        row['sstart'] = row['send']
        row['send'] = temp
    return row
df = df.apply(lambda row: switch(row), axis=1)

In [24]:
def convert(inp):
    if(inp == "plus"):
        return "forward"
    if(inp == "minus"):
        return "reverse"
    raise Exception('Error, sstrand contains illegal word! only "plus" and "minus" are allowed')
df['strand'] = df['sstrand'].apply(lambda x: convert(x))

In [25]:
def convert2sign(inp):
    if(inp == "plus"):
        return "+"
    if(inp == "minus"):
        return "-"
    raise Exception('Error, sstrand contains illegal word! only "plus" and "minus" are allowed')
df['sign'] = df['sstrand'].apply(lambda x: convert2sign(x))

In [26]:
df['hit_length'] = df.apply(lambda row: abs(row['send'] - row['sstart']) + 1 ,axis=1)

## convert sstart and send from location to index (range)

In [27]:
df['sstart'] = df['sstart'].apply(lambda x: x - 1)

In [28]:
df['downstream_flanking'] = df['sstart'].apply(lambda x:  flanking_value if x > flanking_value else x)

In [29]:
df['upstream_flanking'] = df.apply(lambda row:  flanking_value if (row['send']+flanking_value) <= row['slen'] else row['slen'] - row['send'],axis=1)

In [30]:
df['hit_start'] = df.apply(lambda row: row['downstream_flanking'] if row['sign'] == "+" else row['upstream_flanking'],axis=1)

In [31]:
df['hit_end'] = df.apply(lambda row: row['downstream_flanking'] + row['hit_length'] if row['sign'] == "+" else row['upstream_flanking'] + row['hit_length'],axis=1)

In [32]:
df['sstart'] = df['sstart'].apply(lambda x: max(x - flanking_value, 0))
df['send'] = df.apply(lambda row: min(row['send'] + flanking_value , row['slen']),axis=1)

In [33]:
df['tag'] = df.apply(lambda row: f">{row['sseqid']}:{row['sstart']}-{row['send']}({row['sign']})",axis=1)
df['reformated_tag'] = df['tag'].apply(lambda t: reformat(t))
df[['tag', 'reformated_tag', 'hit_start', 'hit_end']].to_csv('./Temp/hit_index_info.csv')#, index=False)

In [34]:
df['location_tag'] = df.apply(lambda row: f">{row['sseqid']}|{row['sign']}|{row['sstart'] + 1}-{row['send']}|{row['hit_start']+1}-{row['hit_end']}",axis=1)
df[['location_tag']].to_csv('./Temp/pipe_seprated_location_list.csv',index=False)

In [35]:
df[['sseqid','sstart','send','strand','ones', 'sign']].to_csv('./Temp/extension_index.bed', 
        index=False, header=False, sep="\t")

# Extention


In [36]:
# !sudo apt-get install bedtools

In [37]:
!bedtools getfasta -fi ./input_genome.fna -fo ./Temp/extended_original.txt -s -bed ./Temp/extension_index.bed
!rm input_genome.fna.fai

index file ./input_genome.fna.fai not found, generating...


In [34]:
# todo: remove duplicated
'''
df = fasta_to_df("./Temp/extended.txt")
df = df.drop_duplicates(subset=['tag'], keep='first')
df_to_fasta(df,"./Temp/extended.txt")
len(df['tag'].unique())
''';

# Convert hit region to upper case and other region to lower case

In [38]:
ext = fasta_to_df('./Temp/extended_original.txt')
info = pd.read_csv('./Temp/hit_index_info.csv')
info['tag'] = info['tag'].apply(lambda x: x[1:])
print(info.shape)
info.head(2)

(66445, 5)


Unnamed: 0.1,Unnamed: 0,tag,reformated_tag,hit_start,hit_end
0,132836,AMWY02059828.1:2832-3256(+),>AMWY020598281_2832-3256_+_,200,224
1,300170,AMWY02004761.1:1853-2277(+),>AMWY020047611_1853-2277_+_,200,224


In [39]:
ext = ext.sort_values(by=['tag']).reset_index()
ext['help_tag'] = ext.apply(lambda r: r['tag']+ str(r.name),axis=1)
del ext['tag']

info = info.sort_values(by=['tag']).reset_index()
info['help_tag'] = info.apply(lambda row: row['tag']+ str(row.name),axis=1)
def redefined_tag(row):
    tag = row['tag']
    [sstart, send] = tag.split(':')[-1].split('(')[0].split('-')
    sstart = int(sstart) + 1
    sign = tag.split('(')[-1].split(')')[0]    
    return f"{tag.split(':')[0]}|{sstart}-{send}|{row['hit_start']+1}-{row['hit_end']}|{sign}"
info['tag'] = info.apply(lambda row: redefined_tag(row),axis=1)
ext = pd.merge(ext,info,how='inner', on='help_tag')

def emphasis_hit(row):
    seq = list(row['data'].lower())            
    s = row['hit_start']
    e = row['hit_end']
    seq[s:e] = list(''.join(seq[s:e]).upper())    
    return ''.join(seq)
    
ext['data'] = ext.apply(lambda row: emphasis_hit(row),axis=1)
df_to_fasta(ext[['tag','data']],"./Temp/extended_modified.txt")

# Extended validation

In [None]:
df_blastn['hit'] = df_blastn['sseq'].apply(lambda x: x.replace('-', ''))
info = pd.read_csv('./Temp/hit_index_info.csv')
ext = fasta_to_df('./Temp/extended2.txt')

counter = 0
for index in df_blastn.index:
    hit = df_blastn['hit'][index]
    row = info[info['Unnamed: 0']== index].reset_index()
    tag = row['tag'][0][1:]
    hs = row['hit_start'][0]
    he = row['hit_end'][0]        
    tag = tag.replace('(+)',f"|{hs}-{he}(+)")
    tag = tag.replace('(-)',f"|{hs}-{he}(-)")            
    seq = ext[ext['tag']==tag]['data'].iloc[0]
    seq = ext[ext['tag']==tag]['data'].iloc[0]    
    if(seq[hs:he] != hit):
        print(tag, df_blastn['slen'][index])
        print(seq[hs:he])
        print(hit)
        print('\n\n')
        counter += 1                

In [178]:
df = pd.read_csv('check1.csv')
df.columns = ['tag', 'data']
def do(x):
    x = x[1:]
    x = x.split('|')
    s = int(x[2].split('-')[0]) - 1
    e = x[2].split('-')[1]
    return f"{x[0]}:{s}-{e}({x[1]})"
df['tag'] = df['tag'].apply(lambda x: do(x))
df.head(2)

Unnamed: 0,tag,data
0,AMWY02000003.1:4084-4506(+),ATACAATTGTCACATagtttacattattaatttccgcttaatttat...
1,AMWY02000003.1:4391-4810(+),ttctaattgcaaatttatgttatatttttaaatagaaaggGAGATT...


In [176]:
for t in df['tag']:
    check_seq = df[df['tag']==t]['data'].iloc[0]
    ext_seq = ext[ext['tag']==t]['data'].iloc[0]
    if(check_seq != ext_seq):
        print('error')        

In [167]:
ext = fasta_to_df('./Temp/extended.txt')
ext.head(2)

Unnamed: 0,tag,data
0,AMWY02059828.1:2832-3256(+),AAAGAATCAGCAATGGAAAAATAACCGGTTCTTAATTCAGcataac...
1,AMWY02004761.1:1853-2277(+),actaataatgCATGGCCATATATATCAAATCTACCATATgccattt...


# RNA 2d prediction

## Mfold

In [11]:
'''
# installation
!wget http://www.unafold.org/download/mfold-3.6.tar.gz
!tar -xvf ./mfold-3.6.tar.gz; rm ./mfold-3.6.tar.gz
%cd ./mfold-3.6
!./configure
!make
!make install
%cd ..
!sudo apt install texlive-font-utils
''';

In [12]:
#todo : add all hyperparameter(options) to GUI

In [110]:
counter = 0
base = "./secondary_structure/mfold/"
!rm -r {base}
!mkdir -p {base}
df = fasta_to_df('./Temp/extended_modified.txt')

for index, row in df.iterrows():    
    tag = reformat(row['tag'])
    if(not os.path.exists(base + tag)):
        os.makedirs(base + tag)            
    with open(base + f"{tag}/SEQ.FASTA",'w') as file:
        file.write(f">{row['tag']}\n{row['data']}")
    counter += 1    
    if(counter >= 1000):
        break

In [111]:
remove_lock = False
def run_mfold(tag):
    tag = reformat(tag)
    %cd {base + tag}
    !mfold  SEQ="SEQ.FASTA" T=20     
    if(not remove_lock):
        !find . -not -name "*.ct" -not -name "*.pdf" -not -name "*SEQ.FASTA" -not -type d -delete
    %cd ../../..

if __name__ == '__main__':        
    pool = mp.Pool(mp.cpu_count())  
    pool.map(run_mfold, df['tag'].iloc[:1000])

/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020000301|687-1105|201-219|-/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020001671|2603-3023|201-221|+/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020000031|4085-4506|201-222|+/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020000501|377-795|201-219|-/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020001481|2932-3190|201-218|+/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002101|3109-3527|201-219|+/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020001921|3070-3487|201-218|-/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002411|5665-6082|201-218|+/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002961|1232-1655|201-224|+/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002851|8518-8936|201-219|-/home/jupyter

15	140,10,20,30,16	5	2	16	150,15	40,50,60,4	14	14	3160,70,80,90,16		170,180,100,110,6	17	190,8	1	120,130,16	18	2	15	8	6	16	140,150,200,16	1	160,17	16	210,6	3	5	17	170,15	220,230,15	4	180,190,240,9	1	200,17	7	250,2	19	3	16	210,260,17	
Structure plots generated.
All done.
220,230,270,17	7	240,2	280,290,17	1	4	7	250,300,6	
Structure plots generated.
All done.
16	18	260,270,16	310,280,5	18	320,330,/home/jupyter/plant_microRNA_prediction2	3	
290,1	340,350,8	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002411|8269-8690|201-222|+
4	300,310,320,17	/home/jupyter/plant_microRNA_prediction360,370,380,8	
18	18	mfold version 3.6
REUSE= NO
330,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020001481|2932-3190|201-221|+8	
390,400,410, 
End of Fill
3	18	2	340,350,360,5	Save file created using nafold.
Minimum folding energy is -159.90 kcal/mol.
Energy increment is 8.00 kcal/mol.
SEQ.pnt created.
Sequence length is 422
17	17	mfold version 3.6
H-num file

10	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002741|1881-2301|201-221|+
Structure plots generated.
All done.


RNA free energy files (version 2.3) at 20 degrees created.
10	Suboptimal foldings created.
11	17	16	13	14	Energy dot plot created.
10	mfold version 3.6
10,20,30,18	4	40,50,60,70,80,15	REUSE= NO
8	90,100,9	/home/jupyter/plant_microRNA_prediction14	
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002881|1667-2086|201-220|+1	110,120,SEQ.pnt created.
Sequence length is 421

8	19	5	11	11	3	RNA free energy files (version 2.3) at 20 degrees created.17	130,140,
Structure plots generated.
All done.
11	
10,mfold version 3.6
150,160,18	14	170,20,30,40,50,60,REUSE= NO
11	12	180,SEQ.pnt created.
5	70,80,90,15	190,Sequence length is 420
19	
Structure plots generated.
All done.
200,100,110,16	/home/jupyter/plant_microRNA_predictionRNA free energy files (version 2.3) at 20 degrees created.

/home/jupyter/plant_microRNA_prediction/seconda

8	
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002891|2-420|201-219|-350,360,60,70,80,6	RNA free energy files (version 2.3) at 20 degrees created.
190,200,
2	15	Energy dot plot created.
10	370,380,390,13	90,100,10,20,210,16	/home/jupyter/plant_microRNA_predictionmfold version 3.6
400,410, 
End of Fill
Save file created using nafold.

/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020003341|4703-5044|201-221|+
30,40,50,60,70,110,120,Minimum folding energy is -143.50 kcal/mol.
Energy increment is 7.17 kcal/mol.
220,80,11	7	7	23	2	90,100,REUSE= NO
130,140,230,240,mfold version 3.6
H-num file created from plot file.
8	7	110,120,
Structure plots generated.
All done.
REUSE= NO
9	SEQ.pnt created.
Sequence length is 419
150,160,1	250,260,130,SEQ.pnt created.
Sequence length is 342
1,2,3,4,RNA free energy files (version 2.3) at 20 degrees created.

Structure plots generated.
All done.
170,180,140,270,3	5,6,7,8,9,9	24	14	190,200,210,10,20,30,RN

10,11,12,13,14,1,2,3,4,14	240,5	80,90,100,RNA free energy files (version 2.3) at 20 degrees created.
Suboptimal foldings created.
Energy dot plot created.
1	12	15,16,17,18,19,20,1	
Structure plots generated.
All done.
11	10,11,12,13,5,6,7,8,250,110,/home/jupyter/plant_microRNA_prediction10	21,22,23,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020000511|6024-6442|201-219|-10,20,30,40,50,60,
260,14,15,16, 
9,10,11,12,120,6	24,25,270,130,140,15	13, 
70,80,90,/home/jupyter/plant_microRNA_prediction26,27,28,29,mfold version 3.6

150,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020000381|375-652|57-78|-100,110,120,130,280,
REUSE= NO
30, 
1	160,140,5	290,300,/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020003061|6941-7355|201-215|+7	26	
1	mfold version 3.6
150,160,170,310,13	SEQ.pnt created.
Sequence length is 419
7	15	6	180,320,330,REUSE= NO
170,180,20	RNA free energy files (ve

130,140,21	5	mfold version 3.6
Suboptimal foldings created.
Energy dot plot created.
4	5	6	150,18	7	REUSE= NO
H-num file created from plot file.
10	Suboptimal foldings created.
Energy dot plot created.
1	160,13	27	SEQ.pnt created.
4	1,2,3,4,170,180,Suboptimal foldings created.
11	12	Sequence length is 419
5,6,7,8,
Structure plots generated.
12	19	190,All done.
Energy dot plot created.
10	13	Suboptimal foldings created.
Energy dot plot created.
RNA free energy files (version 2.3) at 20 degrees created.
10,9,10,11,12,13,200,9	6	6	10	20,30,40,50,14,15, 
5	7	60,70,80,22	19	210,11	90,100,8	28	220,230,14	2	5	/home/jupyter/plant_microRNA_prediction110,120,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020003871|3055-3474|201-220|+
240,13	14	130,140,12	11	13	1	250,mfold version 3.6
150,7	11	7	Suboptimal foldings created.
Energy dot plot created.
10	REUSE= NO
260,270,
Structure plots generated.
All done.
160,170,23	280,8	6	180,3	1	SEQ.pnt created.
Sequence length is 420


10,20,30,270,1,2,REUSE= NO
6	Suboptimal foldings created.
Energy dot plot created.
10	6	40,50,60,6	Suboptimal foldings created.
Energy dot plot created.
10	3,4,5,6,7,280,290,21	2	70,80,90,5	SEQ.pnt created.
Sequence length is 418
8,9,10,11,100,110,300,310,6	/home/jupyter/plant_microRNA_prediction
RNA free energy files (version 2.3) at 20 degrees created.
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002741|1954-2372|201-219|+
/home/jupyter/plant_microRNA_prediction
12,13, 
120,130,22	2	7	320,330,340,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020001361|3637-4056|201-220|+
10,20,30,5	140,350,360,370,mfold version 3.6
40,50,60,1	13	150,8	4	380,390,400,410,mfold version 3.6
11	REUSE= NO
70,80,160,7	7	3	 
End of Fill
Save file created using nafold.
Minimum folding energy is -128.00 kcal/mol.
Energy increment is 6.40 kcal/mol.
11	1	22	REUSE= NO
90,100,110,7	170,Suboptimal foldings created.
Energy dot plot created.
SEQ.pnt created.
Sequenc

All done.
RNA free energy files (version 2.3) at 20 degrees created.
10,20,30,40,1,2,3,4,5,340,350,360,370,290,300,10,20,30,3	H-num file created from plot file.
150,H-num file created from plot file.
340,350,Suboptimal foldings created.
Energy dot plot created.
230,380,390,400,410,310,6,7,8,9,50,60,70,1	40,50,60,70,10	160,170,360,370,380, 
End of Fill
Save file created using nafold.
Minimum folding energy is -114.60 kcal/mol.
13	
Structure plots generated.
All done.
1,2,3,240,1,2,320,330,80,90,100,110, 
End of Fill
Save file created using nafold.
Minimum folding energy is -141.10 kcal/mol.
Energy increment is 7.05 kcal/mol.
10,11,12,13,14,80,90,Energy increment is 5.73 kcal/mol.
180,4,5,6,7,8,9,250,340,350,11	120,130,/home/jupyter/plant_microRNA_prediction
14	100,110,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020000411|3319-3666|201-222|+3,4,5,6,7,1	
15,16,17,18,19,10,11, 
120,130,6	13	140,150,8, 
360,370,380,390,260,
Structure plots generated.
All done.
H-nu

3	9,10,11,12,100,110,3	170,11	REUSE= NO
5	 
310,320,7	13, 
180,3	23	SEQ.pnt created.
Sequence length is 418
Suboptimal foldings created.
Energy dot plot created.
10	120,130,SEQ.pnt created.
Sequence length is 420
RNA free energy files (version 2.3) at 20 degrees created.
1	330,340,350,360,190,7	140,150,370,380,390,400,410, 
End of Fill
Save file created using nafold.
10,20,30,200,Minimum folding energy is -106.80 kcal/mol.
Energy increment is 5.34 kcal/mol.
RNA free energy files (version 2.3) at 20 degrees created.
10,6	8	160,170,40,50,60,70,13	Suboptimal foldings created.
9	20,30,40,50,60,70,210,Suboptimal foldings created.
Energy dot plot created.
80,90,100,180,190,4	3	Energy dot plot created.
10	H-num file created from plot file.
80,90,2	220,8	12	10	2	200,Suboptimal foldings created.
Energy dot plot created.
6	110,120,4	11	230,100,110,120,1,2,3,4,210,10	130,140,240,5,6,7,8,130,8	20	4	150,220,230,140,150, 
250,9	240,250,7	160,1	160,170,180,260,11	170,180,260,270,280,3	5	4	
Structure 

/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002591|8479-8900|201-222|-80,90,
110,120,1	180,200,Suboptimal foldings created.
310,320,100,SEQ.pnt created.
130,210,190,Suboptimal foldings created.
Energy dot plot created.
10	Energy dot plot created.
1	mfold version 3.6
110,120,13	Sequence length is 416
330,340,140,200,14	1	/home/jupyter/plant_microRNA_prediction220,230,7	130,
350,360,370,380,RNA free energy files (version 2.3) at 20 degrees created.
REUSE= NO
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002001|10002-10422|201-221|+210,220,150,160,
10,20,30,40,50,SEQ.pnt created.
240,250,390,400,410, 
End of Fill
19	140,150,/home/jupyter/plant_microRNA_predictionSuboptimal foldings created.
170,230,
14	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020001841|252-670|201-219|+8	60,70,260,Save file created using nafold.
Minimum folding energy is -136.10 kcal/mol.
Sequence length is 422

Structure plots generated.

4,5,6,7,8,3	40,50,21,22,23,24,8	1	20	5	60,70,9,10,13	25, 

Structure plots generated.
All done.
SEQ.pnt created.
Sequence length is 422
11, 
80,90,100,8	7	12	11	Suboptimal foldings created.
Energy dot plot created.
Suboptimal foldings created.
Energy dot plot created.
26	110,120,4	RNA free energy files (version 2.3) at 20 degrees created.
10	10	130,140,6	10,20,30,40,50,60,8	3	5	2	150,Suboptimal foldings created.
Energy dot plot created.
70,80,90,/home/jupyter/plant_microRNA_prediction9	
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002321|5520-5938|201-219|+160,170,6	
10	21	100,110,4	20	1	180,120,130,140,8	Suboptimal foldings created.
13	9	12	190,150,mfold version 3.6
5	Energy dot plot created.
10	11	200,27	160,170,REUSE= NO
9	4	210,220,7	11	6	180,190,
Structure plots generated.
All done.
230,3	SEQ.pnt created.
7	11	21	22	200,5	2	Sequence length is 419
240,210,RNA free energy files (version 2.3) at 20 degrees created.
13	250,9	14	6	220,10,20,30,
Structure plo

10	250,220,230,50,60,70,23,24,25,26,Suboptimal foldings created.
15,16, 
7,8,9,10,11,210,390,400,410,420, 
End of Fill
260,27,28,29,/home/jupyter/plant_microRNA_prediction80,90,14	240,9	12,13,14,
Energy dot plot created.
10	220,230,Save file created using nafold.
Minimum folding energy is -138.90 kcal/mol.
Energy increment is 6.95 kcal/mol.
100,110,270,30,31,3	5	1	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020000811|5090-5507|201-218|+250,260,22	
15,16,17,120,240,280,290,9	 
270,4	
Structure plots generated.
All done.
130,140,6	
Structure plots generated.
All done.
250,260,300,H-num file created from plot file.
1,mfold version 3.6
13	1	18,19,20,21,22,280,150,310,2,3,Suboptimal foldings created.
Energy dot plot created.
270,280,290,23,24,25,320,160,10	11	4,5,6,7,8,300,290,REUSE= NO
26,27,28,170,15	11	330,340,9,10,11,12,310,320, 
300,310,320,/home/jupyter/plant_microRNA_prediction180,
Structure plots generated.
All done.
SEQ.pnt created.
Sequence length is 418



12	280,2	11	H-num file created from plot file.
11	10	290,6	
Structure plots generated.
All done.
2	1,2,2	4	300,310,3,4,5,6,7,8,320,18	2	
Structure plots generated.
All done.
16	9,10,11,12,13, 
330,340,5	350,360,20	17	6	8	370,380,390,400,19	/home/jupyter/plant_microRNA_prediction15	
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020000471|9036-9454|201-219|-
3	13	410, 
End of Fill
Save file created using nafold.
11	12	3	12	5	7	3	Minimum folding energy is -129.30 kcal/mol.
Energy increment is 6.46 kcal/mol.
/home/jupyter/plant_microRNA_predictionSuboptimal foldings created.
Energy dot plot created.
mfold version 3.6

/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002371|533-804|201-219|+
3	10	H-num file created from plot file.
REUSE= NO
3	mfold version 3.6
19	1,2,3,4,21	REUSE= NO
17	SEQ.pnt created.
Sequence length is 419
6	5,6,7,8,18	SEQ.pnt created.
7	1	16	9	9,10,11,12,14	Sequence length is 272
RNA free energy files (version 2.3) at 20 

3	210,220,H-num file created from plot file.
3	18	280,290,170,280,290,230,25	1,2,3,4,300,310,Suboptimal foldings created.
Energy dot plot created.
mfold version 3.6
300,310,180,240,8	320,330,5,6,7,8,9,10,9	10	REUSE= NO
320,330,32	250,260,190,200,210,340,350,/home/jupyter/plant_microRNA_prediction11,12,13,Suboptimal foldings created.SEQ.pnt created.
Sequence length is 420

/home/jupyter/plant_microRNA_prediction/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002921|2640-3061|201-222|-340,350,220,230,

/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020002481|3479-3899|201-221|-270,360,370,20	7	
18	14,15,16,
Energy dot plot created.
10	2	360,370,380,390,240,250,280,290,RNA free energy files (version 2.3) at 20 degrees created.
380,390,400,410,420, 
End of Fill
400,410, 
End of Fill
Save file created using nafold.
17,18,19,20,21,300,8	12	4	9	4	260,270,280,290,Save file created using nafold.
Minimum folding energy is -169.00 kcal/mol.
Energy i

16	10,11,12,13,120,130,5	160,170,20,21,140,/home/jupyter/plant_microRNA_prediction
1,27	17	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020000501|1-282|201-219|-14,15,16,19	140,150,180,190,200,
Structure plots generated.
All done.

 
150,2,3,4,5,6,7,160,170,17, 
9	4	Suboptimal foldings created.
Energy dot plot created.
210,220,230,240,8,9,10,11,12,160,170,180,180,mfold version 3.6
250, 
End of Fill
Save file created using nafold.
10	190,13,14,15,12	16	12	REUSE= NO
Minimum folding energy is -89.10 kcal/mol.
Energy increment is 4.46 kcal/mol.
20	200,190,200,16,17,18,12	1	210,9	13	
/home/jupyter/plant_microRNA_predictionSEQ.pnt created.
Sequence length is 28219, 
1	H-num file created from plot file.
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020001671|2375-2791|201-217|+210,220,
6	18	
1,2,3,4,5,220,Suboptimal foldings created.
28	1	RNA free energy files (version 2.3) at 20 degrees created.
230,240,6,7,8,9,10,11,12,
Structure plots genera

/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020003891|9777-10195|201-219|-1	27,28,29,180,
mfold version 3.6
30,40,50,15	290,400,410, 
End of Fill
Save file created using nafold.
12	190, 
3	60,70,80,90,REUSE= NO
Minimum folding energy is -152.60 kcal/mol.
Energy increment is 7.63 kcal/mol.
mfold version 3.6
300,310,100,110,1	REUSE= NO
320,330,SEQ.pnt created.

Structure plots generated.
All done.
200,14	13	120,22	340,350,Sequence length is 310
RNA free energy files (version 2.3) at 20 degrees created.
/home/jupyter/plant_microRNA_prediction210,5	
Structure plots generated.
All done.

H-num file created from plot file.
1,13	6	12	SEQ.pnt created.
Sequence length is 419
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020004361|2620-3036|201-217|-130,140,
360,12	220,2,3,4,5,6,10,20,30,40,370,380,390,400,150,160,13	230,7,8,9,10,mfold version 3.6
50,60,70,80,90,RNA free energy files (version 2.3) at 20 degrees created.
16	410, 
End of Fill
Save 

/home/jupyter/plant_microRNA_prediction
140,4,5,6,7,8,9,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020001081|1319-1736|201-218|-
10,11,12,Suboptimal foldings created.
280,1	Sequence length is 418
250,260,270,5	10,11,12,13,14,150,9	13,14, 
RNA free energy files (version 2.3) at 20 degrees created.
10,Energy dot plot created.
1	280,290,300, 
End of Fill
Save file created using nafold.
9	15	290,300,8	mfold version 3.6
15,16,Minimum folding energy is -78.10 kcal/mol.
Energy increment is 3.90 kcal/mol.
20,30,40,50,160,170,310, 
60,70,80,180,REUSE= NO
18	16	H-num file created from plot file.
320,330,7	3	90,100,190,14	19	24	1,2,340,350,23	
Structure plots generated.
All done.
200,SEQ.pnt created.
Sequence length is 418
110,5	3,4,360,210,120,130,20	Suboptimal foldings created.
Energy dot plot created.
2	
Structure plots generated.
All done.
370,380,390, 
RNA free energy files (version 2.3) at 20 degrees created.
6	220,10	140,
Structure plots generated.
All done.
400,

100,110,120,70,80,240,250,1	340,350,360,19	360,370,290,300,60,70,80,90,100,/home/jupyter/plant_microRNA_prediction12	
1,2,3,4,5,130,140,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020004431|479-897|201-219|+20	90,100,
15	310,260,370,380,390,380,390,110,120,150,160,19	
Structure plots generated.
All done.
110,120,400,410,420, 
End of Fill
Save file created using nafold.

Structure plots generated.
All done.
20	18	8	6,7,8,9,10,320,330,340,270,280,400,410,420, 
End of Fill
Save file created using nafold.
130,140,mfold version 3.6
Suboptimal foldings created.
Minimum folding energy is -158.60 kcal/mol.
Energy increment is 7.93 kcal/mol.
170,180,Minimum folding energy is -129.30 kcal/mol.
Energy increment is 6.46 kcal/mol.
130,140, 
150,290,350,360,370,5	REUSE= NO
5	9	Energy dot plot created.
10	190,150,300,310,380,390,400,410, 
End of Fill
Save file created using nafold.160,SEQ.pnt created.
H-num file created from plot file.
H-num file created from plot file.
160,

180,190,REUSE= NO
250,Minimum folding energy is -139.20 kcal/mol.
Energy increment is 6.96 kcal/mol.
2	15	100,110,Suboptimal foldings created.
Energy dot plot created.
10	260,10,20,30,40,24	2	3	120,130,200,4	SEQ.pnt created.
Sequence length is 420
H-num file created from plot file.
3	270,280,50,60,70,140,210,14	1,2,290,300,80,90,100,/home/jupyter/plant_microRNA_prediction220,3	RNA free energy files (version 2.3) at 20 degrees created.
150,
8	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020005751|11448-11866|201-219|+3,4,5, 
110,3	310,320,
2	230,160,170,10,20,30,9	330,340,350,3	120,130,9	240,360,370,380,mfold version 3.6
40,50,60,180,Suboptimal foldings created.
Energy dot plot created.
1	140,150,390,400,410, 
End of Fill
3	250,REUSE= NO
Suboptimal foldings created.
Energy dot plot created.
10	190,3	70,80,90,11	Save file created using nafold.
Minimum folding energy is -131.80 kcal/mol.
16	4	25	160,170,5	260,100,110,4	200,SEQ.pnt created.
180,Energy increment is 

210,18,19,/home/jupyter/plant_microRNA_predictionREUSE= NO
150,160,14	16	10,20,30,40,REUSE= NO
10,20,30,100,110,120,210,SEQ.pnt created.
Sequence length is 418

70,80,90,20,21, 
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020006971|269-688|201-220|-220,170,40,50,60,50,60,
130,140,220,180,100,230,240,SEQ.pnt created.
Sequence length is 419
70,70,80,RNA free energy files (version 2.3) at 20 degrees created.
10,11	150,110,230,190,11	250,260,RNA free energy files (version 2.3) at 20 degrees created.
80,90,160,90,13	240,4	120,130,20,30,40,50,200,210,mfold version 3.6
2	270,100,110,10,20,30,100,110,120,170,13	REUSE= NO
15	140,150,250,60,70,80,90,24	5	3	220,230,13	280,290,180,130,140,40,50,60,70,260,270,120,100,110,1	240,250,SEQ.pnt created.
150,300,310,320,190,200,160,80,90,120,130,130,140,280,15	170,180,330,340,350,360,160,170,210,260,Sequence length is 420
100,110,290,300,Suboptimal foldings created.
150,140,150,180,190,270,280,220,230,370,380, 
End of Fill
Save f

11	6	7,8,9,10,11,/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020004991|1861-2279|201-219|-240,250,
6	26	12,13,14,15,REUSE= NO
18	260,270,5	16,17,18,
Structure plots generated.
All done.
17	SEQ.pnt created.
Sequence length is 420
H-num file created from plot file.
13	mfold version 3.6
23	280,290,300,Suboptimal foldings created.
Energy dot plot created.
19,20,21,14	1,2,REUSE= NO
10	22, 
24	310,320,330,12	Suboptimal foldings created.

Structure plots generated.
All done.
3,4,RNA free energy files (version 2.3) at 20 degrees created.
10,2	SEQ.pnt created.
340,350,360,370,380, 
End of Fill
Save file created using nafold.
Energy dot plot created.
10	12	5,6,7,8,9,10,11,20,30,40,50,60,70,80,6	18	Sequence length is 419
7	Minimum folding energy is -134.10 kcal/mol.
Energy increment is 6.71 kcal/mol.
27	/home/jupyter/plant_microRNA_prediction
12,13,14,15,90,100,110,RNA free energy files (version 2.3) at 20 degrees created.
10,7	/ho

130,140,230,REUSE= NO
21	RNA free energy files (version 2.3) at 20 degrees created.
10,110,120,130,6	150,160,240,2	7	4	Suboptimal foldings created.
Energy dot plot created.
20,30,SEQ.pnt created.
170,250,140,150,4	40,50,60,10	14	Sequence length is 418
180,15	160,170,1	260,70,80,6	RNA free energy files (version 2.3) at 20 degrees created.
Suboptimal foldings created.

Structure plots generated.
All done.
180,190,270,280,90,13	Energy dot plot created.
10	190,200,10,20,30,40,50,6	290,100,110,21	200,7	3	210,60,70,80,90,300,310,120,130,5	3	210,220,7	8	320,330,340,100,110,22	220,5	140,11	15	5	230,350,360,/home/jupyter/plant_microRNA_prediction120,
230,150,160,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020006991|2778-3198|201-221|+240,250,260,
130,20	370,380,16	170,7	240,140,270,180,390,400,410, 
14	mfold version 3.6
11	280,290,300,150,160,4	7	250,End of Fill
Save file created using nafold.
Minimum folding energy is -149.40 kcal/mol.
Energy increment is 7.47 kcal/mo

7	9	400,410, 
End of Fill
Save file created using nafold.
200,210,mfold version 3.6
5	230,3	110,120,150,160,9	270,280,9	Suboptimal foldings created.
Energy dot plot created.
10	240,Minimum folding energy is -156.30 kcal/mol.
Energy increment is 7.82 kcal/mol.
220,REUSE= NO
3	130,140,170,21	250,290,300,8	SEQ.pnt created.
Sequence length is 418
8	
Structure plots generated.
All done.
H-num file created from plot file.
4	12	180,190,200,SEQ.pnt created.
Sequence length is 419
230,240,250,310,320,330,340,350,360,1,2,3,4,RNA free energy files (version 2.3) at 20 degrees created.
10,150,160,260,270,26	210,220,4	20,30,40,50,RNA free energy files (version 2.3) at 20 degrees created.
370,380, 
End of Fill
Save file created using nafold.
5,6,7,8, 
260,270,230,240,250,260,170,180,280,290,10,20,30,40,50,280,Minimum folding energy is -138.00 kcal/mol.
Energy increment is 6.90 kcal/mol.
60,70,80,90,
Structure plots generated.
All done.
4	270,280, 
End of Fill
Save file created using nafold.
300,310,1


1,2,3,12	110,300,Suboptimal foldings created.
Energy dot plot created.
1	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020005821|375-793|201-219|- 
H-num file created from plot file.
REUSE= NO
360,370,380,
4,5,6,7,8,9,RNA free energy files (version 2.3) at 20 degrees created.
/home/jupyter/plant_microRNA_prediction310,320,120,130,4	
1,2,10,11,12,13,390,400,410,420, 
End of Fill
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020008331|1451-1868|201-218|-
330,340,SEQ.pnt created.
13	10,20,30,40,50,60,140,150,mfold version 3.6
3,4,5,6,7,8,9,14,15,16,17,18,Save file created using nafold.
Minimum folding energy is -125.00 kcal/mol.
Energy increment is 6.25 kcal/mol.
Suboptimal foldings created.
Energy dot plot created.
350,360,70,80,90,10,11,12,REUSE= NO
160,Sequence length is 423
RNA free energy files (version 2.3) at 20 degrees created.
4	3	mfold version 3.6
10	19,20,21,22,29	/home/jupyter/plant_microRNA_prediction
370,380,390,400,/home/jupy


Suboptimal foldings created.
REUSE= NO
15	10,20,30,40,3	9	260,10,20,30,40,50,Energy dot plot created.
10	SEQ.pnt created.
Sequence length is 420

Structure plots generated.
All done.
mfold version 3.6
8	15	50,60,8	270,13	60,70,3	RNA free energy files (version 2.3) at 20 degrees created.
REUSE= NO
28	280,11	70,80,90,80,90,10,20,30,5	/home/jupyter/plant_microRNA_prediction290,17	
100,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020008111|12161-12578|201-218|-100,110,120,
SEQ.pnt created.
40,50,60,70,300,310,110,120,3	80,90,4	130,14	mfold version 3.6
/home/jupyter/plant_microRNA_prediction320,
130,100,110,Sequence length is 419
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020005931|315-733|201-219|+9	
11	140,150,140,150,4	330,340,350,16	120,RNA free energy files (version 2.3) at 20 degrees created.
10,20,30,40,
Structure plots generated.
All done.
REUSE= NO
160,160,360,370,mfold version 3.6
2	9	50,60,70,80,14	170,16	4	130,140,SEQ.pnt crea

20,30,40,260,Minimum folding energy is -129.80 kcal/mol.
Energy increment is 6.49 kcal/mol.
190,Minimum folding energy is -155.50 kcal/mol.
Energy increment is 7.78 kcal/mol.
40,50,60,70,8	SEQ.pnt created.
1	20	50,60,70,REUSE= NO
H-num file created from plot file.
270,280,80,90,16	Sequence length is 399
200,210,220,/home/jupyter/plant_microRNA_prediction3	3	80,90,13	
9	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020006941|288-705|201-218|+H-num file created from plot file.
1,2,3,SEQ.pnt created.
Sequence length is 419
290,100,110,RNA free energy files (version 2.3) at 20 degrees created.

7	230,240,20	100,110,1,2,3,4,120,130,4,5,6,RNA free energy files (version 2.3) at 20 degrees created.
300,10,20,30,40,50,7	250,260,270,120,130,17	10,20,30,40,140,mfold version 3.6
5,6,7,8,60,70,80,280,290,7,8,9,10,310,320,140,13	16	50,60,70, 
150,160,330,340,350,9	REUSE= NO
90,100,150,300,310,320,330,11,12,13,14,15,16,2	360,370,380,
Structure plots generated.
All done.
170,21

21	90,100,Energy dot plot created.
1	180,16	mfold version 3.6
110,/home/jupyter/plant_microRNA_prediction190,8	1	22	
13	H-num file created from plot file.
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020006381|2498-2916|201-219|-120,
2	3	200,REUSE= NO
1,2,3,4,8	130,140,SEQ.pnt created.
14	5,6,7,210,14	150,Sequence length is 419
mfold version 3.6
7	2	220,230,8,160,170,RNA free energy files (version 2.3) at 20 degrees created.
REUSE= NO
14	12	11	180,240,22	9,10,11,12,13,10,20,30,40,50,60,26	2	9	250,14	17	14,15, 
SEQ.pnt created.
70,190,9	2	260,4	Sequence length is 419
80,90,200,270,280,23	30	9	RNA free energy files (version 2.3) at 20 degrees created.
10,20,30,290,15	100,110,120,210,40,50,60,300,310,1	220,8	130,140,23	3	70,80,12	320,330,15	13	3	230,
Structure plots generated.
All done.
150,160,170,90,100,110,120,27	15	18	340,350,360,180,240,250,Suboptimal foldings created.
Energy dot plot created.
10	130,5	190,3	370,380,390,400,410, 
End of Fill

Structure plots 

Sequence length is 419
14	6	180,5	15	9	Minimum folding energy is -156.20 kcal/mol.
Energy increment is 7.81 kcal/mol.
8,9,10,11,RNA free energy files (version 2.3) at 20 degrees created.
SEQ.pnt created.
190,mfold version 3.6
H-num file created from plot file.
10,20,30, 
Sequence length is 417
3	3	200,4	1,2,H-num file created from plot file.
40,50,REUSE= NO
8	RNA free energy files (version 2.3) at 20 degrees created.
10,20,30,40,210,220,18	/home/jupyter/plant_microRNA_prediction
33	13	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020006441|7473-7892|201-220|+SEQ.pnt created.
Sequence length is 419
11	3,4,5,6,60,70,80,6	1,2,50,60,70,
230,7,8, 
4	3,4,5,6,7,Suboptimal foldings created.
Energy dot plot created.
80,90,100,4	90,100,110,240,RNA free energy files (version 2.3) at 20 degrees created.
mfold version 3.6
7	10	8,9,10,110,15	16	10,20,30,120,130,250,
Structure plots generated.
All done.
6	11,12,13,120,130,40,50,60,140,150,REUSE= NO
260,4	14,15,16,140,150,5	270

9	1	
10	2	19	300,RNA free energy files (version 2.3) at 20 degrees created.
8	2	RNA free energy files (version 2.3) at 20 degrees created.
310,320,10,20,30,40,50,5	21	1	10,20,30,40,330,340,350,6	60,70,15	12	3	4	Suboptimal foldings created.
Energy dot plot created.
50,60,70,360,370,15	8	1	80,90,100,3	380,390,400,410, 
End of Fill
10	80,90,100,3	2	110,120,130,3	110,120,
Structure plots generated.
All done.
Save file created using nafold.
Minimum folding energy is -96.30 kcal/mol.
Energy increment is 4.82 kcal/mol.
11	9	140,150,3	130,140,1	7	22	160,170,H-num file created from plot file.
150,160,16	2	6	1,2,3,4,13	180,16	170,180,9	/home/jupyter/plant_microRNA_prediction
4	190,5,6,7,8,9,5	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020005751|9054-9471|201-218|+190,200,4	
2	11	200,10,11,12, 
3	210,4	1	210,220,4	
Structure plots generated.
All done.

Structure plots generated.
All done.
220,230,mfold version 3.6
230,20	240,4	240,REUSE= NO
1	250,260,3	7	250,
Structure 

190,RNA free energy files (version 2.3) at 20 degrees created.
130,140,200,180,200,11	
Structure plots generated.
All done.
330,190,REUSE= NO
10,20,30,40,50,230,
Structure plots generated.
All done.
200,150,160,210,210,220,340,350,360,SEQ.pnt created.
Sequence length is 420
29	210,220,60,70,80,90,240,250,170,180,220,230,200,H-num file created from plot file.
3	230,13	RNA free energy files (version 2.3) at 20 degrees created.
370,380,390,400,190,20	260,100,110,1,2,3,4,230,240,3	22	210,220,240,12	10,20,30,40,270,280,240,410,420, 
End of Fill
Save file created using nafold.
Minimum folding energy is -134.10 kcal/mol.
Energy increment is 6.71 kcal/mol.
5,6,7,8,25	200,250,120,130,230,250,260,2	7	50,60,/home/jupyter/plant_microRNA_prediction
9,10,11,12,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020008631|7298-7714|201-217|+250,290,
140,150,160,/home/jupyter/plant_microRNA_prediction270,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020006811

250,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020006221|4580-4993|201-219|+REUSE= NO

5	60,70,80,9	180,RNA free energy files (version 2.3) at 20 degrees created.
/home/jupyter/plant_microRNA_prediction28	260,
210,H-num file created from plot file.
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020006961|4782-5200|201-219|+SEQ.pnt created.
Sequence length is 418

10,20,30,40,90,100,15	190,220,mfold version 3.6
1,2,3,4,5,6,7,8,17	RNA free energy files (version 2.3) at 20 degrees created.270,50,60,200,230,110,REUSE= NO
8	
10,20,70,80,/home/jupyter/plant_microRNA_prediction9,10,11,12,13,14,15,Suboptimal foldings created.
280,

Structure plots generated.
All done.
mfold version 3.6
240,30,40,50,60,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020007661|1007-1425|201-219|-210,220,120,130,
290,REUSE= NO
Energy dot plot created.
10	16,17,18,19, 
230,9	250,90,100,SEQ.pnt created.
Sequence length is 414
70,80,90,140,150,3

190,3	20	2,3,4,5,6,22	30,40,50,270,70,80,13	200,23	/home/jupyter/plant_microRNA_prediction60,70,80,7,8,9,10,11,
280,3	29	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020007821|742-1160|201-219|+90,100,

/home/jupyter/plant_microRNA_prediction210,mfold version 3.6
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020010141|790-1206|201-217|+12,13,14,15,290,90,100,
110,7	220,REUSE= NO
mfold version 3.6
300,310,16,17,18,110,120,12	120,230,1	23	320,330,19,20,21,2	2	SEQ.pnt created.
mfold version 3.6
130,130,140,22,REUSE= NO
340,350,240,Sequence length is 418
150,11	4	13	REUSE= NO
4	140,150, 
21	SEQ.pnt created.
Sequence length is 419
360,370,380,390,RNA free energy files (version 2.3) at 20 degrees created.
250,260,2	SEQ.pnt created.
Sequence length is 417
RNA free energy files (version 2.3) at 20 degrees created.
160,160,23	10,20,30,40,50,14	4	270,280,400,410, 
End of Fill
Save file created using nafold.
170,180,190,10,20,30,2	290,60,70,RNA fre

230,240,2	mfold version 3.6
250,9	410, 
End of Fill
Save file created using nafold.
10,11,12,13,14,260,RNA free energy files (version 2.3) at 20 degrees created.
7	7	250,REUSE= NO
34	260,270,280,15,16,17,10,20,30,40,Minimum folding energy is -110.10 kcal/mol.
Energy increment is 5.50 kcal/mol.
260,16	270,280,50,60,70,18,19,20,21,22,270,280,20	H-num file created from plot file.
SEQ.pnt created.
Sequence length is 419
290,300,310,5	/home/jupyter/plant_microRNA_prediction4	
290,2	80,90,1,2,3,4,5,320,330,23,24, 
15	RNA free energy files (version 2.3) at 20 degrees created.
12	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020008721|450-867|201-218|+290,300,310,340,13	6,7,8,9,33	
10,20,30,16	100,110,8	300,310,320,3	40,50,60,70,320,330,10,11,12,13,8	350,360,mfold version 3.6
120,130,140,80,90,8	330,340,350,360,340,350,370,380,390,400,14,15,16,17,18,19,20,150,
Structure plots generated.
All done.
REUSE= NO
370,380,390,100,110,21,22, 
3	360,370,380,410, 
End of Fill
Save

25,26,3	7,8,9,10,23	1,2,3,27,28,330,340,350,21	11,12,13,4	 
4,5,6,7,8,360,370,380,16	8	9	9,10,11,1	390,400,410, 
End of Fill
14,15,16, 
15	18	9	12,13,14,15,16,Save file created using nafold.
Suboptimal foldings created.
Energy dot plot created.

Structure plots generated.
All done.
9	/home/jupyter/plant_microRNA_prediction
13	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020010341|808-1225|201-218|-
17, 
Minimum folding energy is -114.60 kcal/mol.
Energy increment is 5.73 kcal/mol.
10	6	8	5	H-num file created from plot file.
14	mfold version 3.6
5	2	4	1,2,3,4,5,2	REUSE= NO
/home/jupyter/plant_microRNA_prediction
Structure plots generated.
All done.

6,7,8,9,17	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020011271|8342-8762|201-221|+9	
SEQ.pnt created.
Sequence length is 418
Suboptimal foldings created.
Energy dot plot created.
10	10,11,12,
Structure plots generated.
All done.
20	16	RNA free energy files (version 2.3) at 20 degrees creat

5	mfold version 3.6
REUSE= NO
5	240,250,25	3	400,410, 
End of Fill
Save file created using nafold.
290,REUSE= NO
12	260, 
End of Fill
Save file created using nafold.
300,Minimum folding energy is -156.10 kcal/mol.
Energy increment is 7.80 kcal/mol.
13	SEQ.pnt created.
Sequence length is 420
Suboptimal foldings created.
Energy dot plot created.
Suboptimal foldings created.
Energy dot plot created.
10	Minimum folding energy is -106.50 kcal/mol.
Energy increment is 5.33 kcal/mol.
1	22	9	19	310,SEQ.pnt created.
Sequence length is 419
RNA free energy files (version 2.3) at 20 degrees created.
23	10	12	RNA free energy files (version 2.3) at 20 degrees created.
10,20,30,320,330,H-num file created from plot file.
H-num file created from plot file.
1,2,3,11	10,20,30,/home/jupyter/plant_microRNA_prediction
340,350,360,1,2,3,4,40,50,60,70,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020010841|1115-1533|201-219|-4,5,6,7,8,9,10,40,50,60,
26	12	Suboptimal foldings created.
E

10,11,12,H-num file created from plot file.
14,15,16,17,18,19,360,370,380,390,400,270,280,REUSE= NO
10,20,30,40,50,230,6	13,14,15,16,20,21,22,23,390,400,410,420, 
End of Fill
1,2,3,4,H-num file created from plot file.
410, 
End of Fill
Save file created using nafold.
Minimum folding energy is -141.40 kcal/mol.
Energy increment is 7.07 kcal/mol.
240,250,290,300,310,320,60,70,80,24,25,26,8	17,18,19,20,21,22,SEQ.pnt created.
Sequence length is 418
Save file created using nafold.
Minimum folding energy is -145.60 kcal/mol.
Energy increment is 7.28 kcal/mol.
1,2,3,4,15	260,15	5,6,7,8,9,10,330,340,2	90,100,19	 
1	18	6	 
RNA free energy files (version 2.3) at 20 degrees created.
5,6,7,8,270,350,360,H-num file created from plot file.
11,12,13,14,15,110,120,17	10,20,H-num file created from plot file.
9,10,11,12,13,14,370,380,390,9	12	280,290,20	14	16,17,18,1,2,3,4,5,6,130,140,400,410, 
End of Fill
Save file created using nafold.
1,2,3,4,5,300,15,16,17,18,30,40,50,60,70,7,8,9,10,11,150,19,20,21,

280,290,130,140,mfold version 3.6
14	20	90,100,110,300,310,170,150,17	REUSE= NO
300,120,180,320,330,21	9	7	160,170,11	310,320,330,130,140,190,13	SEQ.pnt created.
Sequence length is 422
340,350,5	21	150,340,19	/home/jupyter/plant_microRNA_prediction360,
8	200,180,350,360,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020009591|4904-5320|201-217|-18	RNA free energy files (version 2.3) at 20 degrees created.
210,370,380,390,160,
190,200,220,400,410, 
End of Fill
Save file created using nafold.
10,20,30,40,370,380,390,20	170,8	4	210,400,410, 
End of Fill
Save file created using nafold.
230,240,mfold version 3.6
Minimum folding energy is -126.60 kcal/mol.
Energy increment is 6.33 kcal/mol.
180,190,3	50,60,70,1	Minimum folding energy is -146.10 kcal/mol.
Energy increment is 7.30 kcal/mol.
250,1	80,90,100,21	220,230,REUSE= NO
260,H-num file created from plot file.
200,12	8	240,110,120,6	14	SEQ.pnt created.
270,210,1,2,22	
Structure plots generated.
All done.
250,260,130


11,12,1,2,3,4,5,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020009051|4309-4731|201-223|-7,8,9,10,11,/home/jupyter/plant_microRNA_prediction260,

/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020008761|289-708|201-220|+13,14,15,16,220,150,160,
3	120,130,12,13,14,15,5	270,1,2,3,4,6,7,8,9,21	230,19	170,180,17,13	16,17,mfold version 3.6
140,150,160,12	5,6,7,8,2	mfold version 3.6
240,10, 
190,18,170,180,190,200,280,12	18,19,20,REUSE= NO
9,10,11,12,13, 
1	6	4	 
250,290,300,310,320,200,210,220,230,240, 
End of Fill
REUSE= NO
1	21, 
SEQ.pnt created.
260,Save file created using nafold.
Minimum folding energy is -66.10 kcal/mol.
Energy increment is 3.31 kcal/mol.
210,220,330,340,2	SEQ.pnt created.
Sequence length is 423
11	270,280,Suboptimal foldings created.
Energy dot plot created.
10	RNA free energy files (version 2.3) at 20 degrees created.
10,20,230,350,360,Sequence length is 420
H-num file created from plot file.
22	290,240,30,40,50,60,4	

6	5	90,100,H-num file created from plot file.

Structure plots generated.
All done.
6	3	H-num file created from plot file.
1,2,50,60,70,80,110,120,13	3	15	9	1,2,3,17	12	3,4,5,6,90,100,130,4,5,6,7,8,110,120,7,8,9,10,11,140,150,9,10,11,12,13,130,160,15	Suboptimal foldings created.
Energy dot plot created.
10	12,13, 
19	14,15,16,17,1	/home/jupyter/plant_microRNA_prediction140,150,12	
4	170,180,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020008791|4612-5032|201-221|-
160,18,19,20,21,22,23,7	6	2	8	11	170,
Structure plots generated.
All done.
190,mfold version 3.6
24,25,26, 
7	180,16	4	1	13	4	REUSE= NO
200,14	190,210,Suboptimal foldings created.
Energy dot plot created.200,SEQ.pnt created.
Sequence length is 421
220,
10	1	210,220,16	2	230,/home/jupyter/plant_microRNA_prediction13	5	230,11	
RNA free energy files (version 2.3) at 20 degrees created.
10,20,30,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020011211|2208-2590|164-183|-240,3	
240,1

RNA free energy files (version 2.3) at 20 degrees created.
3	110,350,360,200,1	/home/jupyter/plant_microRNA_predictionSave file created using nafold.
Minimum folding energy is -111.30 kcal/mol.
Energy increment is 5.57 kcal/mol.
360,370,380,390,
10,20,9	370,380,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020011301|5746-6169|201-224|-120,
7	210,30,40,50,60,400, 
End of Fill
Save file created using nafold.
Minimum folding energy is -113.60 kcal/mol.
Energy increment is 5.68 kcal/mol.
8	12	130,140,390,400,410,420, 
End of Fill
8	220,H-num file created from plot file.70,230,H-num file created from plot file.
mfold version 3.6
150,2	3	3	80,90,Save file created using nafold.
Minimum folding energy is -124.70 kcal/mol.

1,2,3,4,240,
Structure plots generated.
All done.
1,2,REUSE= NO
17	160,170,14	100,110,Energy increment is 6.24 kcal/mol.
5,6,7,8,SEQ.pnt created.
250,3,4,5,6,7,1	180,14	2	22	120,4	9,10,11,12,H-num file created from plot file.
1,2,3,260,8,9,Sequence le

260,210,330,340,4	15	330,340,10,20,30,40,50,60,70,mfold version 3.6
Energy increment is 2.88 kcal/mol.
 
End of Fill
Save file created using nafold.
Minimum folding energy is -123.10 kcal/mol.
Energy increment is 6.16 kcal/mol.
350,360,370,220,270,280,H-num file created from plot file.
1,350,360,8	11	H-num file created from plot file.
1,80,90,100,REUSE= NO
290,300,380,390,400,230,240,2,3,4,5,6,7,
Structure plots generated.
All done.
21	370,380,2,3,4, 
410, 
End of Fill
Save file created using nafold.
Minimum folding energy is -156.80 kcal/mol.
SEQ.pnt created.
250,110,120,130,23	8,9,10,11,/home/jupyter/plant_microRNA_prediction310,320,

/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020010921|2744-3162|201-219|+H-num file created from plot file.
390,400,410, 
End of Fill
Save file created using nafold.
4	Energy increment is 7.84 kcal/mol.
Sequence length is 419
140,330,340,260,3	Suboptimal foldings created.
Energy dot plot created.
1	1,2,3,4,5,6,12,13,14,25	RNA f

/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020012051|220-639|201-220|-
9	3	240,270,6	230,18	3	Suboptimal foldings created.
Energy dot plot created.
1	250,280,240,mfold version 3.6
18	H-num file created from plot file.
260,270,290,11	250,REUSE= NO
2	1,2,3,Suboptimal foldings created.
Energy dot plot created.
280,/home/jupyter/plant_microRNA_prediction300,310,22	260,
1	10	6	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020011761|1387-1804|201-218|+17	4,5,6,7,290,320,330,270,280,
8,9,10,290,300,310,SEQ.pnt created.
Sequence length is 420
5	340,350,11,12,4	
Structure plots generated.
All done.
26	mfold version 3.6
300,310,RNA free energy files (version 2.3) at 20 degrees created.
10,320,330,7	13,14,15,11	360,370,380,390,400,410, 
4	REUSE= NO
19	4	320,330,20,30,40,50,340,350,19	16, 
12	End of Fill
Save file created using nafold.
Minimum folding energy is -132.50 kcal/mol.
Energy increment is 6.62 kcal

14	16	310,210,/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020009201|20449-20872|201-224|+90,100,110,Sequence length is 417
320,330,22	
220,120,130,140,23	RNA free energy files (version 2.3) at 20 degrees created.
10,4	340,350,11	150,230,9	13	Suboptimal foldings created.
Energy dot plot created.
20,30,40,360,370,380,390,mfold version 3.6
16	3	10	400,410, 
End of Fill
Save file created using nafold.
18	160,240,250,50,60,70,80,19	6	Minimum folding energy is -95.60 kcal/mol.
Energy increment is 4.78 kcal/mol.
170,REUSE= NO
18	260,3	7	13	15	90,100,110,17	180,SEQ.pnt created.
H-num file created from plot file.
120,130,270,280,290,9	190,Sequence length is 424
2	1,2,3,4,140,150,200,300,310,320,330,
Structure plots generated.
All done.
5	RNA free energy files (version 2.3) at 20 degrees created.
10,20,24	210,340,350,360,370,380,5,6,7,8,9,14	17	12	160,170,30,40,50,60,11	19	1	220,390,400,410, 
End of Fill
Save file created using na

17	24,25,390,400,410,/home/jupyter/plant_microRNA_prediction2,3,4,5,26,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020012381|2780-3201|201-222|-4	
4	 
End of Fill
Save file created using nafold.
7	8	24	6,7,8,9,10,31	27,28, 
7	Minimum folding energy is -90.90 kcal/mol.
Energy increment is 4.54 kcal/mol.
11,12,13,8	16	mfold version 3.6
14,15,16,11	6	REUSE= NO
5	9	Suboptimal foldings created.
Energy dot plot created.
H-num file created from plot file.
8	 
2	1	1,2,3,10	SEQ.pnt created.

Structure plots generated.
All done.
18	4,5,6,7,8,Sequence length is 422

Structure plots generated.
All done.
9,10,11,12,13,5	32	RNA free energy files (version 2.3) at 20 degrees created.
5	9	25	14,15,16,8	10,20,30,40,9	 
8	17	50,60,70,6	Suboptimal foldings created.
12	7	3	/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020010621|6403-6822|201-220|+

Structure plots generated.
All done.
9	Energy dot plot created.
10	80

17	160,14,15,16,17,18,370,380,390,400,410, 
End of Fill
3,4,5,6,7,8,Energy increment is 6.07 kcal/mol.
280,290,2,3,4,4	4	180,170,19,20,21,22,Save file created using nafold.
Minimum folding energy is -141.70 kcal/mol.
Energy increment is 7.08 kcal/mol.
9,10,H-num file created from plot file.
5,6,7,8,9,300,/home/jupyter/plant_microRNA_predictionH-num file created from plot file.
190,
23,24,25,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020010211|1311-1658|201-219|+180,11	4	
10,11,12,1,2,3,1,2,3,4,310,320,11,12,13,14,15,16,1	26,27,28,190,200,330,340,13,14,3	200,210, 
4,5,6,5,6,7,8,9,H-num file created from plot file.
/home/jupyter/plant_microRNA_prediction29, 
mfold version 3.6

2	
Structure plots generated.
All done.
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020009691|1698-2116|201-219|+350,360,
210,220,5	15,16, 
REUSE= NO
7,8,9,10,11,10,11,12,13,14,15,16,12	370,380,390,230,220,1,2,3,4,5,6,17,18,19,20,21,12,13,14,400,410, 
End of Fill

30,40,50,60,RNA free energy files (version 2.3) at 20 degrees created.
10,Suboptimal foldings created.
3	Sequence length is 419
RNA free energy files (version 2.3) at 20 degrees created.
190,20,30,40,50,60,320,330,290,300,H-num file created from plot file.
5	20,30,40,50,70,80,3	70,80,90,100,10,20,200,210,Energy dot plot created.
10	310,320,340,350,360,370,/home/jupyter/plant_microRNA_prediction16	1,2,60,70,90,100,
/home/jupyter/plant_microRNA_prediction/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020010321|8242-8660|201-219|-
220,30,40,50,60,110,5	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020012531|506-927|201-222|+380,390,400,410, 
End of Fill
Suboptimal foldings created.
Energy dot plot created.

32	330,340,4	110,
3,4,5,6,7,8,9,230,20	120,80,90,70,80,90,100,10	Save file created using nafold.
Minimum folding energy is -114.80 kcal/mol.
Energy increment is 5.74 kcal/mol.
350,360,370,120,130,10,11,240,130,100,110,1	110,120,140,380,390

320,330,340,340,350,100,110,17	8	11	10	350,360,360,370,Suboptimal foldings created.
Energy dot plot created.
120,26	4	12	370,380,390,400,410, 
380,390,400,410, 
End of Fill
Save file created using nafold.

Structure plots generated.
All done.
10	130,140,14	5	End of Fill
Save file created using nafold.
Minimum folding energy is -141.20 kcal/mol.
Energy increment is 7.06 kcal/mol.
150,Minimum folding energy is -142.80 kcal/mol.
Energy increment is 7.14 kcal/mol.
9	/home/jupyter/plant_microRNA_prediction
6	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020011371|14315-14731|201-217|-
17	160,170,23	23	H-num file created from plot file.
180,190,18	13	H-num file created from plot file.
mfold version 3.6
3	Suboptimal foldings created.
1,2,3,200,210,11	1,2,3,4,5,REUSE= NO
9	4,5,6,7,8,9,Energy dot plot created.
10	18	12	220,230,240,/home/jupyter/plant_microRNA_prediction27	10,11,12,13,5	6,7,8,9,10,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY0200

6	7	21	12	1	210,200,1	22, 
mfold version 3.6
22	12	220,230,REUSE= NO
H-num file created from plot file.
210,SEQ.pnt created.240,250,11	1,2,3,4,5,/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020014081|10643-11062|201-220|+/home/jupyter/plant_microRNA_prediction

/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020011171|4677-5096|201-220|+
220,260,270,8	16	24	
Sequence length is 419
6,7,8,9,8	8	20230,280,RNA free energy files (version 2.3) at 20 degrees created.
10,11,5	mfold version 3.6
mfold version 3.6
	10,20,30,40,50,290,300,240,12,13,14,16	7	13	REUSE= NO
13	20	22	2	60,70,310,320,330,15,16,17,18,REUSE= NO
8	23	250,260,SEQ.pnt created.
80,90,340,350,360,19, 
Sequence length is 420
270,SEQ.pnt created.
Sequence length is 420
Suboptimal foldings created.
Energy dot plot created.
370,380,390,400,12	100,110,RNA free energy files (version 2.3) at 20 degrees created.
280,10	25	410, 
End of Fill
Save file 

170,90,100,410, 
End of Fill
Save file created using nafold.
18	11	/home/jupyter/plant_microRNA_prediction180,190,
110,120,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020012021|5305-5726|201-222|-
Minimum folding energy is -161.50 kcal/mol.
Energy increment is 8.07 kcal/mol.
6	17	23	130,200,13	3	14	3	14	140,H-num file created from plot file.
23	mfold version 3.6
20	3	12	210,/home/jupyter/plant_microRNA_prediction150,/home/jupyter/plant_microRNA_prediction

1,2,3,4,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020011721|5107-5524|201-218|-/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020014631|1-280|62-80|+
/home/jupyter/plant_microRNA_prediction
REUSE= NO
1	220,230,
160,170,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020013681|227-646|201-220|-
5,6,7,
Structure plots generated.
All done.
180,8,9,10,11,mfold version 3.6
SEQ.pnt created.
Sequence length is 422
240,mfold version 3.6
REUSE= 

250,260,2,3,4,5,6,7,230, 
End of Fill
Save file created using nafold.
Minimum folding energy is -152.40 kcal/mol.
Energy increment is 7.62 kcal/mol.
310,320,330,240,90,100,10	8,9,10,11,12,270,280,340,350,160,170,250,19	H-num file created from plot file.
13,14,15,
Structure plots generated.
All done.
H-num file created from plot file.
240,110,120,290,7	9	360,370,180,1,2,3,4,5,6,260,270,16, 
1,2,3,4,250,130,140,17	21	4	190,200,300,310,320,7,8,9,10,6	380,390,400,410, 
End of Fill
Save file created using nafold.
280,24	150,260,5,6,7,14	330,11,12,13,14,15,290,300,210,220,13	Minimum folding energy is -110.60 kcal/mol.
Energy increment is 5.53 kcal/mol.
160,340,350,360,370,16,17,18,19, 
8,9,310,270,280,230,3	170,180,23	H-num file created from plot file.
Suboptimal foldings created.
Energy dot plot created.
10	380,390,400,320,330,10,11,12,13,14, 
11	/home/jupyter/plant_microRNA_prediction
240,250,190,290,300,1,2,3,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020015581|

4	mfold version 3.6
11	330,340,350,3	7	7	10,20,30,40,50,REUSE= NO
360,370,380,SEQ.pnt created.
Sequence length is 419
1	60,70,80,390,400,410, 
End of Fill
Save file created using nafold.
2	21	11	90,100,110,RNA free energy files (version 2.3) at 20 degrees created.
Minimum folding energy is -109.00 kcal/mol.
Energy increment is 5.45 kcal/mol.
6	120,130,6	
Structure plots generated.
All done.
10,20,30,11	5	4	
Structure plots generated.
All done.
H-num file created from plot file.
19	22	140,14	7	40,50,60,70,5	16	1,2,3,4,5,6,150,160,80,90,12	7,8,9,10,170,100,110,4	8	120,180,190,8	11, 
/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction12	
200,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020014801|1245-1664|201-220|-/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020015031|5065-5483|201-219|-2	
130,140,150,
2	210,220,160,3	7	
Structure plots generated.
All done.
1	230,17	7	12	Suboptimal foldings created.
Energy dot p

8	290,3	20	11,12,13,150,310,SEQ.pnt created.
Sequence length is 376
8,Suboptimal foldings created.
Energy dot plot created.
8	300,310,RNA free energy files (version 2.3) at 20 degrees created.
14,15,160,170,15	320,330,RNA free energy files (version 2.3) at 20 degrees created.
 
10	10,20,30,40,16,17, 
320,180,1	340,350,360,/home/jupyter/plant_microRNA_prediction10,20,30,40,50,
11	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020014661|102-520|201-219|+
50,60,70,330,340,350,190,370,380,60,70,80,90,360,370,380,200,13	390,400,410,420, 
End of Fill
Save file created using nafold.
1	Suboptimal foldings created.
Energy dot plot created.80,90,100,110,390,400,410, 
End of Fill
Save file created using nafold.
mfold version 3.6
210,220,22	7	Minimum folding energy is -120.10 kcal/mol.
Energy increment is 6.00 kcal/mol.
100,110,120,130,Minimum folding energy is -118.20 kcal/mol.
Energy increment is 5.91 kcal/mol.

1	230,13	9	REUSE= NO
120,130,240,140,150,21	SEQ.pnt created.


28	200,210,REUSE= NO
90,190,250,7	220,230,SEQ.pnt created.
Sequence length is 419
17	/home/jupyter/plant_microRNA_prediction100,110,2	
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020013381|759-1134|201-220|+
200,H-num file created from plot file.
5	260,2	230,240,250,RNA free energy files (version 2.3) at 20 degrees created.
10,8	120,130,270,240,250,260,1,2,3,4,5,6,7,20,30,40,50,60,140,210,
Structure plots generated.
All done.
mfold version 3.6
REUSE= NO
280,290,260,8,9,10,270,280,70,80,90,150,160,17	220,SEQ.pnt created.
8	300,290,11,12, 
100,110,270,280,170,230,Sequence length is 376
310,320,330,300,310,3	120,290,300,180,190,240,RNA free energy files (version 2.3) at 20 degrees created.
10,340,350,16	320,29	310,8	130,140,200,20,30,40,50,60,360,370,380,330,340,350,320,330,150,160,250,18	/home/jupyter/plant_microRNA_prediction210,
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020013041|1-365|146-165|+70,80,90,
3	360,370,380,390,400,390,40

25	12	350,360,12	140,130,140,120,130,3	1,2,3,370,380,390,400,150,160,150,160,15	4	15	5	140,150,160,170,410,420, 
End of Fill
Save file created using nafold.
Minimum folding energy is -124.90 kcal/mol.
Energy increment is 6.25 kcal/mol.
4,5,6,7,8,9,170,170,180,3	180,10,11,12,180,11	190,200,210,220,H-num file created from plot file.
1	190,13, 
190,26	230,240,250, 
End of Fill
Save file created using nafold.
7	200,1,2,3,4,5,200,210,7	Minimum folding energy is -79.70 kcal/mol.
Energy increment is 3.98 kcal/mol.
210,220,6,7,8,9,10,17	H-num file created from plot file.
13	26	220,13	230,11,12,13,14,1,2,3,4,5,6,7,8,5	4	230,15,16,17,18,240,16	9,10,11,12, 
16	6	240,19,20,250,12	4	250,260,270,21, 
Suboptimal foldings created.
Energy dot plot created.
10	2	260,270,280,27	280,8	Suboptimal foldings created.
Energy dot plot created.
10	290,300,
Structure plots generated.
All done.
18	290,300,310,320,310,320,14	27	330,340,14	330,6	5	350,360,370,17	340,350,360,370,7	17	380,390,400,410, 
End of Fill
5	3

mfold version 3.6
240,230,20,21,22,23,1,2,3,4,5,Minimum folding energy is -120.50 kcal/mol.
Energy increment is 6.03 kcal/mol.

Structure plots generated.
All done.
13	REUSE= NO
250,260,240,24,25,26,7	6,7,8,9,5	H-num file created from plot file.
SEQ.pnt created.
Sequence length is 421
270,250,260,10,11,12,13,27,28,29,30,31,1,2,3,4,21	RNA free energy files (version 2.3) at 20 degrees created.
10,20,280,270,14	16	14,15, 
 
5,6,7,8,30,40,50,60,290,300,280,290,9,10,11,12,13,14,15	70,80,90,310,5	12	17	300,310,320,330,19	/home/jupyter/plant_microRNA_prediction15,16,17, 

100,110,/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020013421|1-295|201-219|-20	
320,330,340,340,350,360,120,130,350,360,370,380,370,380,390,400,410, 
End of Fill
mfold version 3.6
8	140,390,400,410, 
End of Fill
Save file created using nafold.
14	Save file created using nafold.
Minimum folding energy is -103.90 kcal/mol.
Energy increment is 5.20 kcal/mol.
REUSE= NO
150,Minimum folding energy is -12

9	16	260,270,
Structure plots generated.
All done.
5	280,H-num file created from plot file.
14	mfold version 3.6
REUSE= NO
Suboptimal foldings created.
Energy dot plot created.
10	15	290,1,2,3,SEQ.pnt created.
Sequence length is 419
Suboptimal foldings created.
Energy dot plot created.
19	300,310,8	4,5,6,7,8,9,RNA free energy files (version 2.3) at 20 degrees created.
10	9	320,10,11,12,13,14,15,16,6	10,20,30,40,1	/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020014111|619-1037|201-219|+
4	330,340,350,17,18,19,50,60,70,360,370,20,21,22,23,24,80,90,100,17	17	mfold version 3.6
REUSE= NO

Structure plots generated.
All done.
380,390,400,410, 
End of Fill
6	25,26,27,28,29,110,120,SEQ.pnt created.
Sequence length is 419
Save file created using nafold.
Minimum folding energy is -102.50 kcal/mol.
Energy increment is 5.12 kcal/mol.
30, 
130,RNA free energy files (version 2.3) at 20 degrees created.
15	11	140,150,9	10,20,30,40,H-num

Suboptimal foldings created.
Energy dot plot created.
260,mfold version 3.6
RNA free energy files (version 2.3) at 20 degrees created.
10	21	30	270,REUSE= NO
10,20,30,40,50,16	14	280,SEQ.pnt created.
Sequence length is 382
60,70,80,290,300,RNA free energy files (version 2.3) at 20 degrees created.
10,20,90,100,9	22	19	310,30,40,50,60,17	3	110,120,320,330,8	1	3	70,80,90,130,140,340,350,100,150,360,370,380,390,15	22	3	4	110,120,160,170,400,410, 
End of Fill
Save file created using nafold.
11	130,180,190,17	Minimum folding energy is -111.90 kcal/mol.
Energy increment is 5.59 kcal/mol.
15	140,200,
Structure plots generated.
All done.
150,23	18	H-num file created from plot file.
1,2,1	4	210,160,9	3,4,5,6,7,20	220,230,170,4	8,9,10,11,12,240,180,5	16	13,14,15,16,250,190,18	23	4	17,18,19,20,260,/home/jupyter/plant_microRNA_prediction
200,210,16	/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020015211|2950-3368|201-219|-12	
21,22, 
270,280,290,mfold version 3.6
REUSE= NO


1	250,260,260,Sequence length is 419
280,13	2,3,4,5,6,7,8,270,290,300,RNA free energy files (version 2.3) at 20 degrees created.
10,20,30,40,270,280,9	9,10,11,12,13,14,280,290,50,60,70,80,310,320,290,300,14	15,16,17,18,19,300,90,100,110,330,340,350,360,310,320,14	20,21, 
310,120,130,330,340,350,/home/jupyter/plant_microRNA_prediction
370,380,390,400,410, 
End of Fill
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020014161|1-332|201-217|-
320,330,140,150,160,360,370,380,390,Save file created using nafold.
Minimum folding energy is -95.80 kcal/mol.
Energy increment is 4.79 kcal/mol.
7	4	340,350,170,mfold version 3.6
REUSE= NO
400,410, 
End of Fill
Save file created using nafold.
Minimum folding energy is -124.70 kcal/mol.
Energy increment is 6.24 kcal/mol.
2	14	14	360,370,380,180,190,SEQ.pnt created.
Sequence length is 332
H-num file created from plot file.
1,2,
Structure plots generated.
8	All done.
390,400,410, 
End of Fill
Save file created using nafold.
200,H-

10	370,380,390,400,410, 
End of Fill
240,180,Save file created using nafold.
Minimum folding energy is -115.50 kcal/mol.
Energy increment is 5.78 kcal/mol.
250,22	190,200,260,7	4	210,H-num file created from plot file.
1,2,270,7	220,11	3,4,5,6,7,8,13	280,8	16	11	230,240,250,9,10,11,12,13,290,260,270,14,15,300,310,11	280,290,300,310,320, 
320,330,330, 
End of Fill
Save file created using nafold.
Minimum folding energy is -99.90 kcal/mol.
Energy increment is 5.00 kcal/mol.
340,350,8	23	H-num file created from plot file.
360,370,380,390,8	5	12	12	1,2,3,4,400,410, 
End of Fill
Save file created using nafold.
Minimum folding energy is -131.10 kcal/mol.
Energy increment is 6.55 kcal/mol.
9	5,6,7,8,9,10, 
14	17	Suboptimal foldings created.
Energy dot plot created.
H-num file created from plot file.
10	1,2,3,4,5,6,7,12	6	8,9,10,11,12,13,Suboptimal foldings created.
Energy dot plot created.
10	9	24	14,15,16,17,18,19,9	
Structure plots generated.
All done.
20,21,22,23,13	18	24,25,26,27, 
13	15	11

19	Suboptimal foldings created.
Energy dot plot created.
10	170,180,Save file created using nafold.
Minimum folding energy is -110.30 kcal/mol.
Energy increment is 5.51 kcal/mol.
H-num file created from plot file.
1,2,190,3,4,5,6,7,8,H-num file created from plot file.
1,200,9,10, 
2,3,4,5,6,7,8,9,10,3	210, 
7	220,230,1	240,11	Suboptimal foldings created.
Energy dot plot created.
10	Suboptimal foldings created.
Energy dot plot created.
10	250,260,270,280,290,300,8	4	310,320,330,340,350,360,20	12	1	370,380,390,1	400,410, 
End of Fill
Save file created using nafold.
Minimum folding energy is -137.20 kcal/mol.
Energy increment is 6.86 kcal/mol.
H-num file created from plot file.
1,2,3,4,9	5	5,6,7,8,9,10,11,2	12,13,14,15,13	2	2	16,17,18, 

Structure plots generated.
All done.
6	3	14	3	3	/home/jupyter/plant_microRNA_prediction
/home/jupyter/plant_microRNA_prediction/secondary_structure/mfold/AMWY020014981|4144-4566|201-223|+
Suboptimal foldings created.
Energy dot plot created.
10	mfold vers

In [61]:
'''
base = "secondary_structure/mfold/"
for directory in glob.glob(f"{base}*"):    
    tag = directory[len(base):]
    ct_files = glob.glob(f'{directory}/*.ct')        
    try:
        ct_files.remove(f'{base}{tag}/SEQ.ct')
    except:
        print(directory)
        print(ct_files)
        print("*****************")
    for file in ct_files:        
        shutil.copy(file, './1.ct')
        #dot = ct2dot_bracket('./1.ct')
        #dot = dot.split('\n')
        #with open('./2.ct', 'w') as stream:
            #stream.write(bracket_to_ct(tag, dot[0] , dot[1] , "(0)"))        
        #ct1 = '\n'.join(reformatCT('./1.ct').split('\n')[1:])
        #ct2 = '\n'.join(reformatCT('./2.ct').split('\n')[1:])
        #if(ct1 != ct2):
            #print(file)
        ct = reformatCT('./1.ct')
        [nucleotide, index, values] = get_ct_data(ct)        
        #print(is_nested( index,  values))
        if(not is_nested( index,  values)):
            print("************")             
'''

## Mxfold2

In [57]:
#!wget https://github.com/keio-bioinformatics/mxfold2/releases/download/v0.1.1/mxfold2-0.1.1.tar.gz
#!pip3 install mxfold2-0.1.1.tar.gz
#!rm mxfold2-0.1.1.tar.gz

In [58]:
!mxfold2 predict ./extended.txt > secondary_structure/mxfold2_result.txt

>AMWY02059828.1:2832-3256(+)
AAAGAATCAGCAATGGAAAAATAACCGGTTCTTAATTCAGcataacaaattattcaattataatatagcTGTAAAAGAAATCTAAGTCTATTTGATATAGATCGGAATTTacgcaaattaaaaatttccaaataaGCAGTTCCGACCTGAGATCTGAACCGAAAACGCAAGATCCATCTAAACTCTCACCTCGGTCTCCGATTCAGTTGATGCAAGGCGGGATCCAATTCGCCTTTTCATTCAATTACATTCACCAATAACAGCTCGCCATCtggcttttaataaaaagttgcCAATCGGTTCCCGACCTGCACCAAGCGAATTAGAGACCGCCGGTAACTGAATCATTCTACATTAATCCCCGACTCCTCCTTTTACACATAGCAACTTCGCCCAAGAagactaaaaagaaaaggaagctAAC
........................((((...........((..........................))..........................................................................................................................(((...(((...............((((..........)))).................................(((....))).....................)))...)))..........................))))........................................................................................ (35.0)
>AMWY02004761.1:1853-2277(+)
actaataatgCATGGCCATATATATCAAATCTACCATATgccatttaataattttccttttttcttcttctttctttttctctct

>AMWY02002487.1:2516-2939(+)
tctctcttctttttcttccttaacTTTTCATCCAGCTTCAACCTCCATTTAGATCaaagttattgaatttttttttcatcttatttatgtaaatatatattgtttccTGCGGAAACGAATCCATGAACAACAGTCAATCAGTCATTGTTTGCTGATGCAGCGTCATCAAGATTCGCATGCTGATGGGTCGAGCAAAGCAGTGAGAATCTTGATGATGCTGCATCGGCCATAATTGACTATAtctcgtcatcatcatcatcatcatccagtTTCAACCTCCATGTAAATCaagttattgaattatttggtAAATAGATACTGATTCCCGCAGAATTGAATCAATGAACAACAGTCAATCAGTCATTGTTTGCTGATGCAGCATCATCAAGATTCACATGCGAATGGGTCGAACCAAAGCAGTGA
............................................................................................................((((....(....)............................((....)).(((...........))).........((.((.((...((..................)).))..)).)).................................................................................................))))........................................((....))....................((....((......))...))..... (30.2)
>AMWY02089812.1:0-419(-)
TCtgtctatatttattttcttctcattcACTGTAGTAATTTAAGCCTATACAGTTCTGAGTTGACCNatttctttatataaagtTNATTTC

>AMWY02001968.1:1243-1665(+)
CTACCTAAACTCCATGCATGGCTCGTGCTAGCTTTctggtttcttcttttttctttaagggcttattataaatttgcaGCAAGCCTAAACCCTTCTTAATTTCAAGATCTCTCTTcatttgattctttctttctttttctagggattcttcttcttcttcttcttcttgtttgctGCTGGTGTATGTTGGTTTGAGAGATTGAAGCTGCCAGCATGATCTGGTAATATGGAAcctaatattatacatatacatatctatatctatatatatagatagatagttttagatttactctttaattatattaattcctCCACCTATAGTTTTAGAGttactctttaattatattaattcttccACCTCTTTGTATAGATAGATAGAAACATATAGAAGGTCTTAGATTTCCTTTGCTTTTAGATCC
...((....(.((..((...((....))...............................(((....................)))..........................................................................................)).)).)......))........................................................................................................................................................................................................................................ (27.0)
>AMWY02039981.1:1214-1636(-)
atttatgttttctattttataattaaaaaataaaaaaaaaataataaggtaatctctctctctttttcttttatattcatatgaGGTCT

In [143]:
df = fasta_to_df('secondary_structure/mxfold2_result.txt')
df = df.apply(lambda row: bracket_row(row) , axis=1)
df.head(2)

Unnamed: 0,tag,data,bracket
0,AMWY02059828.1:2832-3256(+),AAAGAATCAGCAATGGAAAAATAACCGGTTCTTAATTCAGcataac...,........................((((...........((........
1,AMWY02004761.1:1853-2277(+),actaataatgCATGGCCATATATATCAAATCTACCATATgccattt...,.................................................


In [145]:
base = "./secondary_structure/mxfold2/"
!rm -r {base}
!mkdir -p {base}
for index, row in df.iterrows():    
    if(not os.path.exists(base + reformat(row['tag']))):
        os.makedirs(base + reformat(row['tag']))        
    tag = reformat(row['tag'])
    with open(base + f"{tag}/{tag}.ct",'w') as file:
        bracket = row['bracket'].split(' ')[0]
        deltaG = row['bracket'].split(' ')[1]
        ct = bracket_to_ct(row['tag'], row['data'], bracket, deltaG)
        file.write(ct)    

## SPOT-RNA

In [8]:
#!git clone https://github.com/jaswindersingh2/SPOT-RNA.git
#%cd SPOT-RNA
#!wget 'https://www.dropbox.com/s/dsrcf460nbjqpxa/SPOT-RNA-models.tar.gz' || wget -O SPOT-RNA-models.tar.gz 'https://app.nihaocloud.com/f/fbf3315a91d542c0bdc2/?dl=1'
#!tar -xvzf SPOT-RNA-models.tar.gz && rm SPOT-RNA-models.tar.gz
#!sudo apt-get install python3.6
#!python3.6 -m pip install tensorflow==1.14.0 # or for gpu: tensorflow-gpu==1.14.0
#! python3.6 -m pip install -r requirements.txt

In [23]:
base = "./secondary_structure/spot_rna/"
!rm -r {base}
!mkdir -p {base}

In [29]:
!python3.6 ./SPOT-RNA/SPOT-RNA.py  --inputs ./extended.txt  --outputs '{base}'  --cpu 32 --plots True

>> Opening FASTA file...
>> Converting FASTA file from multiline to single line and writing to file.
>> Done!

Preparing tfr records file for SPOT-RNA:
100%|█████████████████████████████████████████████| 1/1 [00:04<00:00,  4.21s/it]

Predicting for SPOT-RNA model 0
100%|█████████████████████████████████████████████| 1/1 [00:05<00:00,  5.23s/it]

Predicting for SPOT-RNA model 1
100%|█████████████████████████████████████████████| 1/1 [00:09<00:00,  9.51s/it]

Predicting for SPOT-RNA model 2
100%|█████████████████████████████████████████████| 1/1 [00:13<00:00, 13.10s/it]

Predicting for SPOT-RNA model 3
100%|█████████████████████████████████████████████| 1/1 [00:16<00:00, 16.44s/it]

Predicting for SPOT-RNA model 4
100%|█████████████████████████████████████████████| 1/1 [00:18<00:00, 18.18s/it]

Post Processing and Saving Output

Finished!

Processsing Time 203.32813096046448 seconds


In [48]:
!rm {base}/*.bpseq
!rm {base}/*.prob
for file in glob.glob(f"{base}*.ct"):    
    f = file[len(base):-3] # .ct        
    f = reformat(f)        
    if(not os.path.exists(base + f)):
        os.makedirs(base + f)  
    header = reformatCT(file).split("\n")[0]    
    with open(f"{base}{f}.dot", 'w') as stream:        
        stream.write(ct2dot_bracket(file))
    !RNAeval "{base}{f}.dot" -T 20 -v 
    #shutil.move(file, f"{base}{f}/{f}.ct")    

rm: cannot remove './RNA_secondary_structure/spot_rna//*.bpseq': No such file or directory
rm: cannot remove './RNA_secondary_structure/spot_rna//*.prob': No such file or directory
[36mExternal loop[0m                           : [32m -371[0m
[36mInterior loop[0m (  3, 32) [1mAU[0m; (  4, 31) [1mGC[0m: [32m -256[0m
[36mInterior loop[0m (  4, 31) [1mGC[0m; (  5, 30) [1mAU[0m: [32m -294[0m
[36mInterior loop[0m (  5, 30) [1mAU[0m; (  6, 29) [1mAU[0m: [32m -122[0m
[36mHairpin  loop[0m (  6, 29) [1mAU[0m              : [32m  716[0m
[36mInterior loop[0m ( 38, 71) [1mCG[0m; ( 39, 70) [1mAU[0m: [32m -255[0m
[36mInterior loop[0m ( 39, 70) [1mAU[0m; ( 40, 69) [1mGC[0m: [32m -256[0m
[36mInterior loop[0m ( 40, 69) [1mGC[0m; ( 41, 68) [1mCG[0m: [32m -403[0m
[36mInterior loop[0m ( 41, 68) [1mCG[0m; ( 49, 64) [1mAU[0m: [32m  618[0m
[36mInterior loop[0m ( 49, 64) [1mAU[0m; ( 50, 63) [1mUA[0m: [32m -155[0m
[36mInterior loop[0m 

In [68]:
df = fasta_to_df('./secondary_structure/spot_rna/AMWY02059828.1:2832-3256(+).dot')
df = df.apply(lambda row: bracket_row(row) , axis=1)
bracket = df['bracket'][0].split(' ')[0]
ct = bracket_to_ct(df['tag'][0], df['data'][0], bracket, "(0)")
print(ct)

   424 dG =       0.0 AMWY02059828.1:2832-3256(+)
     1 A      0      2      0       1
     2 A      1      3      0       2
     3 A      2      4     32       3
     4 G      3      5     31       4
     5 A      4      6     30       5
     6 A      5      7     29       6
     7 U      6      8      0       7
     8 C      7      9      0       8
     9 A      8     10      0       9
    10 G      9     11      0      10
    11 C     10     12      0      11
    12 A     11     13      0      12
    13 A     12     14      0      13
    14 U     13     15      0      14
    15 G     14     16      0      15
    16 G     15     17      0      16
    17 A     16     18      0      17
    18 A     17     19      0      18
    19 A     18     20      0      19
    20 A     19     21      0      20
    21 A     20     22      0      21
    22 U     21     23      0      22
    23 A     22     24      0      23
    24 A     23     25      0      24
    25 C     24     26      0      25


## Vienna package

In [92]:
#!wget https://www.tbi.univie.ac.at/RNA/download/ubuntu/ubuntu_20_04/viennarna_2.4.18-1_amd64.deb -O viennarna.deb
#!sudo dpkg -i ./viennarna.deb
#!sudo apt-get -f install
#!rm viennarna.deb

In [131]:
base = "./secondary_structure/viennarna/"
!rm -r {base}
!rm ./secondary_structure/viennarna_result.txt
!mkdir -p {base}

rm: cannot remove './secondary_structure/viennarna_result.txt': No such file or directory


In [132]:
%cd {base}
!RNAfold --jobs=0 --infile ../../Temp/extended_modified.txt  --noPS -T 20 > ../viennarna_result.txt
%cd ../../

/home/jupyter/plant_microRNA_prediction/secondary_structure/viennarna
/home/jupyter/plant_microRNA_prediction


In [95]:
df = fasta_to_df('secondary_structure/viennarna_result.txt')
df = df.apply(lambda row: bracket_row(row) , axis=1)
print(df.shape)
df.head(2)

(21225, 3)


Unnamed: 0,tag,data,bracket
0,AMWY02000003.1:4084-4506|200-222(+),auacaauugucacauaguuuacauuauuaauuuccgcuuaauuuau...,....((((((...))))))................((((((...((...
1,AMWY02000003.1:4391-4810|200-219(+),uucuaauugcaaauuuauguuauauuuuuaaauagaaagggagauu...,.....((.(((......))).))......(((((((.(((((((.....


In [125]:
for index, row in df.iterrows():    
    tag = reformat(row['tag'])
    if(not os.path.exists(base + tag)):
        os.makedirs(base + tag)      
    with open(base + f"{tag}/{tag}.ct",'w') as file:
        bracket = row['bracket'].split(' ')[0]
        deltaG = row['bracket'].split(' ')[1]
        ct = bracket_to_ct(row['tag'], row['data'], bracket, deltaG, False)
        file.write(ct)    

In [126]:
import glob
for file in glob.glob(f"{base}*.ps"):    
    f = file[len(base):-6] # _ss.ps 
    f = reformat(f)        
    shutil.move(file, f"{base}{f}/{f}.ps")    

## ContraFold

In [39]:
#!wget http://contra.stanford.edu/contrafold/contrafold_v2_02.tar.gz
#!tar -xvzf contrafold_v2_02.tar.gz && rm contrafold_v2_02.tar.gz
#%cd contrafold/src
#!make clean
#!make 
# to file must changed to be complieable # utility.hpp and optimization.c++ files

In [42]:
counter = 0
base = "./secondary_structure/contrafold/"
!rm -r {base}
!mkdir -p {base}
df = fasta_to_df('./Temp/extended.txt')

for index, row in df.iterrows():    
    tag = reformat(row['tag'])
    if(not os.path.exists(base + tag)):
        os.makedirs(base + tag)            
    with open(base + f"{tag}/{tag}.FASTA",'w') as file:
        file.write(f">{row['tag']}\n{row['data']}")
    counter += 1    
    if(counter >= 10):
        break

In [43]:
def run_contrafold(tag):
    tag = reformat(tag)    
    %cd contrafold/src
    !./contrafold predict ../..{base[1:]}{tag}/{tag}.FASTA > ../..{base[1:]}{tag}/{tag}.dot
    with open(f"../..{base[1:]}{tag}/{tag}.dot", 'r') as file:
        text = file.read()
    text = [l for l in text.split("\n") if l[:len(">structure")] != ">structure"]    
    header = text[0]
    with open(f"../..{base[1:]}{tag}/{tag}.dot", 'w') as file:
        file.write('\n'.join(text[1:]))    
    !RNAeval  ../..{base[1:]}{tag}/{tag}.dot -T 20 > ../..{base[1:]}{tag}/{tag}.dotdg    
    with open(f"../..{base[1:]}{tag}/{tag}.dotdg", 'r') as file:
        text = file.read()
    with open(f"../..{base[1:]}{tag}/{tag}.dot", 'w') as file:
        file.write(header + "\n" + text)    
    
    df = fasta_to_df(f'../..{base[1:]}{tag}/{tag}.dot')
    df = df.apply(lambda row: bracket_row(row) , axis=1)        
    tag = reformat(df['tag'][0])
    with open(f'../..{base[1:]}{tag}/{tag}.ct','w') as file:
        bracket = df['bracket'][0].split(' ')[0]        
        deltaG = df['bracket'][0].split(' ')[1]
        ct = bracket_to_ct(df['tag'][0], df['data'][0], bracket, deltaG, False)
        file.write(ct)    
    #!rm ../..{base[1:]}{tag}/{tag}.dot
    #!rm ../..{base[1:]}{tag}/{tag}.dotdg
    !rm ../..{base[1:]}{tag}/{tag}.FASTA
    %cd ../../        

if __name__ == '__main__':        
    pool = mp.Pool(mp.cpu_count() - 1)  
    pool.map(run_contrafold, df['tag'].iloc[:10])

In [56]:
s = 'CUCCCCUUGUCUACCAUCCCCAACUAGCGAGAGAGACAUUACCUACCUGAAUAGAAGAUCUCUCUCGAGCUCUCGagcucucucuuuuucuauaUCUCUGUCUCUUUGUGUCUCUGGAGCUUGUACUAACAUUAAUAUCGUGCACCAGCAGCAGUUGAAGCUGCCAGCAUGAUCUAAACUUCCUUCUCUGUAAAGGAUAGAUCGGAUCAUGUGGUAGCUUCACCUGUUGAUGGGAUCACGAAAGCGCCCCUCUUACUACUCUACAUUAAUUCUUUCUCGUUAUACAACCUCCCAGUAAGCAUGCUUUCAAAACCAACUUGAGuaaguuaauuuguuuagcuuuuguuuuuggcucuuccuuuacuuuaaauuuucucaucuggguuuuuguuauauauauguacuguuuuauauauguauuccu'
d = '............................((((((((..(...(((......))).)..))))))))(((((....)))))...................................((((.((((...(((.......(((((..(((.((((((.((((((((((.(((((((((.(.(((((((.......))))).)).).))))))))))))))))))).)))))).)))...))))).....................................)))..)))).))))....((((()))))..((((((....((((.(((((((.....)))(((.........)))................)))).))))....))))))....(((((((((((......)))))))))))....'
print(s[300],s[301])
print(d[300],d[301])

A U
( )


In [55]:
'''path = 'secondary_structure/contrafold/AMWY020333941_469-893_-_/AMWY020333941_469-893_-_.dot'
!RNAeval  {path} -T 20 -v'''; 

[36mExternal loop[0m                           : [32m -364[0m
[36mInterior loop[0m ( 29, 66) [1mGC[0m; ( 30, 65) [1mAU[0m: [32m -294[0m
[36mInterior loop[0m ( 30, 65) [1mAU[0m; ( 31, 64) [1mGC[0m: [32m -256[0m
[36mInterior loop[0m ( 31, 64) [1mGC[0m; ( 32, 63) [1mAU[0m: [32m -294[0m
[36mInterior loop[0m ( 32, 63) [1mAU[0m; ( 33, 62) [1mGC[0m: [32m -256[0m
[36mInterior loop[0m ( 33, 62) [1mGC[0m; ( 34, 61) [1mAU[0m: [32m -294[0m
[36mInterior loop[0m ( 34, 61) [1mAU[0m; ( 35, 60) [1mGC[0m: [32m -256[0m
[36mInterior loop[0m ( 35, 60) [1mGC[0m; ( 36, 59) [1mAU[0m: [32m -294[0m
[36mInterior loop[0m ( 36, 59) [1mAU[0m; ( 39, 56) [1mUA[0m: [32m  103[0m
[36mInterior loop[0m ( 39, 56) [1mUA[0m; ( 43, 54) [1mCG[0m: [32m  305[0m
[36mInterior loop[0m ( 43, 54) [1mCG[0m; ( 44, 53) [1mUA[0m: [32m -256[0m
[36mInterior loop[0m ( 44, 53) [1mUA[0m; ( 45, 52) [1mAU[0m: [32m -165[0m
[36mHairpin  loop[

# Visualization

In [25]:
#https://github.com/ViennaRNA/forna
#http://varna.lri.fr/

# CT Analizer

In [14]:
# only select those not ran before
base = "./secondary_structure/mfold/"
df = fasta_to_df('./Temp/extended_modified.txt')

index_list =[]
for index, row in df.iterrows():    
    tag = reformat(row['tag'])    
    if(len(glob.glob(f'{base + tag}/*.ct')) != 0):
        index_list.append(index)
df = df.iloc[index_list,:]
print(df.shape)

(1000, 2)


In [15]:
tag = df.iloc[3,:]['tag']
print(f'{base}{reformat(tag)}/SEQ_1.ct')
ct = reformatCT(f'{base}{reformat(tag)}/SEQ_1.ct')
data = tag.split('|')
hit_start = int(data[2].split('-')[0]) - 1 
hit_end = int(data[2].split('-')[1])
sign = data[3]

ct_head = ct.split('\n')[0]

if("dG = " in ct_head):    
    dG_patter = "dG = " 
elif("dG= " in ct_head):    
    dG_patter = "dG= "
elif("dG=" in ct_head):    
    dG_patter = "dG="
elif("dG =" in ct_head):    
    dG_patter = "dG ="
else:
    print('there is no dG')

dG = float(ct_head.split(dG_patter)[-1].split(' ')[0])
[nucleotide, index, values] = get_ct_data(ct)

./secondary_structure/mfold/AMWY020000141|194-614|201-221|+/SEQ_1.ct


In [16]:
hit_seq = ''.join(nucleotide[hit_start:hit_end])
hit_range = index[hit_start:hit_end]
hit_len = len(hit_range)
inc_srange = values[hit_start:hit_end] # Incomplete_Star_range

In [17]:
if(sum(inc_srange == 0) == hit_len):
    complementarity_in_hit_region = "no"
elif(sum(inc_srange != 0) == hit_len):
    complementarity_in_hit_region = "fully_connected"
else:    
    complementarity_in_hit_region = "yes"
print(complementarity_in_hit_region)

yes


In [18]:
# Hit_self_complementarity
hit_self_complementarity = not ((inc_srange <= hit_start) | (inc_srange > hit_end)).all()
hit_self_complementarity

False

In [19]:
nonzero_data_srange  = inc_srange[inc_srange!=0]
istar_min = nonzero_data_srange.min()
istar_max = nonzero_data_srange.max()
if(hit_self_complementarity):
    istar_min = np.nan
    istar_max = np.nan
print(istar_min)
print(istar_max)

369
388


In [20]:
if(hit_self_complementarity):
    continuous_pairing = "undifined"
else:
    if(hit_end < istar_min and (hit_start+1) > istar_min):
        continuous_pairing = "no"
    else:
        continuous_pairing = "yes"
print(continuous_pairing)

yes


In [29]:
### miR type
if(continuous_pairing == "yes" and complementarity_in_hit_region != "no" and hit_self_complementarity == "no"):
    if( hit_end < istar_min):
        mir_type = "5p" 
    if( hit_start > istar_max):
        mir_type = "3p"     
else:
    if(continuous_pairing == "no" and hit_self_complementarity == "yes"):
        mir_type = "discontinuous star strand and hit self complementarity"
    elif(continuous_pairing == "no"):
        mir_type = "discontinuous star strand"
    elif(hit_self_complementarity == "yes"):
        mir_type = "hit self complementarity"

if(complementarity_in_hit_region == "no"):
    mir_type = "no complementarity in hit region"    
print(mir_type)

5p


In [22]:
c = 0
i = hit_end - 3 - c
while(values[i] == 0 and i >= hit_start):
    c += 1
    i = hit_end - 3 - c
    
if(i >= hit_start):
    star_start = values[i] - c
    if(star_start < 1):
        print("star_end error!")
else:
    star_start = np.nan
print(star_start, c)

371 0


In [23]:
if(hit_start - 2 >= 0 ):
    a = 0    
else:
    a = abs(hit_start - 2)
    
i = hit_start - 2 + a
while(values[i] == 0 and i <= hit_end):
    a += 1
    i = hit_start - 2 + a
    
if(i <= hit_end):
    star_end = values[i] + a
else:
    star_end = np.nan
print(star_end, a)

390 0


In [24]:
star_range = index[star_start - 1:star_end]
star_seq = ''.join(nucleotide[star_start - 1:star_end])

In [25]:
if(mir_type == '5p'):
    num_of_linking_residues = str(star_start - hit_end - 1)
elif(mir_type == '3p'):
    num_of_linking_residues = str(hit_start - star_end)
elif(mir_type == "discontinuous star strand"):
    num_of_linking_residues = "discontinuous star strand"
elif(mir_type == "no complementarity in hit region"):
    print('error')
print(num_of_linking_residues)

149


In [203]:
# self Hit_self_complementarity
star_branching = not ((values[star_range-1] < star_start) | (values[star_range-1] > star_end)).all()
star_branching

False

In [71]:
def getBOI_5p():
    # first calc latest non zero value
    for i in range(hit_end-1, 0, -1):
        if(values[i] != 0):
            last_v = values[i]
            last_i = i
            place = i
            break            
            
    for i in range(place-1, 0, -1):        
        v = values[i]
        if(v == 0):
            continue
        if(v < last_v):
            return [last_i + 1, last_v]                                                
        
        if((v - last_v) >= 3):
            s1 = set(range(last_v+1, v))
            s2 = set([values[i-1] for i in range(last_v+1, v)])
            if(len(s1.intersection(s2)) > 0):
                return [last_i + 1, last_v]    
        last_v = v            
        last_i = i            
    for i in range(0,hit_end):
        if(values[i] != 0 ):
            return [i + 1, values[i]]    
    print("Error")
                
def getBOI_3p():
    # first calc latest non zero value
    for i in range(hit_start, len(values)):    
        if(values[i] != 0):
            last_v = values[i]
            last_i = i
            place = i
            break            
            
    for i in range(place + 1, len(values)):
        v = values[i]
        if(v == 0):
            continue
        if(v > last_v):
            return [last_i + 1, last_v]                                                
        
        if((last_v - v) >= 3):
            s1 = set(range(v+1, last_v))
            s2 = set([values[i-1] for i in range(v+1, last_v)])
            if(len(s1.intersection(s2)) > 0):
                return [last_i + 1, last_v]    
        last_v = v            
        last_i = i            
    for i in range(hit_start, len(values)):
        if(values[i] != 0 ):
            return [i + 1, values[i]]    
    print("Error")            
    
    
#BOI, Branch of interest
if(mir_type not in ['3p','5p']):
    pass
else:
    if(mir_type == '5p'):
        [boi_start, boi_end] = getBOI_5p()
    if(mir_type == '3p'):
        [boi_start, boi_end] = getBOI_3p()    

# BLASTX or DIMOND

# DIAMOND

https://github.com/bbuchfink/diamond

In [None]:
'''
import hashlib
import os
parallel = [os.path.join(dp, f) for dp, dn, filenames in os.walk("./PRNA_secondary_structure") for f in filenames ]
series = [os.path.join(dp, f) for dp, dn, filenames in os.walk("./SRNA_secondary_structure") for f in filenames ]
for i in range(len(parallel)):        
    md5_hash = hashlib.md5()
    with open(parallel[i],"rb") as file:        
        md5_hash.update(file.read())
        digest1 = md5_hash.hexdigest()                
    md5_hash = hashlib.md5()
    with open(series[i],"rb") as file:        
        md5_hash.update(file.read())
        digest2 = md5_hash.hexdigest()                
    if(digest1 != digest2):
        print(parallel[i])
        print(series[i])
        print("***********")     
'''