In [1]:
# library import
from utils import *
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

In [37]:
def cons_from_deepte_parser(cons_file):
    """Tabulate the DeepTE-annotated consensus sequences of a FASTA file.

    Every record name is split on ``"__"`` and the second field is treated
    as the annotation string. That string is searched for the markers
    below, in this order, and the first hit decides the coarse label:

    ============  ==========
    marker        label
    ============  ==========
    ``_LTR``      ``"LTR"``
    ``_MITE``     ``"MITE"``
    ``_DNA``      ``"DNA"``
    ``_Helitron`` ``"DNA"``
    ``_SINE``     ``"SINE"``
    ``_LINE``     ``"LINE"``
    ============  ==========

    Records whose annotation contains none of the markers are skipped, and
    so are records whose name has no ``"__"`` separator at all (there is no
    annotation to read in that case).

    Parameters
    ----------
    cons_file
        FASTA source handed unchanged to ``SeqIO.parse(cons_file, "fasta")``.

    Returns
    -------
    pandas.DataFrame
        One row per kept record, in file order, with the columns
        ``"Seq name"`` (the full record name), ``"Annotation"`` (the coarse
        label) and ``"Length"`` (``len(record.seq)``). The three columns are
        present even when no record is kept.
    """
    columns = ["Seq name", "Annotation", "Length"]

    # Order is significant: the first marker found wins, so an annotation
    # carrying both "_DNA" and "_MITE" is labelled "MITE".
    order_markers = (
        ("_LTR", "LTR"),
        ("_MITE", "MITE"),
        ("_DNA", "DNA"),
        ("_Helitron", "DNA"),
        ("_SINE", "SINE"),
        ("_LINE", "LINE"),
    )

    # Rows are collected in a plain list and turned into a DataFrame once;
    # concatenating a one-row frame per record is quadratic in the number
    # of sequences.
    rows = []
    for seq_record in SeqIO.parse(cons_file, "fasta"):
        name_fields = seq_record.name.split("__")
        if len(name_fields) < 2:
            # No annotation part in the name: treat like an unknown order.
            continue
        annotation = name_fields[1]

        order = next(
            (label for marker, label in order_markers if marker in annotation),
            None,
        )
        if order is None:
            continue

        rows.append((seq_record.name, order, len(seq_record.seq)))

    return pd.DataFrame(rows, columns=columns)

In [41]:
# Parse the DeepTE-classified consensus FASTA produced for each de novo tool
# (one call per file, in the order RM2, EDTA, MITE).
RM2_data, EDTA_data, MITE_data = map(
    cons_from_deepte_parser,
    (
        "../../DeepTE/RM2/results/opt_DeepTE.fasta",
        "../../DeepTE/EDTA/results/opt_DeepTE.fasta",
        "../../DeepTE/MITE/results/opt_DeepTE.fasta",
    ),
)

# The RepBase library goes through its own parser (parse_repbase).
repbase_data = parse_repbase("../test/raw_lib/RepBase.fasta")

In [111]:
# Display name of each library -> (its table, hex colour used for its traces).
dict_data = {
    "RepeatModeler2": (RM2_data, "#392F5A"),
    "EDTA": (EDTA_data, "#D99AC5"),
    "MITE-Tracker": (MITE_data, "#FF8811"),
    "RepBase": (repbase_data, "#14BDEB"),
}

In [205]:
# Sequence-length histograms: one subplot row per annotation, and within each
# row one histogram per library listed in tool_lst.

# (annotation label, bin count). The bin count is not applied to the
# histograms at the moment; plotly chooses the bins automatically.
# NOTE(review): cons_from_deepte_parser never emits "nLTR", so that row can
# only be filled by RepBase rows -- confirm parse_repbase produces this label.
annot_lst = [("LTR", 100), ("DNA", 100), ("MITE", 200), ("LINE", 200), ("SINE", 200), ("nLTR", 200)]
tool_lst = ["RepeatModeler2", "EDTA", "MITE-Tracker", "RepBase"]

# x-axis settings per subplot row, in the same order as annot_lst. The last
# row has no explicit range and is therefore auto-scaled.
# NOTE(review): the titles '1'..'6' look like placeholders -- replace them
# with a real axis label once the wording is decided.
xaxis_settings = [
    {"title_text": '1', "range": [0, 1000]},
    {"title_text": '2', "range": [0, 4000]},
    {"title_text": '3', "range": [0, 800]},
    {"title_text": '4', "range": [0, 8000]},
    {"title_text": '5', "range": [0, 800]},
    {"title_text": '6'},
]

fig = make_subplots(
    rows=len(annot_lst),
    cols=1,
    subplot_titles=tuple(annot[0] for annot in annot_lst),
)

for row, annot in enumerate(annot_lst, start=1):
    annotation = annot[0]

    for tool in tool_lst:
        tool_df, tool_color = dict_data[tool]

        fig.add_trace(
            go.Histogram(
                x=tool_df.loc[tool_df["Annotation"] == annotation, "Length"],
                marker_color=tool_color,
                name=tool,
                # One legend entry per library: only the first row's traces
                # are listed, and the shared legendgroup makes that entry
                # toggle the library in every subplot.
                legendgroup=tool,
                showlegend=(row == 1),
            ),
            row=row,
            col=1,
        )

for row, axis_kwargs in enumerate(xaxis_settings, start=1):
    fig.update_xaxes(row=row, col=1, **axis_kwargs)

fig.update_layout(
    barmode='group',
    autosize=False,
    width=900,
    height=1000,
)
fig.show()