# DeepTFactor
Summary of [DeepTFactor](https://bitbucket.org/kaistsystemsbiology/deeptfactor/src/master/) results from project: `[{{ project().name }}]` 

## Description
A deep learning-based tool for the prediction of transcription factors.

In [None]:
import pandas as pd
from pathlib import Path
from IPython.display import display, Markdown, HTML
import itables.options as opt
from itables import to_html_datatable as DT
import plotly.graph_objects as go
# Global itables settings, applied to every table rendered later in this notebook.
# Extra CSS: smaller font for all cells, italic body cells and oblique headers.
opt.css = """
.itables table td { font-style: italic; font-size: .8em;}
.itables table th { font-style: oblique; font-size: .8em; }
"""
# CSS classes attached to the rendered table element
# (presumably the DataTables "display"/"compact" styling classes — confirm against the itables docs).
opt.classes = ["display", "compact"]
# Page-size choices for the table (presumably the DataTables "entries per page" menu — confirm).
opt.lengthMenu = [5, 10, 20, 50, 100, 200, 500]

import warnings
# Suppress every warning for the rest of the session (presumably to keep the rendered report clean).
# NOTE(review): this also hides pandas deprecation warnings raised by the cells below.
warnings.filterwarnings('ignore')

In [None]:
# Parent of the current working directory, kept as a Path for building report-relative paths.
report_dir = Path("../")

In [None]:
# Read the DeepTFactor table (first CSV column becomes the index) and keep
# only the rows whose `deeptfactor_prediction` value equals True.
df = (
    pd.read_csv("../tables/df_deeptfactor.csv", index_col=0)
    .loc[lambda table: table["deeptfactor_prediction"] == True]  # noqa: E712
)

In [None]:
# Collect, for every genome listed in the GTDB metadata table, the locus tag
# and description found on each FASTA header line of its Prokka protein file.
faa_dictionary = []
df_gtdb = pd.read_csv("../tables/df_gtdb_meta.csv")
for genome_id in df_gtdb.genome_id:
    aa_dict = []
    with open(f"../../../interim/prokka/{genome_id}/{genome_id}.faa", "r") as f:
        # Stream the file line by line: only ">" header lines are needed, so
        # the sequence lines are never held in memory.
        for line in f:
            if not line.startswith(">"):
                continue
            # Remove the trailing newline and only the *leading* ">" marker, so
            # a description that happens to end with ">" is left intact.
            header = line.strip("\n").lstrip(">")
            locus_tag, separator, annotation = header.partition(" ")
            # Always emit two fields; a header without a description gets None
            # so the two-column DataFrame below can always be built.
            aa_dict.append([locus_tag, annotation if separator else None])
    df_aa = pd.DataFrame(aa_dict, columns=["locus_tag", "annotation"]).set_index("locus_tag")
    df_aa["genome_id"] = genome_id
    faa_dictionary.append(df_aa)
# One table for all genomes, indexed by locus tag.
df_aa = pd.concat(faa_dictionary)

In [None]:
# Outer-join the DeepTFactor rows with every locus tag read from the .faa
# files. `genome_id` is dropped from the prediction table first so that the
# merged frame carries the single `genome_id` column coming from `df_aa`.
df_deeptf = pd.merge(df.reset_index().drop(columns='genome_id'), df_aa.reset_index(), on="locus_tag", how="outer")
# Locus tags without a DeepTFactor row come out of the merge as NaN; mark them
# False and cast explicitly so the column is a real bool dtype. Later cells use
# it as a mask with `~` and `&`, and recent pandas versions deprecate the
# implicit object-to-bool downcast that `fillna` alone used to perform.
df_deeptf["deeptfactor_prediction"] = df_deeptf["deeptfactor_prediction"].fillna(False).astype(bool)
# Remaining missing values (all other columns) are shown as 0.
df_deeptf = df_deeptf.fillna(0)

In [None]:
# Render the merged table as an interactive HTML table.
# maxColumns is set to the frame's own column count and maxBytes to 0
# (presumably so itables neither drops columns nor downsamples rows — confirm
# against the itables documentation).
display(
    HTML(
        DT(
            df_deeptf,
            columnDefs=[{"className": "dt-center", "targets": "_all", "searchable": True}],
            maxColumns=len(df_deeptf.columns),
            maxBytes=0,
        )
    )
)

In [None]:
def generate_sunburst_plot(data, outfile_sunburst, outfile_barh):
    """Write a sunburst chart and a horizontal bar chart of the DeepTFactor results.

    The sunburst splits all rows of ``data`` into rows flagged by
    ``deeptfactor_prediction`` and rows that are not, and each of those into
    rows whose ``annotation`` contains "hypothetical" (case-insensitive) and
    the rest. The bar chart counts the distinct ``annotation`` values of the
    flagged, non-hypothetical rows.

    Args:
        data: DataFrame with a ``deeptfactor_prediction`` column (True for
            flagged rows) and an ``annotation`` text column.
        outfile_sunburst: Destination of the sunburst HTML file.
        outfile_barh: Destination of the bar-chart HTML file.

    Returns:
        None. Both figures are written to disk with ``write_html``.
    """
    # Build each mask once. `.eq(True)` always yields a real boolean Series
    # (missing values become False), so `~` below is a logical negation even
    # if the column arrives with object dtype.
    is_predicted = data["deeptfactor_prediction"].eq(True)
    # `na=False`: rows whose annotation is missing or not a string count as
    # "not hypothetical" instead of putting NaN into the mask.
    is_hypothetical = data["annotation"].str.contains("hypothetical", case=False, na=False)

    # Calculate the necessary information
    total_rows = len(data)
    total_annotated = int(is_predicted.sum())
    hypothetical_count = int((is_predicted & is_hypothetical).sum())
    non_annotated_hypothetical_count = int((~is_predicted & is_hypothetical).sum())
    annotation_counts = data.loc[is_predicted & ~is_hypothetical, "annotation"].value_counts()

    # Sunburst plot data: one entry per sector, parents listed by label.
    labels = ["Locus Tags", "Annotated by deepTF", "Not Annotated",
              "Hypothetical Proteins", "Other Proteins",
              "Not Annotated - Hypothetical", "Not Annotated - Other"]
    parents = ["", "Locus Tags", "Locus Tags",
               "Annotated by deepTF", "Annotated by deepTF",
               "Not Annotated", "Not Annotated"]
    # Every parent value is the exact sum of its children (the "other" counts
    # are obtained by subtraction), which is what branchvalues="total" expects.
    values = [total_rows, total_annotated, total_rows - total_annotated,
              hypothetical_count, total_annotated - hypothetical_count,
              non_annotated_hypothetical_count,
              total_rows - total_annotated - non_annotated_hypothetical_count]

    # Create the sunburst plot
    fig1 = go.Figure(go.Sunburst(
        labels=labels,
        parents=parents,
        values=values,
        # The values above already include the descendants. With the default
        # ("remainder") each parent would be counted on top of its children
        # and the sector sizes would be distorted.
        branchvalues="total",
        maxdepth=2,
        marker=dict(colors=['#f5f5f5', '#66b2ff', '#ff9999', '#ffcccc', '#99ccff', '#ffdddd', '#aaddff'])
    ))
    fig1.update_layout(height=800, title="Sunburst Plot of Locus Tags and Protein Types")
    fig1.write_html(outfile_sunburst)

    # Create the horizontal bar chart for annotations
    fig2 = go.Figure(go.Bar(
        y=annotation_counts.index,
        x=annotation_counts.values,
        marker_color='#66b2ff',
        orientation='h'
    ))
    fig2.update_layout(title="Distribution of Annotations for Non-Hypothetical, deepTF-Annotated Entries",
                       yaxis_title="Annotation",
                       xaxis_title="Count",
                       height=800)
    fig2.write_html(outfile_barh)

In [None]:
# Destinations of the two HTML figures, relative to the working directory.
outfile1 = Path("assets/figures/deeptf_sunburst.html")
outfile2 = Path("assets/figures/deeptf_barh.html")

# Make sure the target directory of each file exists before plotting.
for target in (outfile1, outfile2):
    target.parent.mkdir(parents=True, exist_ok=True)

generate_sunburst_plot(df_deeptf, outfile1, outfile2)

In [None]:
# Embed the saved sunburst HTML file in the notebook output.
display(HTML(filename=str(outfile1)))

In [None]:
# Embed the saved bar-chart HTML file in the notebook output.
display(HTML(filename=str(outfile2)))

## References

<font size="2">

{% for i in project().rule_used['deeptfactor']['references'] %}
- *{{ i }}*
{% endfor %}

</font>