# Annotated genomes
List of annotated genomes from: `[{{ project().name }}]` 

## Description
Summary table of the annotated GenBank files for each genome.

In [None]:
import pandas as pd
from pathlib import Path
import altair as alt
import json
import warnings
warnings.filterwarnings('ignore')

from IPython.display import display, Markdown, HTML
from itables import to_html_datatable as DT
import itables.options as opt
# Look-and-feel settings assigned on the itables options module.
# Cell text is italic, header text oblique, both at a reduced font size.
_css_rules = (
    ".itables table td { font-style: italic; font-size: .8em;}",
    ".itables table th { font-style: oblique; font-size: .8em; }",
)
# The stylesheet text starts and ends with a newline, one rule per line.
opt.css = "\n" + "\n".join(_css_rules) + "\n"
# CSS classes and the page-length choices for the tables.
opt.classes = ["display", "compact"]
opt.lengthMenu = [5, 10, 20, 50, 100, 200, 500]

In [None]:
# All report inputs are resolved relative to the parent of the working directory.
report_dir = Path("../")
# Keep the path and the parsed content under separate names so `metadata`
# always refers to the loaded JSON object.
metadata_path = report_dir / "metadata/project_metadata.json"
# JSON is read as UTF-8 explicitly instead of the platform default encoding.
with open(metadata_path, "r", encoding="utf-8") as f:
    metadata = json.load(f)
# The first top-level key of the metadata is used as the project name.
project_name = next(iter(metadata))

In [None]:
# Build one single-row table per summary text file found under
# <report_dir>/genbank, then stack them into `df` (one row per file stem).

# Opening part of the download links. This is deliberately a plain (non-f)
# string so the `{{ ... }}` template placeholder is left untouched; it is
# loop-invariant, so it is defined once here.
server_path = "<a href='{{ project().file_server() }}/genbank/"

summary = []
for item in (report_dir / "genbank").glob("*.txt"):
    genome = item.stem
    # The file is parsed as ":"-separated key/value lines with the first line
    # skipped; transposing turns the keys into columns and labels the row
    # with the file stem.
    row = (
        pd.read_csv(item, sep=":", skiprows=1, header=None, index_col=0)
        .rename(columns={1: genome})
        .T
    )
    # Links to the sibling .gbk and .tsv files (the anchor markup previously
    # carried a stray extra quote after target='_blank').
    row.loc[genome, "gbk file"] = server_path + f"{genome}.gbk' target='_blank'>{genome}.gbk</a>"
    row.loc[genome, "CDS table"] = server_path + f"{genome}.tsv' target='_blank'>{genome}.tsv</a>"
    summary.append(row)
df = pd.concat(summary).sort_index()

In [None]:
# Render the summary frame as an HTML datatable with every column centred.
centered_columns = [{"className": "dt-center", "targets": "_all"}]
table_html = DT(df, columnDefs=centered_columns)
display(HTML(table_html))

## Summary Statistics

In [None]:
# Draw one box plot per summary statistic and lay them out side by side.
source = df.copy()
source["dataset"] = project_name

# Statistics to plot and the colour of each box plot, paired positionally so a
# statistic keeps its colour even when another one is skipped.
stat_columns = ['contigs', 'bases', 'CDS', 'rRNA', 'repeat_region', 'tRNA', 'tmRNA']
colors = ['#66c2a5', '#fc8d62', '#8da0cb', '#e78ac3', '#a6d854', '#ffd92f', '#e5c494']

charts = []

for col, color in zip(stat_columns, colors):
    # A statistic that appears in none of the summary files never becomes a
    # column of the table; skip it rather than fail with a KeyError.
    if col not in source.columns:
        continue

    lowest = float(source[col].min())
    highest = float(source[col].max())
    data_range = highest - lowest
    if data_range > 0:
        buffer = 0.10 * data_range  # 10% buffer
    else:
        # Every row holds the same value (e.g. a single genome): pad by 10% of
        # the value, or by 1 at least, so the y-domain is not zero-width.
        buffer = max(0.10 * abs(lowest), 1.0)
    ymin = lowest - buffer
    ymax = highest + buffer

    chart = alt.Chart(source).mark_boxplot(size=50, median=dict(color='black')).encode(
        y=alt.Y(f'{col}:Q', title=None, scale=alt.Scale(domain=(ymin, ymax))),
        x=alt.X("dataset:N", axis=None),  # This is used to align the boxplots vertically
        color=alt.value(color),  # Color of the boxplot
        opacity=alt.value(0.7)  # Opacity of the boxplot
    ).properties(title=f'{col}', width=100, height=150)

    charts.append(chart)

# Kept as the last expression so the notebook displays the combined chart.
alt.hconcat(*charts)

## References

<font size="2">

{% for i in project().rule_used['prokka-gbk']['references'] %}
- *{{ i }}*
{% endfor %}

</font>