# antiSMASH 
Summary of BGCs detected in each genome for: `[{{ project().name }}]`

## Description
> antiSMASH allows the rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes.

In [70]:
# All notebook dependencies are imported in this single cell.
import pandas as pd
from pathlib import Path
from IPython.display import display, Markdown, HTML
import json
import altair as alt

import warnings
# NOTE(review): this silences every warning category for the whole notebook,
# including pandas/numpy runtime warnings that could point at bad input data.
warnings.filterwarnings('ignore')

from itables import to_html_datatable as DT
import itables.options as opt
# Global itables options: presumably the CSS classes applied to rendered tables
# and the choices offered in the page-length menu -- confirm against itables docs.
opt.classes = ["display", "compact"]
opt.lengthMenu = [5, 10, 20, 50, 100, 200, 500]


# Directory that the input tables are resolved against (paths are built as
# report_dir / "tables/..."); relative to the notebook's working directory.
report_dir = Path("../")

In [71]:
# Input tables, all resolved against the report directory.
antismash_table = report_dir / "tables/df_antismash_6.1.1_summary.csv"
gtdb_table = report_dir / "tables/df_gtdb_meta.csv"
ncbi_table = report_dir / "tables/df_ncbi_meta.csv"

# The first CSV column becomes the index of each frame. The lookups below use
# the antiSMASH index labels to select rows from the NCBI and GTDB tables, so
# all three tables must share those labels.
df_antismash = pd.read_csv(antismash_table, index_col=0)
df_gtdb = pd.read_csv(gtdb_table, index_col=0)
df_ncbi = pd.read_csv(ncbi_table, index_col=0)

# df_raw: plain-text per-genome summary (one row per antiSMASH table row).
df_raw = pd.DataFrame(index=df_antismash.index)
df_raw["Genome ID"] = df_antismash['genome_id']

df_raw['Organism name'] = df_ncbi.loc[df_raw.index, 'organism']
# Keep the text that follows the first "__" separator of the GTDB 'Organism'
# value (presumably this strips a rank prefix -- confirm against the GTDB table).
df_raw['GTDB species'] = [df_gtdb.loc[idx, 'Organism'].split('__')[1] for idx in df_raw.index]
df_raw['BGCs'] = df_antismash.loc[df_raw.index, 'bgcs_count']
df_raw['Incomplete BGCs'] = df_antismash.loc[df_raw.index, 'bgcs_on_contig_edge']

# df: display copy of df_raw in which each genome ID is wrapped in a link to
# that genome's antiSMASH index.html. The link prefix is loop-invariant, so it
# is built once, and the whole column is generated in a single pass.
# The anchor is closed with target='_blank'> (the previous version emitted a
# stray extra quote, producing malformed HTML).
server_path = "<a href='{{ project().file_server() }}/antismash/6.1.1/"
df = df_raw.copy()
df["Genome ID"] = [
    f"{server_path}{gid}/index.html' target='_blank'>{gid}</a>"
    for gid in df_raw["Genome ID"]
]
df = df.reset_index(drop=True)

## Result Summary

In [72]:
# Per-genome BGC counts and the subset flagged as lying on a contig edge.
region = df_antismash.bgcs_count
incomplete = df_antismash.bgcs_on_contig_edge

total_bgcs = int(region.sum())
if total_bgcs > 0:
    # Share of BGCs that are NOT on a contig edge.
    complete_fraction = 1 - incomplete.sum() / region.sum()
    text = f"""AntiSMASH detected {total_bgcs} BGCs from {len(region)} genomes with the median of {int(region.median())}. Out of these, {complete_fraction:.2%} are deemed as complete."""
else:
    # With no BGCs (or no genomes) the median and the completeness ratio are
    # undefined: the median of an empty column is NaN (int() would raise) and
    # the ratio would be rendered as "nan%". Report the plain count instead.
    text = f"""AntiSMASH detected 0 BGCs from {len(region)} genomes."""
display(Markdown(text))

AntiSMASH detected 206 BGCs from 5 genomes with the median of 38. Out of these, 99.51% are deemed as complete.

> Note: Incomplete BGCs are those that antiSMASH identified as lying on a contig edge and that are therefore likely to be truncated.

In [83]:
# Histogram of BGC counts per genome, coloured by GTDB species.
source = df_raw

base = alt.Chart(source)

bar = base.mark_bar().encode(
    # Bin the per-genome BGC count along the x axis.
    x=alt.X('BGCs:Q', bin=True, axis=alt.Axis(title='BGCs')),
    # Number of genomes per bin; the y channel uses alt.Y (the previous version
    # passed an alt.X channel object to the y encoding).
    y=alt.Y('count()', axis=alt.Axis(title='Genomes')),
    color='GTDB species',
    tooltip=['Genome ID', 'Organism name', 'GTDB species', 'BGCs', 'Incomplete BGCs']
).interactive()

bar

## Summary Table
Click on a genome ID to open its antiSMASH result.

[Download Table]({{ project().file_server() }}/tables/df_antismash_6.1.1_summary.csv){:target="_blank" .md-button}

In [69]:
# Render the linked summary table as an interactive datatable; the column
# definition applies the "dt-center" class to every column.
column_defs = [{"className": "dt-center", "targets": "_all"}]
table_html = DT(df, columnDefs=column_defs)
display(HTML(table_html))

Genome ID,Organism name,GTDB species,BGCs,Incomplete BGCs
Loading... (need help?),,,,


## References
<font size="2">
{% for i in project().rule_used['antismash']['references'] %}
- {{ i }} 
{% endfor %}
</font>