# BiG-SCAPE
Summary of GCFs found in each genome from project `[{{ project().name }}]` using [BiG-SCAPE](https://github.com/medema-group/BiG-SCAPE)

[BiG-SCAPE result]({{ project().file_server() }}/bigscape/result_as{{project().dependency_version()}}){:target="_blank" .md-button}

## BGC Distribution
[BiG-SCAPE](https://github.com/medema-group/BiG-SCAPE) constructs sequence similarity networks of Biosynthetic Gene Clusters (BGCs) and groups them into Gene Cluster Families (GCFs). BiG-SCAPE does this by rapidly calculating a distance matrix between gene clusters based on a comparison of their protein domain content, order, copy number and sequence identity.

In [None]:
import pandas as pd
from pathlib import Path
import json

from IPython.display import display, Markdown, HTML
from jinja2 import Template
from itables import to_html_datatable as DT
import itables.options as opt
import altair as alt
import seaborn as sns

from scipy.cluster.hierarchy import linkage, leaves_list
from scipy.spatial.distance import pdist

# Global itables appearance: page-length choices, DataTables CSS classes and
# a small italic font for table cells and headers.
opt.lengthMenu = [5, 10, 20, 50, 100, 200, 500]
opt.classes = ["display", "compact"]
opt.css = """
.itables table td { font-style: italic; font-size: .8em;}
.itables table th { font-style: oblique; font-size: .8em; }
"""

# Suppress every Python warning from here on.
import warnings
warnings.filterwarnings(action="ignore")

# All inputs are resolved relative to the parent directory.
report_dir = Path("../")

# Load the recorded dependency versions and pick out the antiSMASH entry.
versions_file = report_dir / "metadata" / "dependency_versions.json"
with open(versions_file, "r") as handle:
    dependency_version = json.load(handle)
antismash_version = dependency_version["antismash"]

In [None]:
# Set Cutoff
# Kept as a string because it is embedded verbatim in the input file names.
# NOTE(review): column names such as "fam_type_0.30" are still hard-coded in
# other cells, so changing this value alone is not sufficient.
cutoff = "0.30"


def _find_bigscape_table(directory, pattern):
    """Return the first file in *directory* matching the glob *pattern*.

    Matches are sorted so the choice is deterministic when several files match.

    Raises:
        FileNotFoundError: if no file matches the pattern.
    """
    matches = sorted(directory.glob(pattern))
    if not matches:
        raise FileNotFoundError(f"No file matching '{pattern}' found in {directory}")
    return matches[0]


# Read tables that are generated when BiGSCAPE rule is TRUE
antismash_table = report_dir / f"tables/df_antismash_{antismash_version}_summary.csv"
gtdb_table = report_dir / "tables/df_gtdb_meta.csv"

#ncbi_table = report_dir / "tables/df_ncbi_meta.csv"
bigscape_dir = report_dir /f"bigscape/for_cytoscape_antismash_{antismash_version}/"
# The file names are derived from `cutoff` instead of repeating the literal.
bgc_table = _find_bigscape_table(bigscape_dir, f"*_df_clusters_{cutoff}.csv")
gcf_table = _find_bigscape_table(bigscape_dir, f"*_df_families_{cutoff}.csv")
mibig_table = _find_bigscape_table(bigscape_dir, f"*_df_known_{cutoff}.csv")
gcf_presence_table = _find_bigscape_table(bigscape_dir, f"*_df_family_presence_{cutoff}.csv")
network_table = _find_bigscape_table(bigscape_dir, f"*_df_network_{cutoff}.csv")

df_antismash = pd.read_csv(antismash_table).set_index("genome_id", drop=False)
df_gtdb = pd.read_csv(gtdb_table, index_col=0)

# correct organism name: an empty species label "s__" becomes "s__<genus> sp."
# (done column-wise, so it also works when the index contains duplicates)
unnamed_species = df_gtdb["Organism"] == "s__"
genus_names = df_gtdb.loc[unnamed_species, "Genus"].str.split("__").str[1]
df_gtdb.loc[unnamed_species, "Organism"] = "s__" + genus_names + " sp."

df_bgcs = pd.read_csv(bgc_table, index_col=0)
df_gcf_presence = pd.read_csv(gcf_presence_table, index_col=0)
df_gcfs = pd.read_csv(gcf_table, index_col=0)
df_mibig = pd.read_csv(mibig_table, index_col=0)
df_network = pd.read_csv(network_table, index_col=0)

## Sequence Similarity Network

In [None]:
import pandas as pd
import networkx as nx
import plotly.graph_objects as go
import numpy as np
import scipy.spatial as spatial

from pathlib import Path
import json, shutil, yaml

In [None]:
def create_node_trace(G, node_trace_category, color, showtextlabel=False, nodesize=10, nodeopacity=0.8, 
                      nodesymbol="circle", linewidth=1, linecolor="black", textposition="top center", showlegend=False):
    """Build a plotly scatter trace for all nodes of one category.

    Nodes are selected when their ``node_trace`` attribute equals
    *node_trace_category*; their ``pos`` attribute supplies the coordinates
    and their ``text`` attribute the hover text. The remaining arguments are
    passed through to the marker / trace styling.

    Returns:
        The ``go.Scatter`` trace, named after the category.
    """
    # Node ids of the requested category, in graph iteration order.
    selected = np.array([name for name in G.nodes() if G.nodes[name]["node_trace"] == node_trace_category])
    # reshape(-1, 2) keeps the column slicing valid when nothing is selected.
    coords = np.array([G.nodes[name]['pos'] for name in selected.flatten()]).reshape(-1, 2)
    labels = np.array([G.nodes[name]['text'] for name in selected])

    marker_style = dict(
        symbol=nodesymbol,
        opacity=nodeopacity,
        showscale=False,
        color=color,
        size=nodesize,
        line=dict(width=linewidth, color=linecolor))

    return go.Scatter(
        x=coords[:, 0].tolist(),
        y=coords[:, 1].tolist(),
        text=labels.tolist(),
        textposition=textposition,
        mode="markers+text" if showtextlabel else "markers",
        hoverinfo='text',
        name=node_trace_category,
        showlegend=showlegend,
        marker=marker_style)

In [None]:
def create_edge_trace(Graph, name, showlegend=False, color='#888', width=0.5, opacity=0.8, dash="solid"):
    """Build a plotly line trace for all edges of one relation type.

    Args:
        Graph: graph whose edges carry a ``relation_type`` attribute and whose
            nodes carry a ``pos`` attribute with their coordinates.
        name: relation type to draw; also used as the trace name.
        showlegend, color, width, opacity, dash: passed to the trace styling.

    Returns:
        The ``go.Scatter`` trace in ``lines`` mode.
    """
    # Bug fix: edge attributes are read from the ``Graph`` argument. The
    # previous code looked them up on the global ``G``, which only worked
    # when the caller happened to pass that same object.
    edges = np.array([edge for edge in Graph.edges() if Graph.edges[edge]["relation_type"] == name])
    # One coordinate row per edge endpoint; reshape keeps this valid for zero edges.
    pos = np.array([Graph.nodes[e]['pos'] for e in edges.flatten()]).reshape(-1, 2)
    # Insert a None separator after every pair of endpoints so that
    # consecutive edges are not joined into one continuous line.
    xs = np.insert(pos[:, 0], np.arange(2, len(pos[:, 0]), 2), None)
    ys = np.insert(pos[:, 1], np.arange(2, len(pos[:, 1]), 2), None)

    edge_trace = go.Scatter(
        x=xs,
        y=ys,
        name=name,
        opacity=opacity,
        line=dict(width=width, color=color, dash=dash),
        hoverinfo='none',
        mode='lines',
        showlegend=showlegend)

    return edge_trace

In [None]:
path_bigscape = report_dir / f"bigscape/for_cytoscape_antismash_{antismash_version}/"

# Network (edge) and cluster (node) tables for the chosen cutoff, plus the
# antiSMASH regions table.
df_bigscape = pd.read_csv(list(path_bigscape.glob(f"*df_network*{cutoff}*"))[0])
df_bigscape_cluster = pd.read_csv(list(path_bigscape.glob(f"*df_cluster*{cutoff}*"))[0])
df_regions = pd.read_csv(report_dir / f"tables/df_regions_antismash_{antismash_version}.csv")


def _strip_mibig_version(ids):
    """Return a copy of *ids* where entries containing 'BGC' are cut at the first '.'.

    Other entries, including missing values, are returned unchanged.
    """
    cleaned = ids.copy()
    is_mibig = cleaned.str.contains("BGC", regex=False, na=False)
    cleaned[is_mibig] = cleaned[is_mibig].str.split(".").str[0]
    return cleaned


# clean up MIBIG ids with extra .1
# Done column-wise instead of one `.loc` write per row.
for id_column in ("Clustername 1", "Clustername 2"):
    df_bigscape[id_column] = _strip_mibig_version(df_bigscape[id_column])

df_bigscape_cluster["bgc_id"] = _strip_mibig_version(df_bigscape_cluster["bgc_id"])

In [None]:
# Turn the BiG-SCAPE network table into an edge list with source/target
# columns, tagged with its relation type.
edge_bigscape = (
    df_bigscape
    .rename(columns={'Clustername 1': 'source', 'Clustername 2': 'target'})
    .assign(relation_type='bigscape_similarity')
)
# Edge weight is the Jaccard index multiplied by 30 (scale to cytoscape).
edge_bigscape = edge_bigscape.assign(weight=edge_bigscape['Jaccard index'] * 30)

In [None]:
# When the regions table carries knownclusterblast hits, add one edge per
# region to its most similar known cluster.
if 'most_similar_known_cluster_id' in df_regions.columns:
    kcb_source_columns = [
        "bgc_id",
        'most_similar_known_cluster_id',
        'most_similar_known_cluster_description',
        'most_similar_known_cluster_type',
        "similarity",
    ]
    kcb_column_names = {
        'bgc_id': 'source',
        'most_similar_known_cluster_id': 'target',
        'most_similar_known_cluster_description': 'fam_known_compounds_0.30',
        'most_similar_known_cluster_type': 'product',
    }
    # Rows with any missing value are dropped after renaming.
    edge_knownclusterblast = (
        df_regions[kcb_source_columns]
        .rename(columns=kcb_column_names)
        .dropna()
    )
    edge_knownclusterblast['relation_type'] = 'knownclusterblast'
    # scale to cytoscape
    edge_knownclusterblast['weight'] = edge_knownclusterblast['similarity'] * 30
    edge_bigscape = pd.concat([edge_bigscape, edge_knownclusterblast])

In [None]:
# Build the similarity network: one edge per row of edge_bigscape, with every
# column other than source/target stored as an edge attribute.
G = nx.from_pandas_edgelist(edge_bigscape, source='source', target='target', edge_attr=[c for c in edge_bigscape.columns if c not in ["source", "target"]])

# Taxonomy lookup keyed by the df_gtdb index: {index label: {column: value}}.
tax_dict = df_gtdb.loc[:, ['gtdb_release', 'Domain', 'Phylum', 'Class', 'Order', 'Family', 'Genus', 'Species', 'Organism']].T.to_dict()

# Iterate through the DataFrame rows and copy each row's columns onto the
# matching node. Only nodes already present in the graph are updated (no new
# nodes are created); tables later in the list overwrite attributes of the
# same name set by earlier ones.
for dfs in [df_bigscape_cluster, df_regions, df_mibig.reset_index()]:
    for index, row in dfs.iterrows():
        node_id = row['bgc_id']
        attributes = row.drop('bgc_id').to_dict()
        if node_id in G.nodes:
            G.add_node(node_id, **attributes)

# Add taxonomy information
# Nodes outside df_mibig that have a genome_id receive that genome's taxonomy
# (a genome_id missing from tax_dict raises KeyError). Nodes outside df_mibig
# without a genome_id are tagged as "MIBIG_knownclusterblast".
for node_id in G.nodes:
    if node_id not in df_mibig.index:
        if "genome_id" in G.nodes[node_id].keys():
            genome_id = G.nodes[node_id]["genome_id"]
            for k, v in tax_dict[genome_id].items():
                G.nodes[node_id][k] = v
        else:
            G.nodes[node_id]['node_trace'] = "MIBIG_knownclusterblast"

# Display nodes with their attributes
# NOTE(review): this bare expression is not the last statement of the cell, so
# it is presumably not rendered in the notebook output - confirm whether it is
# still needed.
nodes_with_attributes = [{node: G.nodes[node]} for node in G.nodes()]
nodes_with_attributes

# Export the annotated graph as GraphML, creating the target folder if needed.
outfile = Path(f"assets/data/bigscape_{cutoff}_as{antismash_version}.graphml")
outfile.parent.mkdir(parents=True, exist_ok=True)
nx.write_graphml(G, outfile)

In [None]:
# Graphviz "neato" layout settings.
layout_args = ['-Gstart=10', '-Goverlap_scaling=-100']
options = dict(prog='neato', args=' '.join(layout_args))

# Compute node coordinates.
pos = nx.nx_agraph.graphviz_layout(G, **options)

# One colour per BiG-SCAPE class found in the cluster table.
bigscape_class_labels = df_bigscape_cluster.bigscape_class.unique()
bigscape_class_colors = sns.color_palette("colorblind", len(bigscape_class_labels)).as_hex()

# Node attributes listed in the hover text (shown in the node's own attribute order).
hover_fields = (
    'product',
    'bigscape_class',
    'genome_id',
    'accn_id',
    'gcf_0.30',
    'fam_id_0.30',
    'fam_type_0.30',
    'fam_known_compounds_0.30',
    'region',
    'contig_edge',
    'region_length',
    'Organism',
)

for node, xy in pos.items():
    attrs = G.nodes[node]
    attrs['pos'] = xy
    hover_lines = [node] + [f"{key} : {value}" for key, value in attrs.items() if key in hover_fields]
    attrs['text'] = "<br>".join(hover_lines)

    # Keep a category assigned earlier; otherwise MIBiG entries get "MIBIG"
    # and every other node is grouped by its BiG-SCAPE class.
    if 'node_trace' not in attrs:
        attrs['node_trace'] = "MIBIG" if node in df_mibig.index else attrs['bigscape_class']

# Styling per edge relation type and per node category.
edge_annotation_map = {
    'bigscape_similarity': {'color': 'black', 'width': 10},
    'knownclusterblast': {'color': 'grey', 'width': 0.1},
}

node_annotation_map = {
    'MIBIG': {'color': 'blue', 'node_symbol': 'star'},
    'MIBIG_knownclusterblast': {'color': 'blue', 'node_symbol': 'asterisk'},
    "BGC": {'color': 'blue', 'node_symbol': 'circle'},
}
node_annotation_map.update(
    (class_label, {'color': class_color, 'node_symbol': 'circle'})
    for class_label, class_color in zip(bigscape_class_labels, bigscape_class_colors)
)

traces = []

# Edge traces first; knownclusterblast links are drawn dotted.
for relation, relation_style in edge_annotation_map.items():
    edge_trace = create_edge_trace(
        G,
        relation,
        color=relation_style['color'],
        dash="dot" if 'knownclusterblast' in relation else "solid",
        showlegend=True,
    )
    traces.append(edge_trace)

# Then one node trace per category; BiG-SCAPE class nodes are more opaque.
for category, category_style in node_annotation_map.items():
    node_trace = create_node_trace(
        G,
        category,
        category_style['color'],
        showtextlabel=False,
        nodesymbol=category_style['node_symbol'],
        nodeopacity=0.8 if category in bigscape_class_labels else 0.5,
        showlegend=True,
        linecolor="black",
        linewidth=0.5,
        nodesize=8,
        textposition="top left",
    )
    traces.append(node_trace)

# Both axes are drawn as a plain black frame without grid or ticks.
axis_style = dict(showgrid=False, zeroline=False, showticklabels=False, linecolor='black', mirror=True, linewidth=1)
figure_layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='white',
    showlegend=True,
    hovermode='closest',
    margin=dict(b=20, l=5, r=5, t=40),
    xaxis=axis_style,
    yaxis=axis_style,
    width=800,
    height=600,
)
fig = go.Figure(data=traces, layout=figure_layout)

# Write the figure to disk and embed the saved file in the notebook output.
outfile = Path("assets/figures/bigscape.html")
outfile.parent.mkdir(parents=True, exist_ok=True)
fig.write_html(outfile)

display(HTML(filename=str(outfile)))

[Cytoscape Network](assets/data/bigscape_0.30_as{{project().dependency_version()}}.graphml){:target="_blank" .md-button}

Download the graphml file using the button above and import it into Cytoscape for interactive visualisation of the network.

## Result Summary

In [None]:
# Headline numbers: total GCFs and BGCs.
text_line1 = f"""BiG-SCAPE detected **{int(df_gcfs.shape[0])}** GCFs of the **{int(df_bgcs.shape[0])}** BGCs"""
display(Markdown(text_line1))

# Number of GCFs per family type, computed once for both lines below.
fam_type_counts = df_gcfs.value_counts('fam_type')

try:
    text_line2 = f"""- Number of known GCFs: **{int(fam_type_counts["known_family"])}**"""
except KeyError:
    # No family of this type in the table.
    text_line2 = "- Number of known GCFs: 0"
display(Markdown(text_line2))

# The unknown-family count gets the same guard as the known-family count;
# previously a table without unknown families raised KeyError here.
try:
    text_line3 = f"""- Number of unknown GCFs: **{int(fam_type_counts["unknown_family"])}**"""
except KeyError:
    text_line3 = "- Number of unknown GCFs: 0"
display(Markdown(text_line3))

# Families that contain exactly one cluster.
text_line4 = f"""- Number of unique GCFs: **{int(df_gcfs[df_gcfs.clusters_in_fam==1].shape[0])}**"""
display(Markdown(text_line4))

In [None]:
# Chart dimensions shared by both bar charts.
H = 600
W = 250


def _short_taxon_name(value):
    """Return the part after the last '__' of a GTDB label; NaN for non-strings."""
    return value.split("__")[-1] if isinstance(value, str) else float("nan")


source = df_bgcs.copy()

# Attach genus and organism names to every BGC through its genome_id.
# This replaces a row-wise loop that hid all errors behind a bare `except: pass`:
# genomes missing from df_gtdb (or with a missing label) simply get NaN, and
# the two columns are now filled independently of each other.
gtdb_lookup = df_gtdb.loc[~df_gtdb.index.duplicated(keep="first")]
for taxon_column in ("Genus", "Organism"):
    short_names = gtdb_lookup[taxon_column].map(_short_taxon_name)
    source[taxon_column] = source["genome_id"].map(short_names)

source['fam_type'] = source['fam_type_0.30']

alt.data_transformers.disable_max_rows()

# Left chart: BGC count per genome, coloured by BiG-SCAPE class.
chart_class = alt.Chart(source).mark_bar().encode(
    y=alt.Y('genome_id', axis=alt.Axis(title='Genome ID'),
            sort=alt.EncodingSortField(field="Genus", op="count", order='descending')),
    x=alt.X('count(product)', axis=alt.Axis(title='Number of BGCs')),
    color='bigscape_class',
    tooltip=['genome_id', 'bigscape_class', 'count(bigscape_class)', 'product', 'Organism']
).properties(
    width=W,
    height=H,
    title="BGCs count overview",
).interactive()

# Right chart: same bars coloured by family type; y labels are hidden because
# the chart sits next to the first one.
chart_known = alt.Chart(source).mark_bar().encode(
    y=alt.Y('genome_id', axis=alt.Axis(title='', labels=False),
            sort=alt.EncodingSortField(field="Genus", op="count", order='descending')),
    x=alt.X('count(product)', axis=alt.Axis(title='Number of BGCs')),
    color='fam_type',
    tooltip=['genome_id', 'fam_type', 'count(fam_type)', 'product', 'Organism']
).properties(
    width=W,
    height=H,
    title="Known BGCs count",
).interactive()

chart = alt.hconcat(chart_class, chart_known)
chart = chart.configure_title(fontSize=10, offset=10, orient='top', anchor='middle')
chart = chart.configure_axisY(labelFontSize=8)
chart

## Summary Tables

### Genome overview
Number of BGCs of each type (known, unknown, unique) present in each genome. The number of BGCs in each of the BiG-SCAPE-defined biosynthetic classes is also listed.

In [None]:
# Start the genome overview with a "Genome ID" column that is also the index.
df_genomes = (
    df_antismash['genome_id']
    .rename("Genome ID")
    .to_frame()
    .set_index("Genome ID", drop=False)
)

In [None]:
# Species label without the "s__" prefix, and the antiSMASH BGC count per genome.
df_genomes['GTDB species'] = [df_gtdb.loc[idx, 'Organism'].split('__')[1] for idx in df_genomes.index]
df_genomes['BGCs'] = df_antismash.loc[df_genomes.index, 'bgcs_count']

bigscape_class_list = df_bgcs.bigscape_class.unique()

# Bug fix: `"known_family" in series` tests the Series *index*, not its
# values, so the Known/Unknown columns were never filled. Test the values.
fam_types = df_bgcs['fam_type_0.30']
has_known_family = (fam_types == "known_family").any()
has_unknown_family = (fam_types == "unknown_family").any()

# Per-genome number of BGCs in single-cluster families. This does not depend
# on the genome, so it is computed once instead of once per loop iteration.
unique_family_ids = [str(idx) for idx in df_gcfs[df_gcfs.clusters_in_fam==1].index]
unique_bgcs_per_genome = df_gcf_presence.loc[:, unique_family_ids].sum(1)

server_path = "<a href='{{ project().file_server() }}/antismash/{{project().dependency_version()}}/"

# The index holds the plain genome ids (the "Genome ID" column is overwritten
# with links below, so the index is used for all lookups).
for gid in df_genomes.index:
    genome_bgcs = df_bgcs[df_bgcs.genome_id == gid]

    # .get(..., 0) covers genomes that have no BGC of the given family type.
    fam_type_counts = genome_bgcs['fam_type_0.30'].value_counts()
    if has_known_family:
        df_genomes.loc[gid, 'Known BGCs'] = fam_type_counts.get('known_family', 0)
    if has_unknown_family:
        df_genomes.loc[gid, 'Unknown BGCs'] = fam_type_counts.get('unknown_family', 0)
    df_genomes.loc[gid, 'Unique BGCs'] = unique_bgcs_per_genome[gid]

    # One column per BiG-SCAPE class present in this genome.
    class_counts = genome_bgcs['bigscape_class'].value_counts()
    for bigscape_class in bigscape_class_list:
        if bigscape_class in class_counts.index:
            df_genomes.loc[gid, bigscape_class] = class_counts[bigscape_class]

    # Replace the plain id with a link to the genome's antiSMASH page.
    df_genomes.loc[gid, "Genome ID"] = server_path + f"{gid}/index.html' target='_blank''>{gid}</a>"
df_genomes = df_genomes.reset_index(drop=True)

In [None]:
# Missing counts mean "none", so replace NaN with 0.
df_genomes = df_genomes.fillna(0)

# Every column except the two label columns holds counts: cast them to int.
numeric_cols = [c for c in df_genomes.columns if c not in ['Genome ID', 'GTDB species']]
df_genomes = df_genomes.astype({count_column: int for count_column in numeric_cols})

In [None]:
# Light-to-green colormap used for the heatmap shading of the tables.
cm = sns.light_palette(color="green", as_cmap=True)
# Shade the genome overview with one gradient across the whole table (axis=None).
df_genomes_styled = df_genomes.style.background_gradient(axis=None, cmap=cm)

In [None]:
# Render the styled genome overview as a centred, horizontally scrollable DataTable.
genome_table_html = DT(
    df_genomes_styled,
    columnDefs=[{"className": "dt-center", "targets": "_all"}],
    scrollX=True,
)
display(HTML(genome_table_html))

In [None]:
regions_table = report_dir / f"tables/df_regions_antismash_{antismash_version}.csv"
# Read "region" as text: the value is split on "." to build the antiSMASH
# anchor, and parsing it as a float would turn e.g. "1.10" into 1.1.
df_regions = pd.read_csv(regions_table, dtype={"region": str}).set_index("bgc_id")

In [None]:
# Add HTML links for every BGC: one to its region anchor on the genome's
# antiSMASH page ("BGC ID") and one to the page itself ("Genome ID").
link_prefix = "<a href='{{ project().file_server() }}/antismash/{{project().dependency_version()}}/"
for bgc_id, genome in df_bgcs['genome_id'].items():
    # The region value "<r>.<c>" becomes the page anchor "#r<r>c<c>".
    record_no, region_no = str(df_regions.loc[bgc_id, "region"]).split(".")
    anchor = f"#r{record_no}c{region_no}"
    df_bgcs.loc[bgc_id, "BGC ID"] = link_prefix + f"{genome}/index.html{anchor}' target='_blank''>{bgc_id}</a>"
    df_bgcs.loc[bgc_id, "Genome ID"] = link_prefix + f"{genome}/index.html' target='_blank''>{genome}</a>"

### BGC overview
Table of BGCs with their GCF assignments from BiG-SCAPE.

In [None]:
# Human-readable headers for the two classification columns.
rename_columns = {
    "product": "antiSMASH Product",
    "bigscape_class": "BiG-SCAPE Class",
}
df_bgcs = df_bgcs.rename(columns=rename_columns)

# Columns shown in the BGC overview, indexed by the linked BGC id.
columns_to_display = [
    "antiSMASH Product",
    "BiG-SCAPE Class",
    "Genome ID",
    "gcf_0.30",
    "fam_id_0.30",
    "fam_type_0.30",
    "fam_known_compounds_0.30",
]
bgc_overview = df_bgcs.set_index("BGC ID")[columns_to_display]

bgc_overview_html = DT(
    bgc_overview,
    columnDefs=[{"className": "dt-center", "targets": "_all", "searchable": True}],
    maxColumns=df_bgcs.shape[1],
    maxBytes=0,
    scrollX=True,
)
display(HTML(bgc_overview_html))

### GCF overview
GCFs table with metadata and statistics.

In [None]:
# Shade the GCF table with a single gradient over all cells (axis=None).
df_gcfs_styled = df_gcfs.style.background_gradient(axis=None, cmap=cm)

gcf_table_html = DT(
    df_gcfs_styled,
    columnDefs=[{"className": "dt-center", "targets": "_all", "searchable": True}],
    scrollX=True,
)
display(HTML(gcf_table_html))

### GCF-presence matrix
GCF presence/absence matrix across all genomes. The columns are GCF IDs, which can be looked up in the GCF overview table above. Only GCFs that contain more than one BGC are shown.


In [None]:
# Presence matrix restricted to families with more than one cluster, with one
# row per genome of the antiSMASH summary.
df_gcf_presence_raw = pd.DataFrame(index=df_antismash.index, columns=["Genome ID"])

non_unique_families = [str(family) for family in df_gcfs.loc[df_gcfs.clusters_in_fam > 1].index]
df_gcf_presence_raw[non_unique_families] = df_gcf_presence[non_unique_families]

# The "final" copy keeps a linked "Genome ID" column; the raw matrix drops it
# so that only the family columns remain.
df_gcf_presence_final = df_gcf_presence_raw.copy()
df_gcf_presence_raw = df_gcf_presence_raw.drop(columns=["Genome ID"])

genome_link_prefix = "<a href='{{ project().file_server() }}/antismash/{{project().dependency_version()}}/"
df_gcf_presence_final["Genome ID"] = [
    genome_link_prefix + f"{genome}/index.html' target='_blank''>{genome}</a>"
    for genome in df_gcf_presence_final.index
]

df_gcf_presence_final = df_gcf_presence_final.reset_index(drop=True)

In [None]:
# Hierarchically cluster the genomes (rows) on their presence profiles and
# reorder the rows by the resulting leaf order.
linkage_matrix = linkage(pdist(df_gcf_presence_raw.values))
leaf_order = leaves_list(linkage_matrix)
df_gcf_presence_ordered = df_gcf_presence_raw.iloc[leaf_order]

# Row total of each genome, kept in a helper column.
df_gcf_presence_ordered = df_gcf_presence_ordered.assign(sum=df_gcf_presence_ordered.sum(axis=1))

# Variant sorted by that total (largest first), without the helper column.
df_gcf_presence_sorted = (
    df_gcf_presence_ordered
    .sort_values('sum', ascending=False)
    .drop(columns='sum')
)

# Long format for Altair: one row per (genome, GCF) pair.
presence_matrix = df_gcf_presence_ordered.drop(columns='sum')
source = presence_matrix.reset_index().melt(id_vars='genome_id', var_name='gcf_id', value_name='count')

# Column total of each GCF divided by the number of genomes.
source_bar = presence_matrix.sum(axis=0) / len(df_gcf_presence_ordered.index)

# Same values as a two-column table for the bar chart.
df_bar = source_bar.reset_index()
df_bar.columns = ['gcf_id', 'percentage_distribution']

In [None]:
# Axis orders: genomes follow the hierarchical-clustering leaf order; GCF
# columns keep the order they have in the presence matrix (row reordering
# does not change the column order).
clustered_rows = df_gcf_presence_raw.iloc[leaves_list(linkage_matrix)]
ordered_gcf_ids = clustered_rows.columns.to_list()
ordered_genome_ids = clustered_rows.index.to_list()

# Slider that sets the minimum fraction of genomes a GCF must occur in.
# `selection_point` / `add_params` replace the deprecated
# `selection_single` / `add_selection` aliases.
slider = alt.binding_range(min=0, max=1, step=0.05, name='Minimum GCF Distribution Across Genome: ')
slider_selection = alt.selection_point(name="Percentage", fields=['percentage_distribution'], bind=slider, value=[{'percentage_distribution': 0}])

# Heatmap of GCF counts per genome. The per-GCF fraction is looked up from
# df_bar so that the slider can filter on it.
base = alt.Chart(source).properties(height=600)
heatmap = base.mark_rect().encode(
    alt.Y('genome_id:O', title="Genome IDs", sort=ordered_genome_ids),
    alt.X('gcf_id:O', title="GCF IDs", sort=ordered_gcf_ids),
    color='count:Q'
).transform_lookup(
    lookup='gcf_id',
    from_=alt.LookupData(df_bar, 'gcf_id', ['percentage_distribution'])
).add_params(
    slider_selection
).transform_filter(
    'datum.percentage_distribution >= Percentage.percentage_distribution'
)

# Bar chart of the per-GCF fraction, filtered by the same slider.
bar_base = alt.Chart(df_bar).properties(height=100)
bar_chart = bar_base.mark_bar().encode(
    alt.X('gcf_id:O', title="", sort=ordered_gcf_ids),
    alt.Y('percentage_distribution:Q', title="GCF Distribution").axis(format='%'),
).add_params(
    slider_selection
).transform_filter(
    'datum.percentage_distribution >= Percentage.percentage_distribution'
)

# Stack the two charts on a shared x scale.
chart = alt.vconcat(bar_chart, heatmap).resolve_scale(x='shared')

outfile = Path("assets/figures/bigscape_gcf_presence.html")
outfile.parent.mkdir(exist_ok=True, parents=True)
chart.save(outfile)

# Re-read the saved page and wrap it in a scrollable container for display.
with open(str(outfile), 'r') as f:
    html_string = f.read()

html_string = '<div style="width:100%; overflow:auto">' + html_string + '</div>'

display(HTML(html_string))

### MIBIG overview
Information on the known clusters from the MIBiG database that BiG-SCAPE detected in the genomes.

In [None]:
# Display names for the MIBiG table columns.
mibig_columns = {
    'product': 'BGC type',
    'compounds': 'Compounds',
    'chem_acts': 'Activity',
    'bgc_id': 'MIBIG ID',
    'Genomes': 'Genomes',
}

# Keep the MIBiG entries referenced by a GCF, move the id out of the index,
# add an empty "Genomes" column and apply the display names.
referenced_mibig_ids = df_gcfs.mibig_ids.unique()
df_mibig_final = (
    df_mibig[df_mibig.index.isin(referenced_mibig_ids)]
    .reset_index()
    .assign(Genomes=pd.Series())
    .rename(columns=mibig_columns)
)

In [None]:
# Fill in, for every MIBiG entry, its family id and the number of genomes
# carrying that family, then turn the id into a link to the MIBiG repository.
#
# Bug fix: df_mibig_final has a positional index (it was reset), so the row
# label is not the MIBiG id. The id is now read from the "MIBIG ID" column;
# previously the integer row label was used for both the lookup (which always
# failed, leaving every count at 0) and the link target.
server_path = "<a href='https://mibig.secondarymetabolites.org/repository/"
for row_label in df_mibig_final.index:
    mibig_id = df_mibig_final.loc[row_label, "MIBIG ID"]
    try:
        fam_id = str(int(df_mibig.loc[mibig_id, 'fam_id_0.30']))
        genome_count = df_gcf_presence[fam_id].sum()
    except (KeyError, ValueError, TypeError):
        # No usable family id for this entry (missing column/row, NaN, or an
        # ambiguous lookup): leave the count to be filled with 0 below.
        pass
    else:
        df_mibig_final.loc[row_label, 'Genomes'] = genome_count
        df_mibig_final.loc[row_label, 'GCF ID'] = fam_id

    df_mibig_final.loc[row_label, "MIBIG ID"] = server_path + f"{mibig_id}/' target='_blank''>{mibig_id}</a>"

df_mibig_final['Genomes'] = df_mibig_final['Genomes'].fillna(0).astype(int)
df_mibig_final = df_mibig_final.reset_index(drop=True)

In [None]:
# Shade the MIBiG table with a single gradient over all cells (axis=None).
df_mibig_final_styled = df_mibig_final.style.background_gradient(axis=None, cmap=cm)

mibig_table_html = DT(
    df_mibig_final_styled,
    columnDefs=[{"className": "dt-center", "targets": "_all", "searchable": True}],
    scrollX=True,
)
display(HTML(mibig_table_html))

## References
<font size="2">
{% for i in project().rule_used['bigscape']['references'] %}
- {{ i }} 
{% endfor %}
</font>