# ARTS2
Summary of [ARTS2](link) results from project: `[{{ project().name }}]` 

## Description
This report presents the results from ARTS2 (Antibiotic Resistant Target Seeker Version 2), a bioinformatic tool for targeted genome mining of antibiotic resistance targets. The analysis focuses on identifying genes that show signatures of antibiotic resistance, gene duplication, horizontal gene transfer, or proximity to biosynthetic gene clusters (BGCs).

Here, only ARTS2 profiles with BGC proximity are shown.

Table Description:

 - **Dup**: Uncommon gene duplication. Highlights potentially repurposed primary metabolism genes.
 - **Res**: Hits to known resistance models.
 - **BGC**: Cross-reference of hit locations with, or their proximity to, secondary metabolite BGCs.
 - **HGT**: Phylogenetic incongruence. Highlights essential genes with evidence of inter-genus horizontal gene transfer (HGT).


In [None]:
# Import necessary libraries for data manipulation, plotting, and file handling
import pandas as pd
import altair as alt
import ast
import json
import networkx as nx
from pathlib import Path
import plotly.graph_objects as go
import seaborn as sns

# Import libraries for displaying data and markdown in Jupyter notebooks
from IPython.display import display, Markdown, HTML

# Import library for creating HTML data tables
from itables import to_html_datatable as DT
import itables.options as opt

# Silence every warning category so that library warnings do not end up in the rendered report
import warnings
warnings.filterwarnings('ignore')

# Green sequential colormap built with seaborn
cm = sns.light_palette("green", as_cmap=True)

# Global itables options: table classes, page-length choices and the CSS applied to cells/headers
opt.classes = ["display", "compact"]
opt.lengthMenu = [5, 10, 20, 50, 100, 200, 500]
opt.css = (
    "\n"
    ".itables table td { font-style: italic; font-size: .8em;}\n"
    ".itables table th { font-style: oblique; font-size: .8em; }\n"
)

In [None]:
# Load the project description and the tool versions written next to the report.
# The encoding is given explicitly so the result does not depend on the platform's
# default locale encoding.
with open("../metadata/project_metadata.json", "r", encoding="utf-8") as f:
    project_configuration = json.load(f)
with open("../metadata/dependency_versions.json", "r", encoding="utf-8") as f:
    dependency_version = json.load(f)

In [None]:
# The project name is the first key of the project metadata
project_name = list(project_configuration)[0]
antismash_version = dependency_version["antismash"]
report_dir = Path("../")
tables_dir = report_dir / "tables"

# ARTS2 result tables (file names carry the antiSMASH version)
df_arts = pd.read_csv(tables_dir / f"df_arts_bgctable_as-{antismash_version}.csv")
df_arts_core = pd.read_csv(tables_dir / f"df_arts_coretable_as-{antismash_version}.csv")
df_arts_hits = pd.read_csv(tables_dir / f"df_arts_allhits_as-{antismash_version}.csv")

# antiSMASH regions and GTDB metadata, both indexed by their first column
df_bgcs = pd.read_csv(tables_dir / f"df_regions_antismash_{antismash_version}.csv", index_col=0)
df_tax = pd.read_csv(tables_dir / "df_gtdb_meta.csv", index_col=0)

In [None]:
# Flatten the per-BGC ARTS gene lists into one record per (BGC, ARTS hit) pair.
# Every cell of the "Genelist" column holds the text of a Python list whose items
# are 7-field hits: id, profile, start, stop, hit type, description, function.
HIT_FIELD_COUNT = 7
hit_columns = [
    "arts_id", "genome_id", "bgc_id", "bgc_type", "profile",
    "start", "stop", "hits_type", "description", "function",
]

records = []
for genelist, bgc_id, genome_id, bgc_type in zip(
    df_arts["Genelist"], df_arts["bgc_id"], df_arts["genome_id"], df_arts["Type"]
):
    # literal_eval parses Python literals only; the cell text is never executed
    for hit in ast.literal_eval(genelist):
        # Raise explicitly (instead of assert) so the check also runs under `python -O`
        if len(hit) != HIT_FIELD_COUNT:
            raise ValueError(
                f"Expected {HIT_FIELD_COUNT} fields per ARTS hit for {bgc_id}, got {len(hit)}: {hit!r}"
            )
        function = hit[6]
        # Hits without a functional category are labelled as resistance models
        if function == "N/A":
            function = "ResModel"
        records.append({
            'arts_id': int(hit[0]),
            'genome_id': genome_id,
            'bgc_id': bgc_id,
            'bgc_type': bgc_type,
            'profile': hit[1],
            'start': int(hit[2]),
            'stop': int(hit[3]),
            'hits_type': hit[4],
            'description': hit[5],
            'function': function,
        })

# Explicit columns keep the frame usable (empty but well-formed) when no hit exists;
# the index keeps the 1-based numbering of the hits.
df_hits = pd.DataFrame(records, columns=hit_columns, index=range(1, len(records) + 1))

In [None]:
# Lookup table: ARTS profile -> functional category (for repeated profiles the last row wins)
function_map = dict(zip(df_hits["profile"], df_hits["function"]))

In [None]:
# Annotate every ARTS hit with the functional category of its profile.
# Mapping the whole column at once guarantees that the "function" column exists
# even when no profile is found in function_map (unmatched rows become NaN).
mapped_function = df_arts_hits["core_gene_or_model"].map(function_map)
if "function" in df_arts_hits.columns:
    # Keep any pre-existing value for rows whose profile is not in function_map
    mapped_function = mapped_function.where(mapped_function.notna(), df_arts_hits["function"])
df_arts_hits["function"] = mapped_function

In [None]:
# Select columns of boolean type from the dataframe
bool_cols = df_arts_hits.select_dtypes(include=['bool'])

# Create a new column 'hits' that is the number of True values in each row
df_arts_hits['hits'] = bool_cols.sum(axis=1)

# Replace True/False with '✔'/'✖' in the ARTS criteria columns
bool_to_symbol = {True: '✔', False: '✖'}
for col in ['duplication', 'phylogeny', 'known_target', 'bgc_proximity']:
    df_arts_hits[col] = df_arts_hits[col].map(bool_to_symbol)

# Rename columns for display purposes
renamed_columns = {
    "core_gene_or_model": "profile",
    "duplication": "Dup",
    'known_target': "Res",
    'bgc_proximity': "BGC",
    "phylogeny": "HGT"
}
df_arts_hits = df_arts_hits.rename(columns=renamed_columns)

# Export the table before the HTML link columns are added
outfile = Path(f"assets/tables/arts_hits_as{antismash_version}.csv")
outfile.parent.mkdir(exist_ok=True, parents=True)
df_arts_hits.to_csv(outfile, index=False)

# Add links to server.
# The {{ ... }} placeholders are kept literally in the string (this is not an f-string).
server_path = "<a href='{{ project().file_server() }}/antismash/{{project().dependency_version()}}/"

genome_links = []
bgc_links = []
for gid, bgc_id in zip(df_arts_hits["genome_id"], df_arts_hits["bgc_id"]):
    genome_links.append(f"{server_path}{gid}/index.html' target='_blank'>{gid}</a>")
    if bgc_id in df_bgcs.index:
        # NOTE(review): if `region` was parsed as a float by read_csv, a region such as
        # 1.10 would arrive here as "1.1" - confirm the dtype of that column.
        r, c = str(df_bgcs.loc[bgc_id, "region"]).split(".")
        region_id = f"#r{r}c{c}"
        bgc_links.append(f"{server_path}{gid}/index.html{region_id}' target='_blank'>{bgc_id}</a>")
    else:
        # No antiSMASH region known for this hit: leave the cell empty
        bgc_links.append(float("nan"))

# Assigning whole columns guarantees both exist, even for an empty table or when
# no hit maps to a known region
df_arts_hits["Genome ID"] = genome_links
df_arts_hits["BGC ID"] = bgc_links

# Select columns to display
columns_to_display = ['profile', 'name', 'product', 'function', 'hits', 'Dup', 'HGT', 'Res', 'BGC', 'BGC ID', 'Genome ID']
df_arts_hits_display = df_arts_hits.loc[:, columns_to_display]

# Display only hits with BGC proximity
df_arts_hits_display = df_arts_hits_display[df_arts_hits_display["BGC"] != '✖'].reset_index(drop=False)

In [None]:
# Render the filtered hits as an interactive HTML table;
# the 'dt-center' class is applied to every column
centered_columns = [{"className": "dt-center", "targets": "_all"}]
table_html = DT(df_arts_hits_display, columnDefs=centered_columns)
display(HTML(table_html))

In [None]:
# Count the distinct BGCs, genomes and ARTS profiles among the BGC-proximal hits
n_bgcs = len(df_hits["bgc_id"].unique())
n_genomes = len(df_hits["genome_id"].unique())
n_profiles = len(df_hits["profile"].unique())

summary_report = (
    f"A total of **{n_bgcs} BGCs** from {n_genomes} genomes "
    f"were found to have hits with **{n_profiles} ARTS2 profile**."
)
display(Markdown(summary_report))

In [None]:
# Palette for the functional categories, assigned in order of decreasing frequency
color = ["#264653", "#287271", "#2a9d8f", "#8ab17d", "#e9c46a", "#f4a261", "#ee8959", "#e76f51"]

# Functional category -> number of hits (value_counts sorts by frequency, descending)
function_map = df_hits.function.value_counts().to_dict()

# Categories with a fixed colour. They never use up a palette slot, so "ResModel"
# is always red regardless of where it ranks among the categories.
special_colors = {"Unclassified": "grey", "ResModel": "red"}

palette = iter(color)
arts_function_color_map = {}
for item in function_map:
    if item in special_colors:
        arts_function_color_map[item] = special_colors[item]
    else:
        # Once the palette is exhausted the remaining categories fall back to grey
        arts_function_color_map[item] = next(palette, "grey")

# Node attribute tables: one row per ARTS profile and one row per BGC
arts_node_mapping = df_hits.loc[:, ["profile", "description", "function"]].drop_duplicates().set_index("profile", drop=False)
bgc_id_mapping = df_hits.loc[:, ["bgc_id", "bgc_type", "genome_id"]].drop_duplicates().set_index("bgc_id", drop=False)

# Give both node kinds the same attribute set; attributes that do not apply are None
for c in ["bgc_id", "bgc_type", "genome_id"]:
    arts_node_mapping[c] = None

for c in ["profile", "description", "function"]:
    bgc_id_mapping[c] = None

# {node id: {attribute: value}}
arts_node_mapping = arts_node_mapping.T.to_dict()
bgc_id_mapping = bgc_id_mapping.T.to_dict()

In [None]:
# Build the BGC <-> ARTS profile graph (one edge per row of df_hits) and compute a layout
color_map = []
G = nx.from_pandas_edgelist(df_hits, source='bgc_id', target='profile')

pos = nx.nx_agraph.graphviz_layout(G)

# bgc_id -> value of the 'hits' column (rows without a bgc_id or a hits value are dropped)
region_score = df_arts_hits.loc[:, ["bgc_id", "hits"]].dropna().set_index("bgc_id").to_dict()['hits']

for g in G.nodes:
    node = G.nodes[g]
    # annotate ARTS model
    if g in arts_node_mapping:
        node.update(arts_node_mapping[g])
        # A separate name keeps the module-level palette `color` intact
        node_color = arts_function_color_map[node['function']]
        color_map.append(node_color)
        node["color"] = node_color
        node["node_type"] = node["function"]
        node["text"] = f"{node['profile']}<br>{node['function']}<br>{node['description']}"
    # annotate BGCs
    elif g in bgc_id_mapping:
        node.update(bgc_id_mapping[g])
        node_color = "blue"
        color_map.append(node_color)
        node["color"] = node_color
        node["node_type"] = "BGC"
        taxonomy = df_tax.loc[node['genome_id'], "Organism"]
        node["text"] = f"{node['bgc_id']}<br>{node['bgc_type']}<br>{node['genome_id']}<br>{taxonomy}"
        # A BGC missing from df_arts_hits counts as zero instead of raising KeyError
        score = region_score.get(g, 0)
        if score > 1:
            node["node_type"] = f"BGC_with_{score}_ARTS_hits"
            node["text"] = node["text"] + "<br>" + f"ARTS hits: {score}"

In [None]:
# Store the layout coordinates on each node under the 'pos' attribute
for node_id in pos:
    G.nodes[node_id]['pos'] = pos[node_id]

In [None]:
# Collect the edge coordinates first and build the trace once; growing the trace's
# x/y tuples edge by edge re-creates them on every iteration.
edge_x = []
edge_y = []
for source, target in G.edges():
    x0, y0 = G.nodes[source]['pos']
    x1, y1 = G.nodes[target]['pos']
    # The trailing None separates one edge's line segment from the next
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    name="ARTS2 hit",
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')

In [None]:
def create_node_trace(G, node_type, shape="circle", opacity=0.8, linewidth=0, linecolor="red"):
    """Build the scatter trace for all nodes of one ``node_type``.

    Args:
        G: graph whose nodes carry the attributes ``node_type``, ``pos``
            (an ``(x, y)`` pair), ``color`` and ``text``.
        node_type: only nodes whose ``node_type`` equals this value are drawn;
            it is also used as the trace name.
        shape: marker symbol.
        opacity: trace opacity.
        linewidth: width of the marker outline.
        linecolor: colour of the marker outline.

    Returns:
        A ``go.Scatter`` in ``markers`` mode with one point per matching node,
        using the node id as point id and the node's ``text`` as hover text.
    """
    ids, xs, ys, colors, texts = [], [], [], [], []

    # A single pass keeps ids, coordinates, colours and hover texts aligned
    for node, attrs in G.nodes(data=True):
        if attrs['node_type'] != node_type:
            continue
        x, y = attrs['pos']
        ids.append(node)
        xs.append(x)
        ys.append(y)
        colors.append(attrs['color'])
        texts.append(attrs['text'])

    return go.Scatter(
        ids=ids,
        x=xs,
        y=ys,
        name=node_type,
        text=texts,
        mode='markers',
        hoverinfo='text',
        marker_symbol=shape,
        opacity=opacity,
        marker=dict(
            showscale=False,
            # Exactly one colour per drawn node
            color=colors,
            size=10,
            line=dict(width=linewidth,
                      color=linecolor)))

In [None]:
BGC_HITS_PREFIX = "BGC_with_"
BGC_HITS_SUFFIX = "_ARTS_hits"


def _bgc_hit_count(node_type):
    """Return the number encoded in a ``BGC_with_<n>_ARTS_hits`` label, or None for any other label."""
    label = str(node_type)
    if not (label.startswith(BGC_HITS_PREFIX) and label.endswith(BGC_HITS_SUFFIX)):
        return None
    try:
        return int(float(label[len(BGC_HITS_PREFIX):-len(BGC_HITS_SUFFIX)]))
    except ValueError:
        return None


# One trace per functional category, one for plain BGCs ...
node_traces = list(function_map.keys())
node_traces.append("BGC")

# ... and one per multi-hit BGC category that actually occurs in the graph, so that
# no BGC is left out of the figure however many hits it has.
multi_hit_types = {}
for _, attrs in G.nodes(data=True):
    hit_count = _bgc_hit_count(attrs.get("node_type"))
    if hit_count is not None:
        multi_hit_types[attrs["node_type"]] = hit_count
node_traces.extend(sorted(multi_hit_types, key=multi_hit_types.get))

traces = [edge_trace]
for trace in node_traces:
    # Default style: ARTS profiles are drawn as squares without outline
    shape = "square"
    linewidth = 0
    linecolor = "black"
    hit_count = multi_hit_types.get(trace)
    if trace == "BGC":
        shape = "circle"
    elif hit_count is not None:
        # The more hits a BGC has, the stronger its outline
        shape = "circle"
        if hit_count <= 2:
            linewidth = 1
            linecolor = "orange"
        elif hit_count == 3:
            linewidth = 3
            linecolor = "red"
        else:
            linewidth = 5
            linecolor = "red"
    elif trace == "ResModel":
        shape = "star"
    new_trace = create_node_trace(G, trace, shape=shape, linewidth=linewidth, linecolor=linecolor)
    traces.append(new_trace)

In [None]:
# Both axes are purely positional: hide grid, zero line and tick labels
axis_style = dict(showgrid=False, zeroline=False, showticklabels=False)

# Transparent background, fixed canvas size, hover on the closest point
network_layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    showlegend=True,
    hovermode='closest',
    margin=dict(b=20, l=5, r=5, t=40),
    xaxis=dict(axis_style),
    yaxis=dict(axis_style),
    width=750,
    height=900,
)

fig = go.Figure(data=traces, layout=network_layout)

# Horizontal legend anchored at the bottom-left corner of the plot
legend_style = dict(orientation="h", yanchor="top", y=0, xanchor="left", x=0)
fig = fig.update_layout(legend=legend_style)

In [None]:
# Save the interactive figure as a standalone HTML file, then embed that file in the report
figure_dir = Path("assets/figures")
figure_dir.mkdir(parents=True, exist_ok=True)
outfile = figure_dir / f"arts_as{antismash_version}.html"
fig.write_html(outfile)

display(HTML(filename=str(outfile)))

In [None]:
# Convert tuple and None attribute values to strings in place before writing the GraphML file
for node in G.nodes:
    attrs = G.nodes[node]
    for attrib, value in attrs.items():
        # Only existing keys are reassigned, so the dict size is unchanged during iteration
        if value is None or isinstance(value, tuple):
            attrs[attrib] = str(value)

outfile = Path("assets/data/arts.graphml")
outfile.parent.mkdir(parents=True, exist_ok=True)
nx.write_graphml(G, outfile)

[Download Graph](assets/data/arts.graphml){:target="_blank" .md-button}

## References
<font size="2">
{% for i in project().rule_used['arts']['references'] %}
- {{ i }} 
{% endfor %}
</font>