In [None]:
import pandas as pd
from pathlib import Path
import sqlite3

import math
import json, yaml
import altair as alt
import ast

import warnings
warnings.filterwarnings('ignore')

import networkx as nx
import plotly.graph_objects as go
import altair as alt

def shannon_div(data):
    """
    Compute the Shannon diversity index (natural logarithm) of a count mapping.

    Parameters
    ----------
    data : mapping
        Category -> count (e.g. genus name -> number of BGCs).

    Returns
    -------
    float or int
        H = -sum(p_i * ln(p_i)) with p_i = count_i / total. Returns 0 when the
        mapping is empty or all counts are zero.
    """
    total = sum(data.values())
    if not total:
        # Nothing to distribute: diversity is zero (also avoids dividing by zero).
        return 0
    # Zero counts contribute nothing to H (0 * ln 0 is taken as 0) and would
    # otherwise make math.log raise a domain error.
    proportions = (count / total for count in data.values() if count)
    return abs(sum(p * math.log(p) for p in proportions))

In [None]:
def get_bigfam_class(gcf_id, conn, threshold="<= 900", rank=0):
    """
    Summarise the chemical class / subclass composition of one GCF model.

    Parameters
    ----------
    gcf_id : int
        GCF identifier matched against ``gcf_membership.gcf_id``.
    conn : database connection
        Connection passed to ``pandas.read_sql_query`` (qmark placeholders).
    threshold : str
        SQL comparison fragment applied to ``membership_value``. It is spliced
        into the statement verbatim, so it must come from trusted code only.
    rank : int
        Value matched against ``gcf_membership.rank``.

    Returns
    -------
    dict
        ``bgc_member`` (number of distinct BGCs), ``chemical_class_hits``
        (number of joined rows), the top class / subclass with their
        proportions, and the full ``chemical_class`` / ``chemical_subclass``
        proportion mappings.

    Raises
    ------
    ValueError
        If no membership row matches, or none of the matching BGCs carries a
        class annotation.
    """
    # gcf_id and rank are bound as parameters; only the operator fragment in
    # `threshold` cannot be bound and is interpolated as-is.
    sql_query = f"""
    SELECT 
        "gcf_membership"."gcf_id" AS "gcf_id", 
        "gcf_membership"."bgc_id" AS "bgc_id",      
        "gcf_membership"."membership_value" AS "membership_value", 
        "gcf_membership"."rank" AS "rank", 
        "Bgc Class - Bgc"."bgc_id" AS "Bgc Class - Bgc__bgc_id", 
        "Bgc Class - Bgc"."chem_subclass_id" AS "Bgc Class - Bgc__chem_subclass_id", 
        "Chem Subclass"."id" AS "Chem Subclass__id", 
        "Chem Subclass"."class_id" AS "Chem Subclass__class_id", 
        "Chem Subclass"."name" AS "Chem Subclass__name", 
        "Chem Class - Class"."id" AS "Chem Class - Class__id", 
        "Chem Class - Class"."name" AS "Chem Class - Class__name"
    FROM "gcf_membership"
    LEFT JOIN "bgc_class" "Bgc Class - Bgc" ON "gcf_membership"."bgc_id" = "Bgc Class - Bgc"."bgc_id" LEFT JOIN "chem_subclass" "Chem Subclass" ON "Bgc Class - Bgc"."chem_subclass_id" = "Chem Subclass"."id" LEFT JOIN "chem_class" "Chem Class - Class" ON "Chem Subclass"."class_id" = "Chem Class - Class"."id"
    WHERE ("gcf_membership"."gcf_id" = ?
       AND "gcf_membership"."membership_value" {threshold} AND "gcf_membership"."rank" = ?)
    """
    df = pd.read_sql_query(sql_query, conn, params=(int(gcf_id), int(rank)))

    # A BGC with several class annotations appears once per annotation; give
    # each of its rows an equal share so every BGC contributes a total of 1.
    rows_per_bgc = df["bgc_id"].map(df["bgc_id"].value_counts())
    df["score"] = 1 / rows_per_bgc

    bgc_member = len(df["bgc_id"].unique())
    # groupby drops rows whose class name is NULL, so BGCs without an
    # annotation still count in the denominator but not in any class.
    chemical_class = (
        df.groupby("Chem Class - Class__name")["score"].sum() / bgc_member
    ).to_dict()
    chemical_subclass = (
        df.groupby("Chem Subclass__name")["score"].sum() / bgc_member
    ).to_dict()
    if not chemical_class or not chemical_subclass:
        raise ValueError(
            f"No chemical class annotation found for GCF {gcf_id} "
            f"(membership_value {threshold}, rank = {rank})"
        )

    top_chemical_class = max(chemical_class, key=chemical_class.get)
    top_chemical_subclass = max(chemical_subclass, key=chemical_subclass.get)

    result = {"bgc_member" : bgc_member,
              "chemical_class_hits" : len(df),
              "top_chemical_class" : top_chemical_class,
              "top_chemical_class_proportion" : chemical_class[top_chemical_class],
              "top_chemical_subclass" : top_chemical_subclass,
              "top_chemical_subclass_proportion" : chemical_subclass[top_chemical_subclass],
              "chemical_class" : chemical_class,
              "chemical_subclass" : chemical_subclass,
              }
    return result

def get_bigfam_taxa(gcf_id, conn, threshold="<= 900", rank=0, level=5):
    """
    Summarise the taxonomic distribution of the BGCs belonging to one GCF model.

    Parameters
    ----------
    gcf_id : int
        GCF identifier matched against ``gcf_membership.gcf_id``.
    conn : database connection
        Connection passed to ``pandas.read_sql_query`` (qmark placeholders).
    threshold : str
        SQL comparison fragment applied to ``membership_value``. It is spliced
        into the statement verbatim, so it must come from trusted code only.
    rank : int
        Value matched against ``gcf_membership.rank``.
    level : int
        Value matched against ``taxon.level``.

    Returns
    -------
    dict
        ``taxonomic_hits`` (distinct BGCs), ``taxonomic_level`` (name of the
        level, or None when no BGC has a taxon at that level), ``H-index``
        (Shannon index of the distribution), ``richness`` (number of distinct
        taxa, including "Unassigned"), ``top_taxa`` with its proportion, and
        the full ``taxa_distribution`` count mapping.

    Raises
    ------
    ValueError
        If no membership row matches the GCF / threshold / rank.
    """
    # Every BGC that is a member of the GCF, whether or not it has a taxon.
    sql_all_hits = f"""
    SELECT 
        "gcf_membership"."gcf_id" AS "gcf_id", 
        "gcf_membership"."bgc_id" AS "bgc_id", 
        "gcf_membership"."membership_value" AS "membership_value", 
        "gcf_membership"."rank" AS "rank"
    FROM "gcf_membership"
    WHERE ("gcf_membership"."gcf_id" = ?
        AND "gcf_membership"."membership_value" {threshold} AND "gcf_membership"."rank" = ?)
    """

    # Members that do have a taxon at the requested level.
    sql_query = f"""
    SELECT 
        "gcf_membership"."gcf_id" AS "gcf_id", 
        "gcf_membership"."bgc_id" AS "bgc_id", 
        "gcf_membership"."membership_value" AS "membership_value", 
        "gcf_membership"."rank" AS "rank", 
        "Bgc Taxonomy - Bgc"."bgc_id" AS "Bgc Taxonomy - Bgc__bgc_id", 
        "Bgc Taxonomy - Bgc"."taxon_id" AS "Bgc Taxonomy - Bgc__taxon_id", 
        "Taxon"."id" AS "Taxon__id", "Taxon"."level" AS "Taxon__level", 
        "Taxon"."name" AS "Taxon__name", "Taxon Class - Level"."id" AS "Taxon Class - Level__id", 
        "Taxon Class - Level"."level" AS "Taxon Class - Level__level", 
        "Taxon Class - Level"."name" AS "Taxon Class - Level__name"
    FROM "gcf_membership"
    LEFT JOIN "bgc_taxonomy" "Bgc Taxonomy - Bgc" ON "gcf_membership"."bgc_id" = "Bgc Taxonomy - Bgc"."bgc_id" LEFT JOIN "taxon" "Taxon" ON "Bgc Taxonomy - Bgc"."taxon_id" = "Taxon"."id" LEFT JOIN "taxon_class" "Taxon Class - Level" ON "Taxon"."level" = "Taxon Class - Level"."level"
    WHERE ("gcf_membership"."gcf_id" = ?
        AND "gcf_membership"."membership_value" {threshold}
        AND "gcf_membership"."rank" = ?
        AND "Taxon"."level" = ?)
    """
    membership_params = (int(gcf_id), int(rank))
    df_all_hits = pd.read_sql_query(sql_all_hits, conn, params=membership_params)
    df = pd.read_sql_query(sql_query, conn, params=membership_params + (int(level),))
    if df_all_hits.empty:
        raise ValueError(
            f"No BGC membership found for GCF {gcf_id} "
            f"(membership_value {threshold}, rank = {rank})"
        )

    # The level name is the same on every row; it is unknown when no member
    # has a taxon at this level.
    level_names = df['Taxon Class - Level__name'].unique()
    taxonomic_level = level_names[0] if len(level_names) else None

    # Re-attach members without a taxon so they are counted as "Unassigned".
    df = df_all_hits.merge(df, on=["bgc_id", "gcf_id", "membership_value", "rank"], how="outer")
    df["Taxon__name"] = df["Taxon__name"].fillna("Unassigned")

    taxa_distribution = df.Taxon__name.value_counts().to_dict()
    top_genus = max(taxa_distribution, key=taxa_distribution.get)
    taxonomic_hits = len(df.bgc_id.unique())
    result = {"taxonomic_hits" : taxonomic_hits,
              "taxonomic_level" : taxonomic_level,
              "H-index" : shannon_div(taxa_distribution),
              "richness" : len(taxa_distribution.keys()),
              "top_taxa" : top_genus,
              "top_taxa_proportion" : taxa_distribution[top_genus] / taxonomic_hits,
              "taxa_distribution" : taxa_distribution,
             }
    return result

def get_bigfam_summary(gcf_id, conn, threshold="<= 900", rank=0, level=5):
    """
    Combine the chemical-class and taxonomic summaries of one GCF model.

    ``threshold`` and ``rank`` are forwarded to both helper queries and
    ``level`` to the taxonomic one, so non-default values take effect.

    Returns
    -------
    dict
        ``{gcf_id: summary}`` where ``summary`` merges the dictionaries
        returned by ``get_bigfam_class`` and ``get_bigfam_taxa``.
    """
    result = get_bigfam_class(gcf_id, conn, threshold=threshold, rank=rank)
    result.update(get_bigfam_taxa(gcf_id, conn, threshold=threshold, rank=rank, level=level))
    return {gcf_id : result}

In [None]:
def generate_bigfam_network(df, rank=0):
    """
    Build a networkx graph linking BGCs to GCF models from a hit table.

    Parameters
    ----------
    df : pandas.DataFrame
        Edge table with columns ``bgc_id``, ``gcf_id``, ``membership_value``
        and ``rank``.
    rank : int
        Only rows whose ``rank`` equals this value become edges.

    Returns
    -------
    networkx.Graph
        One edge per retained row, carrying ``membership_value`` and ``rank``
        as edge attributes. The input frame is not modified.
    """
    # Keep the filtered frame: the original expression discarded its result,
    # so every rank ended up in the graph.
    edges = df[df['rank'] == rank]
    # NOTE(review): edge_key looks relevant only to multigraphs and is
    # presumably ignored for the default Graph — confirm against networkx docs.
    G = nx.from_pandas_edgelist(edges, source='bgc_id', target='gcf_id', edge_attr=['membership_value',
           'rank'], edge_key='bigfam_edge_id')
    return G

In [None]:
def annotate_bigfam_models(G, df, columns=None):
    """
    Attach GCF model summaries to the graph nodes that appear in ``df.index``.

    Parameters
    ----------
    G : graph
        Object exposing ``G.nodes`` (iterable of node ids, indexable to a
        mutable attribute mapping). It is modified in place.
    df : pandas.DataFrame
        Summary table indexed by GCF id.
    columns : list of str, optional
        Columns copied onto each matching node. Defaults to the standard set
        of class / taxonomy summary columns.

    Returns
    -------
    The same graph object ``G``.
    """
    if columns is None:
        # Built per call instead of using a shared mutable default argument.
        columns = ['bgc_member', 'chemical_class_hits', 'top_chemical_class',
                   'top_chemical_class_proportion', 'top_chemical_subclass',
                   'top_chemical_subclass_proportion', 'taxonomic_hits',
                   'taxonomic_level', 'H-index', 'richness', 'top_taxa',
                   'top_taxa_proportion']
    for bgc in G.nodes:
        if bgc not in df.index:
            continue
        attrs = G.nodes[bgc]
        attrs['node_type'] = "BiG-FAM GCFs"
        # HTML hover label: one "<br>"-separated line per summary item.
        attrs['text'] = "<br>".join([
            f"GCF {bgc}",
            f"size: {df.loc[bgc, 'bgc_member']}",
            f"top_chemical_class: {df.loc[bgc, 'top_chemical_class']} ({df.loc[bgc, 'top_chemical_class_proportion']:.0%})",
            f"top_chemical_subclass: {df.loc[bgc, 'top_chemical_subclass']} ({df.loc[bgc, 'top_chemical_subclass_proportion']:.0%})",
            f"top_taxa: {df.loc[bgc, 'top_taxa']} ({df.loc[bgc, 'top_taxa_proportion']:.0%})",
        ])
        for c in columns:
            attrs[c] = df.loc[bgc, c]
    return G

In [None]:
def annotate_bigfam_antismash(G, antismash_region_path, columns=None):
    """
    Attach antiSMASH region annotations to the graph nodes found in the table.

    Parameters
    ----------
    G : graph
        Object exposing ``G.nodes`` (iterable of node ids, indexable to a
        mutable attribute mapping). It is modified in place.
    antismash_region_path : path-like
        CSV file whose first column is the region (BGC) id used as index.
    columns : list of str, optional
        Columns copied onto each matching node. Defaults to the standard set
        of region columns.

    Returns
    -------
    The same graph object ``G``.
    """
    if columns is None:
        # Built per call instead of using a shared mutable default argument.
        columns = ["genome_id", "product", "contig_edge", "region_length",
                   "most_similar_known_cluster_id",
                   "most_similar_known_cluster_description",
                   "most_similar_known_cluster_type", "similarity"]
    df_regions = pd.read_csv(antismash_region_path, index_col=0)
    # list(...) so a tuple of names is not mistaken for a multi-axis key.
    df_antismash = df_regions.loc[:, list(columns)]
    for bgc in G.nodes:
        if bgc not in df_antismash.index:
            continue
        attrs = G.nodes[bgc]
        attrs['node_type'] = "BGC"
        # The hover label reads from the full table so it still works when a
        # caller's `columns` leaves out the product / description fields.
        attrs['text'] = f"{bgc}<br>{df_regions.loc[bgc, 'product']}<br>{df_regions.loc[bgc, 'most_similar_known_cluster_description']}"
        for c in df_antismash.columns:
            attrs[c] = df_antismash.loc[bgc, c]
    return G

In [None]:
def create_edge_trace(G, name):
    """
    Build a single plotly line trace drawing every edge of ``G``.

    Each node must carry a ``'pos'`` attribute holding an ``(x, y)`` pair.
    Edges are concatenated into one coordinate sequence, separated by ``None``
    entries.

    Parameters
    ----------
    G : graph with ``edges()`` and ``nodes[...]`` access
    name : str
        Legend name of the trace.

    Returns
    -------
    plotly.graph_objects.Scatter
    """
    # Collect coordinates in plain lists and hand them to plotly once; growing
    # the trace property edge by edge re-copies the whole tuple every time.
    xs, ys = [], []
    for source, target in G.edges():
        x0, y0 = G.nodes[source]['pos']
        x1, y1 = G.nodes[target]['pos']
        xs.extend([x0, x1, None])
        ys.extend([y0, y1, None])

    edge_trace = go.Scatter(
        x=xs,
        y=ys,
        name=name,
        line=dict(width=0.5,color='#888'),
        hoverinfo='none',
        mode='lines')
    return edge_trace

def create_node_trace(G, node_trace_category, color, showtextlabel=False, nodesize=10, nodeopacity=0.8, nodesymbol="circle", linewidth=1, linecolor="black", textposition="top center"):
    """
    Build a plotly marker trace for the nodes of one category.

    A node is included when its ``'node_trace'`` attribute equals
    ``node_trace_category``; its ``'pos'`` attribute supplies the ``(x, y)``
    coordinates and its ``'text'`` attribute the hover / label text.

    Parameters
    ----------
    G : graph with ``nodes()`` and ``nodes[...]`` access
    node_trace_category : str
        Category to draw; also used as the legend name.
    color : marker fill colour
    showtextlabel : bool
        When true, labels are drawn next to the markers in addition to the
        hover text.
    nodesize, nodeopacity, nodesymbol, linewidth, linecolor, textposition :
        Passed through to the plotly marker / text settings.

    Returns
    -------
    plotly.graph_objects.Scatter
    """
    markermode = "markers+text" if showtextlabel else "markers"

    # Collect values in plain lists and hand them to plotly once; growing the
    # trace properties node by node re-copies the whole sequence every time.
    xs, ys, labels = [], [], []
    for node in G.nodes():
        attrs = G.nodes[node]
        if attrs["node_trace"] == node_trace_category:
            x, y = attrs['pos']
            xs.append(x)
            ys.append(y)
            labels.append(attrs['text'])

    node_trace = go.Scatter(
            x=xs,
            y=ys,
            text=labels,
            textposition=textposition,
            mode=markermode,
            hoverinfo='text',
            name=node_trace_category,
            marker=dict(
                symbol=nodesymbol,
                opacity=nodeopacity,
                showscale=False,
                color=color,
                size=nodesize,
                line=dict(width=linewidth, color=linecolor)))
    return node_trace

## File configuration

In [None]:
# Read the notebook settings from config.yaml in the working directory and
# echo them as the cell output.
notebook_configuration = yaml.safe_load(Path("config.yaml").read_text())
notebook_configuration

In [None]:
# Project-level settings.
bgcflow_dir = Path(notebook_configuration["bgcflow_dir"])
project_name = "mq_saccharopolyspora"
report_dir = bgcflow_dir / f"data/processed/{project_name}"
FIGURE = "Figure_4"
MASH_FIGURE = "Figure_3"

# Query outputs: both tables live in the same query directory, so the
# directory is spelled out only once.
query_dir = report_dir / "bigslice/query_as_6.1.1/"
df_bigfam_model = pd.read_csv(query_dir / "gcf_summary.csv")
df_bigfam_hits = pd.read_csv(query_dir / "query_network.csv")

# Other input tables used further down the notebook.
antismash_region_path = report_dir / "tables/df_regions_antismash_6.1.1.csv"
mash_path = f"assets/tables/{MASH_FIGURE}b_mash_hcluster.csv"

In [None]:
# Per-GCF summary table, cached on disk because building it queries the
# database once per GCF.
output = Path(f"assets/tables/{FIGURE}_bigfam_models.csv")

if not output.is_file():
    bigfam_db = bgcflow_dir / "resources/bigslice/full_run_result/result/data.db"
    conn = sqlite3.connect(bigfam_db)
    try:
        result = {}
        for i in df_bigfam_model.gcf_id:
            result.update(get_bigfam_summary(i, conn))
    finally:
        # Release the database handle even if one of the queries fails.
        conn.close()

    output.parent.mkdir(parents=True, exist_ok=True)
    pd.DataFrame.from_dict(result).T.to_csv(output)

# Always load from the CSV so `df` has the same dtypes (numeric columns,
# stringified dictionaries) on a fresh run as on a cached one.
df = pd.read_csv(output, index_col=0)

In [None]:
# Hit table linking the query BGCs to GCF models.
bigfam_network_path = report_dir / "bigslice/query_as_6.1.1/query_network.csv"
df_network = pd.read_csv(Path(bigfam_network_path))

# GCF models whose most frequent taxon covers at most 30% of their members;
# these are removed from the plotted graph below.
df_cut = df.loc[df.top_taxa_proportion <= 0.3]

# Build the graph, then annotate BGC nodes and GCF nodes in turn.
G = annotate_bigfam_models(
    annotate_bigfam_antismash(generate_bigfam_network(df_network), antismash_region_path),
    df,
)
G_raw = G.copy()  # unpruned snapshot, used later for the summary counts

for n in df_cut.index:
    print(f"removing {n}")
    G.remove_node(n)

# Position nodes
pos = nx.nx_agraph.graphviz_layout(G)
for n, p in pos.items():
    G.nodes[n]['pos'] = p

# Add phylogroup information to every node that carries a genome id
df_mash = pd.read_csv(mash_path, index_col=0)
for node, attrs in G.nodes(data=True):
    if 'genome_id' in attrs:
        attrs['phylogroup'] = df_mash.loc[attrs['genome_id'], "phylogroup"]

In [None]:
# Calculate the number of GCFs with representation across different genera

# One row per GCF, one column per genus, holding the member counts. The table
# is built in a single constructor call instead of being grown one cell at a
# time; genera missing from a GCF become 0.
_taxa_rows = [
    ast.literal_eval(str(df.loc[gcf_id, "taxa_distribution"]))
    for gcf_id in df.index
]
df_genus_dist = pd.DataFrame(_taxa_rows, index=df.index, dtype=float)
df_genus_dist = df_genus_dist.fillna(0)

# Genera represented by most number of GCFs
df_genus_dist_binary = df_genus_dist > 0
df_genus_dist_binary = df_genus_dist_binary * 1
df_genus_dist_binary.sum().sort_values(ascending=False)[:10]

In [None]:
# Node colour per phylogroup, plus a fixed colour for the GCF nodes.
phylomap = {}
color = ["#264653", "#287271", "#2a9d8f", "#8ab17d", "#e9c46a", "#f4a261", "#ee8959", "#e76f51"]
for num, phylogroup in enumerate(df_mash.phylogroup.unique()):
    # Wrap around the palette so more phylogroups than colours reuse colours
    # instead of raising IndexError.
    phylomap[phylogroup] = color[num % len(color)]
phylomap["BiG-FAM GCFs"] = "blue"

In [None]:
# Assign every node to a legend category and record how that category is drawn.
color_category = 'phylogroup'
usecolormap = phylomap
node_trace_category = {}
for node in G.nodes:
    attrs = G.nodes[node]
    # Set for every node: it used to be assigned only in the BGC branch, which
    # left it undefined when the first node visited was a GCF.
    linecolor = "black"
    if attrs['node_type'] == "BiG-FAM GCFs":
        nodeshape = "diamond"
        color = usecolormap["BiG-FAM GCFs"]
        cat = "BiG-FAM GCFs"
    else:
        # BGC nodes are coloured by phylogroup and shaped by antiSMASH similarity.
        cat = attrs[color_category]
        color = usecolormap[cat]
        similarity = attrs['similarity']
        if similarity > 0.8:
            cat = cat + " " + "Known (antiSMASH) > 80% similarity"
            nodeshape = "circle"
        elif similarity > 0.4:
            cat = cat + " " + "Known (antiSMASH) > 40% similarity"
            nodeshape = "triangle-up"
        elif similarity < 0.4:
            cat = cat + " " + "Unknown (antiSMASH) < 40% similarity"
            nodeshape = "triangle-down"
        else:
            # Reached when no comparison holds, e.g. a missing (NaN) similarity.
            # NOTE(review): a similarity of exactly 0.4 also lands here — confirm intended.
            cat = cat + " " + "Unknown"
            nodeshape = "star"

    attrs['node_trace'] = cat
    node_trace_category[cat] = {"nodecolor" : color,
                                "nodeshape" : nodeshape,
                                "linecolor" : linecolor}

In [None]:
# One line trace for all edges, followed by one marker trace per node category.
edge_trace = create_edge_trace(G, "bigslice_similarity")
traces = [edge_trace]
nodeopacity = 0.8
for cat, style in node_trace_category.items():
    traces.append(
        create_node_trace(
            G,
            cat,
            style["nodecolor"],
            nodesymbol=style["nodeshape"],
            nodeopacity=nodeopacity,
            nodesize=10,
            linewidth=1,
            linecolor=style["linecolor"],
        )
    )

# Transparent background, no axis decorations, legend on.
_hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)
_layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    showlegend=True,
    hovermode='closest',
    margin=dict(b=20,l=5,r=5,t=40),
    xaxis=dict(_hidden_axis),
    yaxis=dict(_hidden_axis),
    width=1200, height=800)
fig2 = go.Figure(data=traces, layout=_layout)
fig2

In [None]:
# Counts quoted in the manuscript text, taken from the unpruned graph.
bgc_hits = sum(1 for n in G_raw.nodes if G_raw.nodes[n]['node_type'] == 'BGC')
GCF_hits = sum(1 for n in G_raw.nodes if G_raw.nodes[n]['node_type'] != 'BGC')
# GCFs whose members all belong to one taxon, counted per taxon; fall back to
# 0 when none of them is Saccharopolyspora instead of raising KeyError.
top_taxa_saccharopolyspora = df[df.top_taxa_proportion == 1].top_taxa.value_counts().to_dict().get('Saccharopolyspora', 0)
shannon_non_zero = df[df['H-index'] != 0]
shannon_non_zero[shannon_non_zero['H-index'] > 2].shape
df.columns

In [None]:
# GCF_201888

# Taxon counts of the GCF with the smallest non-zero Shannon index, converted
# to proportions and formatted as "name (xx.x%)" in descending order.
_lowest_h_gcf = shannon_non_zero['H-index'].astype(float).idxmin()
_taxon_counts = ast.literal_eval(str(df.loc[_lowest_h_gcf, "taxa_distribution"]))
_n_members = sum(_taxon_counts.values())
shannon_min_non_zero_tax = {taxon: count / _n_members for taxon, count in _taxon_counts.items()}
_ranked_taxa = sorted(shannon_min_non_zero_tax.items(), key=lambda item: item[1], reverse=True)
shannon_min_non_zero_tax_clean = [f"{taxon} ({share:.1%})" for taxon, share in _ranked_taxa]
df.loc[220008, 'top_taxa']
df.loc[220008, 'top_taxa_proportion'] * df.loc[220008, 'bgc_member']
def get_top_taxa(df, gcf):
    """
    Format the top taxon of one GCF as "<taxon> (<n> genomes)".

    ``n`` is ``top_taxa_proportion * bgc_member`` for the row labelled ``gcf``,
    rounded to a whole number by the format specifier.
    """
    taxon = df.loc[gcf, 'top_taxa']
    member_count = df.loc[gcf, 'top_taxa_proportion'] * df.loc[gcf, 'bgc_member']
    return f"{taxon} ({member_count:.0f} genomes)"

In [None]:
# Index label of the row with the smallest non-zero Shannon index (after a
# float cast); shown as the cell output.
shannon_non_zero['H-index'].astype(float).idxmin()

The comparison of BGCs against the BiG-FAM database can be used to investigate whether detected BGCs are widely spread across other genomes from the public datasets. The BiG-SLICE-based query resulted in 389 BGCs having similarities against 130 GCF models from the BiG-FAM database. Of the 130 detected GCFs, 71 BiG-FAM GCFs were specifically distributed in the genus Saccharopolyspora. In contrast, several other GCFs included BGCs from different genera such as Streptomyces (47 genomes), Amycolatopsis (31 genomes), Kitasatospora (29 genomes), Nocardia (25 genomes), Pseudomonas_E (22 genomes), and many others. We further calculated the Shannon diversity index (H) for each of the GCFs, representing the distribution of BGCs across different genera (Figure 4b). Of the 59 GCFs with a positive Shannon index, 33 were highly distributed across many genera, with a Shannon index greater than 2. GCF_201888, the GCF with the lowest positive Shannon index (~0.3), contained 12,444 BGCs distributed across 43 genera, with the majority belonging to Staphylococcus (~94.2%) followed by Acinetobacter (~4.2%). The known BGC in this BiG-FAM GCF codes for the biosynthesis of staphylobactin (also known as staphyloferrin B), which is a siderophore with a role in the virulence of Staphylococcus aureus (84, 85). The detailed comparative analysis of the predicted BGCs against the MIBiG entry showed that the Saccharopolyspora genomes indeed possess a BGC that is very similar to the staphylobactin BGC from Staphylococcus aureus (Figure S11). Some of the other GCFs, such as GCF_215691 (Shannon index: 0.35), GCF_220008 (Shannon index: 0.41), and GCF_203048 (Shannon index: 0.52), were found predominantly in genera such as Pseudomonas_E (1241 genomes), Mycobacterium (227 genomes), and Streptomyces (114 genomes), respectively (Figure 4b). These examples particularly highlight BGCs that are potentially transferred across different phylogenetic groups through horizontal gene transfer events.

In [None]:
# Manuscript paragraph for Figure 4b, assembled from the values computed above.
# The Shannon index column is cast to float once so idxmin / min behave the
# same as in the earlier cells, whatever dtype `df` was loaded with.
_h_index = shannon_non_zero['H-index'].astype(float)
_lowest_h_gcf = _h_index.idxmin()

text1 = f"The comparison of BGCs against BiG-FAM-DB can be used to investigate if detected BGCs are widely spread across other genomes from the public datasets. The BiG-SLICE-based query resulted in {bgc_hits} BGCs having similarities against {GCF_hits} GCF models from the BiGFAM database."
# NOTE(review): the per-genus counts in this sentence are hard-coded, not computed.
text2 = f"From the {GCF_hits} detected GCFs, there were {top_taxa_saccharopolyspora} BiG-FAM GCFs that are specifically distributed in the genus Saccharopolyspora. Whereas several other GCFs included BGCs from different genera such as Streptomyces (47 genomes), Amycolatopsis (31 genomes), Kitasatospora (29 genomes), Nocardia (25 genomes), Pseudomonas_E (22 genomes), and many others."
# The entries of shannon_min_non_zero_tax_clean already end with "(xx.x%)", so
# no extra closing parenthesis follows the first one.
text3 = f"We further calculated the Shannon diversity index (H) for each of the GCFs representing the distribution of BGCs across different genera (Figure 4b). Of the {shannon_non_zero.shape[0]} GCFs with positive Shannon index, {shannon_non_zero[_h_index > 2].shape[0]} were highly distributed across many genera with a Shannon index of greater than 2. The GCF GCF_{_lowest_h_gcf} with the lowest positive Shannon index (~{_h_index.min():.1f}) contained {df.loc[_lowest_h_gcf, 'bgc_member']} BGCs distributed across {len(shannon_min_non_zero_tax.keys())} genera with the majority belonging to {shannon_min_non_zero_tax_clean[0]} followed by {shannon_min_non_zero_tax_clean[1]}."
text4 = "The known BGC in this BiG-FAM GCF coded for the biosynthesis of staphylobactin (also known as staphyloferrin B), which is a siderophore with a role in the virulence of Staphylococcus aureus (84, 85). The detailed comparative analysis of the predicted BGCs against the MIBIG entry showed that the Saccharopolyspora genomes indeed possess a BGC that is very similar to the staphylobactin BGC from Staphylococcus aureus (Figure S11)."
text5 = f"Some of the other GCFs like GCF_215691 (Shannon index: {df.loc[215691, 'H-index']:.2f}), GCF_220008 (Shannon index: {df.loc[220008, 'H-index']:.2f}), and GCF_203048 (Shannon index: {df.loc[203048, 'H-index']:.2f}) were found predominantly in genera such as {get_top_taxa(df, 215691)}, {get_top_taxa(df, 220008)}, and {get_top_taxa(df, 203048)}, respectively (Figure 4B). These examples particularly highlight the BGCs that are potentially transferred across different phylogenetic groups through horizontal gene transfer events."
final_text = " ".join([text1, text2, text3, text4, text5])

In [None]:
# Save the generated paragraph next to the figure. The figure directory is
# otherwise only created by the chart cell further down, so create it here to
# keep a top-to-bottom run from failing.
_text_path = Path(f"assets/figures/{FIGURE}/{FIGURE}b_text.txt")
_text_path.parent.mkdir(parents=True, exist_ok=True)
with open(_text_path, "w") as f:
    f.write(final_text)

In [None]:
# Number of rows in the hit table per GCF, stored as a float column on `df`.
mapping = df_bigfam_hits.gcf_id.value_counts()
df["dataset_hits"] = [float(mapping[gcf]) for gcf in df.index]

In [None]:
# Legend entries: every top taxon that covers more than half of its GCF,
# ordered by how many GCFs it tops, followed by a catch-all "Other".
_dominant_taxa = df.loc[df["top_taxa_proportion"] > 0.5, "top_taxa"]
domain = list(_dominant_taxa.value_counts().index)
domain.append("Other")
print(domain)

# One colour per legend entry, in order; entries beyond the palette are white.
range_ = ["#264653", "#287271", "#2a9d8f", "#8ab17d", "#e9c46a", "#f4a261", "#ee8959", "#e76f51", "#edede9"]
r = range_[:len(domain)] + ["white"] * max(0, len(domain) - len(range_))

In [None]:
# Plot table: one row per GCF, with top taxa covering at most half of the GCF
# relabelled as "Other" so they match the colour domain.
source = df.copy()
source = source.reset_index().rename(columns={"index":"BiG-FAM_id"})
source.loc[source["top_taxa_proportion"] <= 0.5, "top_taxa"] = "Other"

# Shannon index against GCF size (log scale); point size shows the number of
# hits in the dataset and colour the dominant taxon.
chart_one = alt.Chart(source).mark_point(
    filled=True,
    stroke='black',
    strokeWidth=0.5,
    opacity=0.8,
    size=1000
).encode(
    alt.Y('H-index:Q',
          scale=alt.Scale(domain=(-0.5, 5)),
          axis=alt.Axis(title="Shannon Index (H')")
         ),
    alt.X('bgc_member:Q',
          scale=alt.Scale(type="log"),
          axis=alt.Axis(title="Member Size")
         ),
    alt.Size('dataset_hits',
             scale=alt.Scale(type='pow', domain=(1, 30)), 
             title="Number of hits in dataset"
            ),
    alt.Color("top_taxa:N", scale=alt.Scale(domain=domain, range=r), title="Top Genus (>=50%)"),
    tooltip=['BiG-FAM_id', 'bgc_member', 'chemical_class_hits', 'top_chemical_class', alt.Tooltip('top_chemical_class_proportion', format='.1%'), 
             'top_chemical_subclass', alt.Tooltip('top_chemical_subclass_proportion', format='.1%'),
             'taxonomic_level', 'richness', 'top_taxa', alt.Tooltip('top_taxa_proportion', format='.1%'), 
             alt.Tooltip('H-index:Q', format='.2f')],
).configure_header(
    title=None,
    labels=False
).configure_axis(
    labelFontSize=10,
    titleFontSize=12
).configure_legend(
    # All legend settings go in one call: a second configure_legend() call
    # replaces the earlier one and would drop the font sizes.
    labelFontSize=10,
    titleFontSize=12,
    orient='right'
).configure_view(
    continuousHeight=250,
    continuousWidth=250,
)

# Save a static and an interactive version, then show the interactive chart.
outfile = Path(f"assets/figures/{FIGURE}/{FIGURE}b.svg")
outfile_html = Path(f"assets/figures/{FIGURE}/{FIGURE}b.html")
outfile.parent.mkdir(parents=True, exist_ok=True)
chart_one.save(outfile)
chart_one.interactive().save(outfile_html)
chart_one.interactive()