# Visualize model performance

In [None]:
import numpy as np
import pandas as pd
import anndata as adata

import random
import itertools

from tqdm import tqdm
from pathlib import Path
from typing import Tuple, List, Dict

import plotly.graph_objects as go
import plotly as plotly
import plotly.express as px
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity as skl_cosine

from scipy.stats import pearsonr
from scipy.spatial.distance import braycurtis
from math import sqrt
from sklearn.metrics import confusion_matrix

%load_ext blackcellmagic

## Set up paths and load setting files

#### Path to subset/minor/major experiments

In [None]:
# Subset
# Root of the subset-level immune-lineage experiment (truth and per-method
# results are read from its data/results/ folder further down)
subset_prefix = (
    "???/deconvolution_benchmarking/03_immune_lineages_experiment/subset_level"
)

# Minor
# Root of the minor-level immune-lineage experiment
minor_prefix = (
    "???/deconvolution_benchmarking/03_immune_lineages_experiment/minor_level"
)

# Major
# The major-level results are taken from the purity-levels experiment
major_prefix = "???/deconvolution_benchmarking/01_purity_levels_experiment/include_normal_epithelial"

# Prefix to visualizations folder
# (the source-data TSV is written under this folder at the end of the notebook)
viz_prefix = "???/deconvolution_benchmarking/visualizations"

#### List major/minor/subset cell types

In [None]:
# Subset-level cell types; used below to select (and order) the columns of the
# subset-level ground-truth table
subset_c_types = [
    "Endothelial",
    "CAFs",
    "PVL",
    "B cells Memory",
    "B cells Naive",
    "T_cells_c4_CD8+_ZFP36",
    "T_cells_c6_IFIT1",
    "T_cells_c7_CD8+_IFNG",
    "T_cells_c8_CD8+_LAG3",
    "T_cells_c0_CD4+_CCR7",
    "T_cells_c1_CD4+_IL7R",
    "T_cells_c2_CD4+_T-regs_FOXP3",
    "T_cells_c3_CD4+_Tfh_CXCL13",
    "T_cells_c9_NK_cells_AREG",
    "T_cells_c11_MKI67",
    "T_cells_c10_NKT_cells_FCGR3A",
    "Myeloid_c10_Macrophage_1_EGR1",
    "Myeloid_c12_Monocyte_1_IL1B",
    "Myeloid_c2_LAM2_APOE",
    "Myeloid_c1_LAM1_FABP5",
    "Cycling_Myeloid",
    "Myeloid_c4_DCs_pDC_IRF7",
    "Normal Epithelial",
    "Plasmablasts",
    "Myeloid_c8_Monocyte_2_S100A9",
    "Myeloid_c9_Macrophage_2_CXCL10",
    "Myeloid_c11_cDC2_CD1C",
    "Cancer Epithelial",
    "Myeloid_c3_cDC1_CLEC9A",
]
# Minor-level cell types; used below to select (and order) the columns of the
# minor-level ground-truth table
minor_c_types = [
    "Cancer Epithelial",
    "T cells CD4+",
    "T cells CD8+",
    "Endothelial",
    "CAFs",
    "Macrophage",
    "PVL",
    "Normal Epithelial",
    "Plasmablasts",
    "B cells Memory",
    "Monocyte",
    "Cycling_Myeloid",
    "Cycling T-cells",
    "NK cells",
    "NKT cells",
    "DCs",
    "B cells Naive",
]
# Major-level cell types; used below to select (and order) the columns of the
# major-level ground-truth table
major_c_types = [
    "B-cells",
    "CAFs",
    "Cancer Epithelial",
    "Endothelial",
    "Myeloid",
    "Normal Epithelial",
    "PVL",
    "Plasmablasts",
    "T-cells",
]

#### Load groundtruth

In [None]:
# We only use tumour purity = 50%
# (mixtures are selected by comparing the "Cancer Epithelial" ground-truth
# column against this value)
pur_lvl = 0.5

# Method order is shared across figures
# NOTE(review): `methods_order` is not referenced again in the code visible
# here - confirm whether it is still needed
methods_order = [
    "BayesPrism",
    "Scaden",
    "MuSiC",
    "hspe",
    "DWLS",
    "CBX",
    "Bisque",
    "EPIC",
    "CPM",
]

In [None]:
def _read_truth(experiment_prefix: str, c_types: List[str]) -> pd.DataFrame:
    """Read an experiment's tab-separated ``truth.csv`` and keep ``c_types`` columns, in that order."""
    truth_path = Path(experiment_prefix) / "data/results/truth.csv"
    truth_df = pd.read_csv(truth_path, sep="\t", index_col=0)
    return truth_df[c_types]


# Ground-truth proportions at each lineage level (subset, minor, major)
subset_truth_df = _read_truth(subset_prefix, subset_c_types)
minor_truth_df = _read_truth(minor_prefix, minor_c_types)
major_truth_df = _read_truth(major_prefix, major_c_types)

#### Utilities

#### Extract lineages metadata

In [None]:
# Raw column names in the lineage file -> display names used by the rest of the notebook
_lineage_column_labels = {
    "celltype_major": "Major Cell Type",
    "celltype_major_short": "Annotated Major Cell Type",
    "celltype_minor": "Minor Cell Type",
    "celltype_minor_short": "Annotated Minor Cell Type",
    "celltype_subset": "Subset Cell Type",
    "celltype_subset_short": "Annotated Subset Cell Type",
    "marker_genes": "Marker Genes",
    "counts": "Cell Counts",
    "num_of_patients": "Patient Counts",
}

# Load the lineage mapping of all cell types in the data and relabel its columns
# NOTE(review): `prefix` is not assigned anywhere in the code visible here (only
# subset_prefix / minor_prefix / major_prefix / viz_prefix are) - confirm where
# it is meant to come from, otherwise this raises NameError on a fresh kernel
lineages_df = pd.read_csv(
    Path(prefix) / "data/Whole_miniatlas_immune_lineages.tsv",
    sep="\t",
).rename(columns=_lineage_column_labels)

In [None]:
# Subset cell types excluded from the lineages DataFrame:
#   - Myeloid_c7_Monocyte_3_FCGR3A, which we removed
#   - the 3 cell types that MuSiC dropped
_excluded_subset_ctypes = [
    "Myeloid_c7_Monocyte_3_FCGR3A",
    "Myeloid_c0_DC_LAMP3",
    "Myeloid_c5_Macrophage_3_SIGLEC1",
    "T_cells_c5_CD8+_GZMK",
]
_is_excluded = lineages_df["Subset Cell Type"].isin(_excluded_subset_ctypes)
lineages_df = lineages_df.loc[~_is_excluded]

#### Extract colour palette

In [None]:
# Load the colour palette table (one row per cell type, with fill and line colours)
# NOTE(review): `prefix` is not assigned anywhere in the code visible here - confirm its origin
# NOTE(review): the file has a .csv extension but is parsed as tab-separated - confirm
colour_pallete_df = pd.read_csv(
    Path(prefix) / "data/Whole_miniatlas_colour_pallete.csv",
    sep="\t",
)

# Lookup: cell type -> {"fill": ..., "line": ...}
colour_pallete_d = {
    c_type_name: {"fill": fill_colour, "line": line_colour}
    for c_type_name, fill_colour, line_colour in zip(
        colour_pallete_df["all_celltype"],
        colour_pallete_df["fill"],
        colour_pallete_df["line"],
    )
}

## [Fig] Scatter plots of immune cell types for BayesPrism and DWLS

In [None]:
# Create an empty list to collect all coordinates for source data
# (each plotting section below appends one DataFrame tagged with its subplot letter;
# the list is concatenated and saved at the end of the notebook)
all_coors_l = []

In [None]:
def plot_scatter_with_facets_by_patient(
    preds_truth_df: pd.DataFrame,
    facet_col: str,
    facet_row: str,
    color: str,
    color_map: dict,
    file_name: str,
    category_orders: Dict = None,
    plot_title: str = "",
    legend_title: str = "Cell Type",
    plot_png: bool = True,
    png_width: float = 400,
    png_height: float = 400,
    facet_row_spacing: float = 0.04,
    facet_col_spacing: float = 0.04,
) -> None:
    """Draw a faceted log-log scatter of predicted vs actual proportions and save it as SVG.

    Args:
        preds_truth_df: Long-format table with "truth" (x) and "preds" (y) columns
            plus the columns named by ``facet_col``, ``facet_row`` and ``color``.
        facet_col: Column whose values define the subplot columns.
        facet_row: Column whose values define the subplot rows.
        color: Column used to colour the points.
        color_map: Mapping from values of ``color`` to colours.
        file_name: Stem appended to ``./figures/supp_figures/supp_fig_13_``; the
            suffix of the resulting path is set to ``.svg``.
        category_orders: Optional ordering of categorical values, forwarded to
            ``px.scatter``. ``None`` (default) means no explicit ordering.
        plot_title: Currently unused; kept for interface compatibility.
        legend_title: Currently unused (the legend is hidden); kept for
            interface compatibility.
        plot_png: When False the figure is built but nothing is written to disk.
        png_width: Width passed to ``fig.write_image``.
        png_height: Height passed to ``fig.write_image``.
        facet_row_spacing: Vertical spacing between facet rows.
        facet_col_spacing: Horizontal spacing between facet columns.
    """
    # Avoid a shared mutable default; an empty mapping is what was passed before
    if category_orders is None:
        category_orders = {}

    fig = px.scatter(
        preds_truth_df,
        x="truth",
        log_x=True,
        y="preds",
        log_y=True,
        facet_row=facet_row,
        facet_row_spacing=facet_row_spacing,
        facet_col=facet_col,
        facet_col_spacing=facet_col_spacing,
        # NOTE(review): no `trendline` is requested, so these options presumably
        # have no effect - confirm and drop if so
        trendline_options=dict(log_x=True, log_y=True),
        color=color,
        color_discrete_map=color_map,
        category_orders=category_orders,
    )

    fig.update_traces(marker=dict(size=2), opacity=0.5)

    # Axis formatting for facet plots is applied through update_xaxes() /
    # update_yaxes() rather than fig["layout"].update(). Both axes share the
    # same style; matches=None unlinks the axes across facets.
    axis_style = dict(
        linecolor="black",
        linewidth=1,
        ticks="outside",
        tickmode="array",
        tickvals=[0.01, 0.1, 1, 10, 100],
        tickfont_size=7,
        matches=None,
    )
    fig.update_yaxes(**axis_style)
    fig.update_xaxes(**axis_style)

    # Axis titles only on the first row (x) and first column (y)
    fig.update_xaxes(
        title="Actual proportion (%)",
        row=1,
        title_font_size=8,
        title_standoff=2,
    )
    fig.update_yaxes(
        title="Predicted proportion (%)",
        col=1,
        title_font_size=8,
        title_standoff=2,
    )

    fig["layout"].update(
        font=dict(size=8, color="black"),
        plot_bgcolor="rgba(0,0,0,0)",
        showlegend=False,
        newshape=dict(opacity=1),
        margin=dict(t=0, l=0, r=0, b=0),
    )

    # Add 45-degree (perfect prediction) line to all subplots
    fig.add_shape(
        dict(type="line", y0=0.01, y1=99, x0=0.01, x1=99),
        row="all",
        col="all",
        line=dict(color="darkgray", dash="dot", width=1),
    )

    # Strip the "<column>=" prefix from facet labels, then nudge the
    # column-facet labels vertically
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
    for c_type in preds_truth_df[facet_col].unique():
        fig.update_annotations(selector={"text": c_type}, y=0.985)

    # Honour the flag that was previously ignored
    if not plot_png:
        return

    # Save image; with_suffix() replaces anything after the last "." in file_name
    out_path = Path(f"./figures/supp_figures/supp_fig_13_{file_name}").with_suffix(
        ".svg"
    )
    # write_image does not create missing folders
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fig.write_image(
        out_path,
        width=png_width,
        height=png_height,
        scale=3,
    )

In [None]:
# Specify methods
# These are result-file stems (read as data/results/{method}.csv below); they are
# renamed to display labels after loading
methods = ["bprism_v2", "dwls"]
# Fixed colour per patient; the patient ID is taken as the text before the first
# "_" of each sample index when the prediction tables are built
patient_color_map = {
    "CID3921": "#17BECF",
    "CID4066": "#BCBD22",
    "CID4067": "#7F7F7F",
    "CID4290A": "#E377C2",
    "CID4471": "#8C564B",
    "CID44971": "#9467BD",
    "CID4515": "#D62728",
    "CID4523": "#2CA02C",
}

#### Major cell types: B-cells, T-Cells, Myeloid

In [None]:
# Long-format (truth, preds) rows for every method x major cell type at the chosen purity
major_preds_truth_l = []

for method in tqdm(methods):
    # Predictions of this method
    res_df = pd.read_csv(
        Path(major_prefix) / f"data/results/{method}.csv", sep="\t", index_col=0
    )

    # Ground truth restricted to the 50% tumour mixtures, and the matching predictions
    is_pur_lvl = major_truth_df["Cancer Epithelial"] == pur_lvl
    tmp_truth_df = major_truth_df.loc[is_pur_lvl].sort_index()
    tmp_res_df = res_df.loc[res_df.index.isin(tmp_truth_df.index)].sort_index()

    # Floor everything below 0.01% at 0.01% (needed for the log axes)
    tmp_res_df = tmp_res_df.mask(tmp_res_df < 0.0001, 0.0001)
    tmp_truth_df = tmp_truth_df.mask(tmp_truth_df < 0.0001, 0.0001)

    for c_type in tmp_truth_df.columns.tolist():
        # Side-by-side truth/preds for one cell type, as percentages with 2 decimals
        preds_truth_df = pd.concat(
            {
                "truth": tmp_truth_df[c_type].sort_index(),
                "preds": tmp_res_df[c_type].sort_index(),
            },
            axis=1,
        )
        preds_truth_df = (preds_truth_df * 100).round(2)

        # Patient ID is the part of the sample index before the first underscore
        preds_truth_df["Patient"] = [
            sample_id.split("_")[0] for sample_id in preds_truth_df.index
        ]
        preds_truth_df["Cell Type"] = c_type
        preds_truth_df["Method"] = method

        major_preds_truth_l.append(preds_truth_df)

major_preds_truth_df = pd.concat(major_preds_truth_l, axis=0)

# Attach the colour-palette columns of each cell type
_major_palette_df = colour_pallete_df.rename(columns={"all_celltype": "Cell Type"})
major_preds_truth_df = major_preds_truth_df.merge(
    _major_palette_df, on=["Cell Type"], how="inner"
)

# Replace method file stems with display names
_method_display_names = {
    "scaden": "Scaden",
    "music": "MuSiC",
    "cbx": "CBX",
    "dwls": "DWLS",
    "epic": "EPIC",
    "cpm": "CPM",
    "bisque": "Bisque",
    "bprism_v2": "BayesPrism",
}
major_preds_truth_df = major_preds_truth_df.replace(_method_display_names)

In [None]:
# Keep only the three immune major lineages: T-cells, B-cells and Myeloid
_is_immune_major = major_preds_truth_df["Cell Type"].isin(
    ["T-cells", "B-cells", "Myeloid"]
)
major_preds_truth_df = major_preds_truth_df.loc[_is_immune_major]

In [None]:
def _major_rmse_and_pearson(pairs_df: pd.DataFrame) -> Tuple[float, float]:
    """Return (RMSE, Pearson's r) between the "truth" and "preds" columns."""
    rmse = sqrt(mean_squared_error(pairs_df["truth"], pairs_df["preds"]))
    r = pearsonr(pairs_df["truth"], pairs_df["preds"])[0]
    return rmse, r


# One row of statistics per (cell type, method) pair
stats_l = [
    [tmp_method, tmp_c_type, *_major_rmse_and_pearson(tmp_df)]
    for (tmp_c_type, tmp_method), tmp_df in major_preds_truth_df.groupby(
        ["Cell Type", "Method"]
    )
]

stats_df = pd.DataFrame(
    stats_l, columns=["Method", "Cell type", "RMSE", "Pearson's"]
)
stats_df.sort_values(["Method", "Cell type"])

In [None]:
# Major-level panel: one row per method, one column per immune lineage,
# points coloured by patient
plot_scatter_with_facets_by_patient(
    preds_truth_df=major_preds_truth_df,
    facet_row="Method",
    facet_col="Cell Type",
    category_orders={"Cell Type": ["T-cells", "B-cells", "Myeloid"]},
    color="Patient",
    color_map=patient_color_map,
    file_name="major_" + "_".join(methods),
    plot_png=True,
    png_width=375,
    png_height=250,
)

In [None]:
# Source data for subplot "a": drop the colour columns, relabel, tag, collect
major_coors_df = (
    major_preds_truth_df.drop(columns=["fill", "line"])
    .rename(
        columns={
            "truth": "Actual proportion",
            "preds": "Predicted proportions",
            "lineage": "Lineage level",
        }
    )
    .assign(Subplot="a")
)
all_coors_l.append(major_coors_df)

#### Minor cell types

In [None]:
# Figure width ("w") and height ("h") per immune major lineage; passed as
# png_width / png_height when plotting that lineage's minor cell types
minor_attrs = {
    "T-cells": {"w": 637.5, "h": 250},
    "B-cells": {"w": 250, "h": 250},
    "Myeloid": {"w": 500, "h": 250},
}

In [None]:
# Long-format (truth, preds) rows for every method x minor cell type at the chosen purity
minor_preds_truth_l = []

for method in tqdm(methods):
    # Predictions of this method
    res_df = pd.read_csv(
        Path(minor_prefix) / f"data/results/{method}.csv", sep="\t", index_col=0
    )

    # Ground truth restricted to the 50% tumour mixtures, and the matching predictions
    is_pur_lvl = minor_truth_df["Cancer Epithelial"] == pur_lvl
    tmp_truth_df = minor_truth_df.loc[is_pur_lvl].sort_index()
    tmp_res_df = res_df.loc[res_df.index.isin(tmp_truth_df.index)].sort_index()

    # Floor everything below 0.01% at 0.01% (needed for the log axes)
    tmp_res_df = tmp_res_df.mask(tmp_res_df < 0.0001, 0.0001)
    tmp_truth_df = tmp_truth_df.mask(tmp_truth_df < 0.0001, 0.0001)

    for c_type in tmp_truth_df.columns.tolist():
        # Side-by-side truth/preds for one cell type, as percentages with 2 decimals
        preds_truth_df = pd.concat(
            {
                "truth": tmp_truth_df[c_type].sort_index(),
                "preds": tmp_res_df[c_type].sort_index(),
            },
            axis=1,
        )
        preds_truth_df = (preds_truth_df * 100).round(2)

        # Patient ID is the part of the sample index before the first underscore
        preds_truth_df["Patient"] = [
            sample_id.split("_")[0] for sample_id in preds_truth_df.index
        ]
        preds_truth_df["Cell Type"] = c_type
        preds_truth_df["Method"] = method

        minor_preds_truth_l.append(preds_truth_df)

minor_preds_truth_df = pd.concat(minor_preds_truth_l, axis=0)

# Replace method file stems with display names
minor_preds_truth_df = minor_preds_truth_df.replace(
    {"dwls": "DWLS", "bprism_v2": "BayesPrism"}
)

In [None]:
# Collate RMSE and Pearson's r per method and minor cell type,
# grouped by the immune major lineage the minor cell type belongs to
stats_l = []

# NOTE: despite its name, `minor_c_type` iterates over the three immune *major*
# lineages; the "Minor" column of stats_df therefore holds the major lineage name
for minor_c_type in ["T-cells", "B-cells", "Myeloid"]:
    # Minor cell types that belong to this major lineage
    minor_immune_ctypes = lineages_df[lineages_df["Major Cell Type"] == minor_c_type][
        "Minor Cell Type"
    ].unique()

    ctype_minor_preds_truth_df = minor_preds_truth_df[
        minor_preds_truth_df["Cell Type"].isin(minor_immune_ctypes)
    ]

    for (tmp_c_type, tmp_method), tmp_df in ctype_minor_preds_truth_df.groupby(
        ["Cell Type", "Method"]
    ):
        rmse = sqrt(
            mean_squared_error(
                tmp_df["truth"],
                tmp_df["preds"],
            )
        )

        r = pearsonr(
            tmp_df["truth"],
            tmp_df["preds"],
        )[0]

        # Append to stats_l
        stats_l.append([minor_c_type, tmp_method, tmp_c_type, rmse, r])

# Build the table once, after all lineages have been collected
# (previously it was rebuilt from scratch on every outer iteration)
stats_df = pd.DataFrame(
    data=stats_l, columns=["Minor", "Method", "Cell type", "RMSE", "Pearson's"]
)

stats_df.sort_values(["Minor", "Method", "Cell type"])

In [None]:
# Subplot letter of each immune major lineage in the source data (loop-invariant)
subplot_mappings = {"T-cells": "b", "B-cells": "c", "Myeloid": "d"}

# NOTE: despite its name, `minor_c_type` iterates over the immune *major* lineages;
# each iteration plots the minor cell types that belong to that lineage
for minor_c_type in ["T-cells", "B-cells", "Myeloid"]:
    # Minor cell types that belong to this major lineage
    minor_immune_ctypes = lineages_df[lineages_df["Major Cell Type"] == minor_c_type][
        "Minor Cell Type"
    ].unique()

    ctype_minor_preds_truth_df = minor_preds_truth_df[
        minor_preds_truth_df["Cell Type"].isin(minor_immune_ctypes)
    ]

    if minor_c_type == "Myeloid":
        # Take care of Cycling Myeloid. Reassign instead of replacing in place:
        # the frame above is a filtered slice, and mutating it in place triggers
        # pandas' SettingWithCopyWarning.
        ctype_minor_preds_truth_df = ctype_minor_preds_truth_df.replace(
            {"Cycling_Myeloid": "Cycling Myeloid"}
        )

    # Merge with colour pallete to get colour codes
    # NOTE(review): the palette carries a "lineage" column; if a cell type name is
    # listed under more than one lineage this inner merge would duplicate rows - confirm
    ctype_minor_preds_truth_df = ctype_minor_preds_truth_df.merge(
        colour_pallete_df.rename(columns={"all_celltype": "Cell Type"}),
        on=["Cell Type"],
        how="inner",
    )

    # Plot scatter with facets by patient
    plot_scatter_with_facets_by_patient(
        preds_truth_df=ctype_minor_preds_truth_df,
        facet_col="Cell Type",
        facet_row="Method",
        color="Patient",
        color_map=patient_color_map,
        file_name=f"immune_minor_{minor_c_type}_{'_'.join(methods)}",
        plot_png=True,
        png_width=minor_attrs[minor_c_type]["w"],
        png_height=minor_attrs[minor_c_type]["h"],
        facet_row_spacing=0.04,
        facet_col_spacing=0.04,
    )

    # Append to all_coors_l to create source data later
    minor_coors_df = ctype_minor_preds_truth_df.rename(
        columns={
            "truth": "Actual proportion",
            "preds": "Predicted proportions",
            "lineage": "Lineage level",
        }
    ).drop(["fill", "line"], axis=1)
    minor_coors_df["Subplot"] = subplot_mappings[minor_c_type]
    all_coors_l.append(minor_coors_df)

#### Subset cell types

In [None]:
# Figure width ("w") and height ("h") per minor cell type; passed as
# png_width / png_height when plotting that cell type's subsets
subset_attrs = {
    "T cells CD4+": {"w": 500, "h": 250},
    "T cells CD8+": {"w": 500, "h": 250},
    "Macrophage": {"w": 500, "h": 250},
    "Monocyte": {"w": 250, "h": 250},
    "DCs": {"w": 375, "h": 250},
}

In [None]:
# Long-format (truth, preds) rows for every method x subset cell type at the chosen purity
subset_preds_truth_l = []

for method in tqdm(methods):
    # Predictions of this method
    res_df = pd.read_csv(
        Path(subset_prefix) / f"data/results/{method}.csv", sep="\t", index_col=0
    )

    # Ground truth restricted to the 50% tumour mixtures, and the matching predictions
    is_pur_lvl = subset_truth_df["Cancer Epithelial"] == pur_lvl
    tmp_truth_df = subset_truth_df.loc[is_pur_lvl].sort_index()
    tmp_res_df = res_df.loc[res_df.index.isin(tmp_truth_df.index)].sort_index()

    # Floor everything below 0.01% at 0.01% (needed for the log axes)
    tmp_res_df = tmp_res_df.mask(tmp_res_df < 0.0001, 0.0001)
    tmp_truth_df = tmp_truth_df.mask(tmp_truth_df < 0.0001, 0.0001)

    for c_type in tmp_truth_df.columns.tolist():
        # Side-by-side truth/preds for one cell type, as percentages with 2 decimals
        preds_truth_df = pd.concat(
            {
                "truth": tmp_truth_df[c_type].sort_index(),
                "preds": tmp_res_df[c_type].sort_index(),
            },
            axis=1,
        )
        preds_truth_df = (preds_truth_df * 100).round(2)

        # Patient ID is the part of the sample index before the first underscore
        preds_truth_df["Patient"] = [
            sample_id.split("_")[0] for sample_id in preds_truth_df.index
        ]
        preds_truth_df["Cell Type"] = c_type
        preds_truth_df["Method"] = method

        subset_preds_truth_l.append(preds_truth_df)

subset_preds_truth_df = pd.concat(subset_preds_truth_l, axis=0)

# Replace method file stems with display names
subset_preds_truth_df = subset_preds_truth_df.replace(
    {"dwls": "DWLS", "bprism_v2": "BayesPrism"}
)

In [None]:
# Collate RMSE and Pearson's r per method and subset cell type,
# grouped by the minor cell type the subset belongs to
stats_l = []

# NOTE: despite its name, `subset_c_type` iterates over *minor* cell types;
# the "Subset" column of stats_df therefore holds the minor cell type name
for subset_c_type in ["T cells CD4+", "T cells CD8+", "Macrophage", "Monocyte", "DCs"]:
    # Subset cell types that belong to this minor cell type
    subset_immune_ctypes = lineages_df[lineages_df["Minor Cell Type"] == subset_c_type][
        "Subset Cell Type"
    ].unique()

    ctype_subset_preds_truth_df = subset_preds_truth_df[
        subset_preds_truth_df["Cell Type"].isin(subset_immune_ctypes.tolist())
    ]

    # Get pretty subset annotations. Rename by reassignment: the frame above is
    # a filtered slice, and renaming it in place triggers SettingWithCopyWarning.
    ctype_subset_preds_truth_df = ctype_subset_preds_truth_df.rename(
        columns={"Cell Type": "Subset Cell Type"}
    )
    ctype_subset_preds_truth_df = ctype_subset_preds_truth_df.merge(
        lineages_df[["Subset Cell Type", "Annotated Subset Cell Type"]],
        how="inner",
        on="Subset Cell Type",
    )

    for (tmp_c_type, tmp_method), tmp_df in ctype_subset_preds_truth_df.groupby(
        ["Subset Cell Type", "Method"]
    ):
        rmse = sqrt(
            mean_squared_error(
                tmp_df["truth"],
                tmp_df["preds"],
            )
        )

        r = pearsonr(
            tmp_df["truth"],
            tmp_df["preds"],
        )[0]

        # Append to stats_l, reporting the annotated (display) subset name
        stats_l.append(
            [
                subset_c_type,
                tmp_method,
                tmp_df["Annotated Subset Cell Type"].unique()[0],
                rmse,
                r,
            ]
        )

# Build the table once, after all minor cell types have been collected
# (previously it was rebuilt from scratch on every outer iteration)
stats_df = pd.DataFrame(
    data=stats_l, columns=["Subset", "Method", "Cell type", "RMSE", "Pearson's"]
)

stats_df.sort_values(["Method", "Subset", "Cell type"])

In [None]:
# Subplot letter of each minor cell type in the source data (loop-invariant)
subplot_mappings = {
    "T cells CD4+": "e",
    "T cells CD8+": "f",
    "Macrophage": "g",
    "Monocyte": "h",
    "DCs": "i",
}

# NOTE: despite its name, `subset_c_type` iterates over *minor* cell types;
# each iteration plots the subset cell types that belong to that minor cell type
for subset_c_type in ["T cells CD4+", "T cells CD8+", "Macrophage", "Monocyte", "DCs"]:
    # Subset cell types that belong to this minor cell type
    subset_immune_ctypes = lineages_df[lineages_df["Minor Cell Type"] == subset_c_type][
        "Subset Cell Type"
    ].unique()

    ctype_subset_preds_truth_df = subset_preds_truth_df[
        subset_preds_truth_df["Cell Type"].isin(subset_immune_ctypes.tolist())
    ]

    # Get pretty subset annotations. Rename by reassignment: the frame above is
    # a filtered slice, and renaming it in place triggers SettingWithCopyWarning.
    ctype_subset_preds_truth_df = ctype_subset_preds_truth_df.rename(
        columns={"Cell Type": "Subset Cell Type"}
    )
    ctype_subset_preds_truth_df = ctype_subset_preds_truth_df.merge(
        lineages_df[["Subset Cell Type", "Annotated Subset Cell Type"]],
        how="inner",
        on="Subset Cell Type",
    )

    # Merge with colour pallete to get colour codes (keyed on the annotated name)
    ctype_subset_preds_truth_df = ctype_subset_preds_truth_df.merge(
        colour_pallete_df.rename(
            columns={"all_celltype": "Annotated Subset Cell Type"}
        ),
        on=["Annotated Subset Cell Type"],
        how="inner",
    )

    plot_scatter_with_facets_by_patient(
        preds_truth_df=ctype_subset_preds_truth_df,
        facet_row="Method",
        facet_col="Annotated Subset Cell Type",
        color="Patient",
        color_map=patient_color_map,
        file_name=f"immune_subset_{subset_c_type}_{'_'.join(methods)}",
        plot_png=True,
        png_width=subset_attrs[subset_c_type]["w"],
        png_height=subset_attrs[subset_c_type]["h"],
    )

    # Append to all_coors_l to create source data later; the annotated name
    # becomes the "Cell Type" column and the raw subset name is dropped
    subset_coors_df = ctype_subset_preds_truth_df.rename(
        columns={
            "truth": "Actual proportion",
            "preds": "Predicted proportions",
            "lineage": "Lineage level",
            "Annotated Subset Cell Type": "Cell Type",
        }
    ).drop(["fill", "line", "Subset Cell Type"], axis=1)
    subset_coors_df["Subplot"] = subplot_mappings[subset_c_type]
    all_coors_l.append(subset_coors_df)

#### Save source data

In [None]:
# Concatenate coordinates across lineage levels.
# Every collected frame carries a positional integer index left over from its
# merge, so the indices repeat across frames and identify nothing; renumber them.
all_coors_df = pd.concat(all_coors_l, axis=0, ignore_index=True)

# Save into tsv (without the index, which would otherwise be written as an
# unnamed first column)
source_data_path = Path(viz_prefix).joinpath("source_data/supp_figure_13.tsv")
# to_csv does not create missing folders
source_data_path.parent.mkdir(parents=True, exist_ok=True)
all_coors_df.to_csv(source_data_path, sep="\t", index=False)