# Liu et al. 2021

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pathlib
import sys
import pandas as pd
from cev.widgets import Embedding, EmbeddingComparisonWidget, compare

## Prepare Data and Split into Four DataFrames

In [None]:
# Both parquet files sit in ../data/lui-2021, resolved relative to the
# current working directory (i.e. where the notebook kernel was started).
data_dir = pathlib.Path.cwd() / ".." / "data" / "lui-2021"

# `df_full` is read from the "_full" file and is used further down to derive
# the per-marker annotations; `df` supplies the UMAP coordinates and the
# cluster labels for the embeddings.
df_full = pd.read_parquet(data_dir / "data_for_confusion_full.parquet")
df = pd.read_parquet(data_dir / "data_for_confusion.parquet")

# Marker names used to build the "<marker>_faust_annotation" column names.
# The order is significant: further down, the i-th name is paired positionally
# with the i-th "/"-separated field of `df_full.faust_clustering`.
# NOTE(review): this assumes the list order matches the field order inside the
# FAUST labels and that every label has exactly len(markers) fields -- confirm
# against the data before reordering or extending this list.
markers = [
    "CD3",
    "CD45RA",
    "CD8",
    "CLEC12A",
    "CD11b",
    "CD62L",
    "CD20",
    "HLA_DR",
    "CD11c",
    "CD14",
    "IgD",
    "CD4",
    "CD16",
    "CD45RO",
    "CD27",
    "CD19",
    "CD56",
]

# Build the four (embedding x labelling) frames: each combines one pair of
# UMAP coordinates (platform/"Ozette" or Symphony) with one set of cluster
# labels (FAUST or Liu/Symphony).

# FAUST labels, cleaned once and reused by both "*_ozette_labels" frames:
#   1. the literal label "rare" is replaced by the placeholder "0_0_0_0_0"
#      (NOTE(review): presumably the value `Embedding.from_ozette` treats as
#      "not a robust phenotype" -- confirm against the cev documentation);
#   2. every "/" separator is removed. `regex=False` makes the literal
#      replacement explicit instead of relying on the pandas-version-dependent
#      default of `regex`.
faust_labels = (
    df.faust_clustering.map(lambda s: "0_0_0_0_0" if s == "rare" else s)
    .str.replace("/", "", regex=False)
    .values
)

# Labels taken unchanged from the `liu_clustering` column.
liu_labels = df.liu_clustering.values


def _labelled_umap(x, y, labels):
    """Return a DataFrame with the columns ``umapX``, ``umapY``, ``faustLabels``.

    The label column is always called ``faustLabels`` -- even when it holds the
    `liu_clustering` labels -- so that all four frames share the same schema.

    Args:
        x: 1-D array of UMAP x coordinates, one entry per row.
        y: 1-D array of UMAP y coordinates, same length as ``x``.
        labels: 1-D array of cluster labels, same length as ``x``.
    """
    return pd.DataFrame({"umapX": x, "umapY": y, "faustLabels": labels})


df_ozette_umap_ozette_labels = _labelled_umap(
    df.platform_UMAP_X.values, df.platform_UMAP_Y.values, faust_labels
)

df_ozette_umap_symphony_labels = _labelled_umap(
    df.platform_UMAP_X.values, df.platform_UMAP_Y.values, liu_labels
)

df_symphony_umap_ozette_labels = _labelled_umap(
    df.symphony_UMAP_1.values, df.symphony_UMAP_2.values, faust_labels
)

df_symphony_umap_symphony_labels = _labelled_umap(
    df.symphony_UMAP_1.values, df.symphony_UMAP_2.values, liu_labels
)

# Split every full FAUST label into one column per "/"-separated field
# (leading "/" characters are stripped first so no empty first column appears).
# NOTE(review): this reads `df_full`, whereas the embedding frames are built
# from `df` -- presumably both files list the same rows in the same order;
# confirm, because the later column assignment aligns on the row index.
marker_annotations = (
    df_full["faust_clustering"]
    .str.lstrip("/")
    .str.split(pat="/", expand=True)
)

# Reduce each field to its final character.
# NOTE(review): presumably the "+" / "-" expression sign -- confirm that no
# label uses a longer suffix.
for field in marker_annotations.columns:
    marker_annotations[field] = marker_annotations[field].str[-1:]

# Attach the per-marker annotations to all four frames. Column i of
# `marker_annotations` becomes "<markers[i]>_faust_annotation", so the number
# of annotation columns must equal len(markers).
# NOTE(review): pandas aligns this assignment on the row index; it therefore
# assumes `marker_annotations` shares the frames' 0..n-1 row labels -- confirm.
annotation_columns = [f"{m}_faust_annotation" for m in markers]

for frame in (
    df_ozette_umap_ozette_labels,
    df_ozette_umap_symphony_labels,
    df_symphony_umap_ozette_labels,
    df_symphony_umap_symphony_labels,
):
    frame[annotation_columns] = marker_annotations

# Comparing the Ozette Against the Symphony Embedding using FAUST Labels

In [None]:
# Build one embedding per UMAP layout from the FAUST-labelled frames, using
# the same `from_ozette(..., robust_only=True)` call for both so the two sides
# stay comparable.
(
    ozette_umap_ozette_labels_embedding,
    symphony_umap_ozette_labels_embedding,
) = (
    Embedding.from_ozette(frame, robust_only=True)
    for frame in (df_ozette_umap_ozette_labels, df_symphony_umap_ozette_labels)
)

# Side-by-side comparison: Ozette layout first, Symphony layout second; the
# titles are listed in the same order as the embeddings.
comparison_ozette_vs_symphony_umap_with_ozette_labels = EmbeddingComparisonWidget(
    ozette_umap_ozette_labels_embedding,
    symphony_umap_ozette_labels_embedding,
    titles=[
        "Ozette Embedding with FAUST Labels",
        "Symphony Embedding with FAUST Labels",
    ],
    metric="neighborhood",
    # active_markers=["CD3"],
    selection="synced",
    auto_zoom=True,
    row_height=400,
)

# Last expression of the cell, so the notebook renders the widget.
comparison_ozette_vs_symphony_umap_with_ozette_labels

In [None]:
# Ask the comparison widget to select the phenotype described by these eight
# marker states: CD3+ CD4+ CD8- CD19- CD11b- with CD45RA- CD45RO+ CD62L+.
# NOTE(review): presumably a CD4+ central-memory-like T-cell population, and
# presumably the selection is mirrored in both views (selection="synced") --
# confirm both against the cev documentation / a domain expert.
comparison_ozette_vs_symphony_umap_with_ozette_labels.select(
    ["CD19-", "CD11b-", "CD3+", "CD4+", "CD8-", "CD45RA-", "CD45RO+", "CD62L+"]
)

In [None]:
# Ask the comparison widget to select the phenotype described by these eight
# marker states: CD3+ CD4+ CD8- CD19- CD11b- with CD45RA- CD45RO+ CD62L-.
# NOTE(review): presumably a CD4+ effector-memory-like T-cell population --
# confirm the naming with a domain expert.
comparison_ozette_vs_symphony_umap_with_ozette_labels.select(
    ["CD19-", "CD11b-", "CD3+", "CD4+", "CD8-", "CD45RA-", "CD45RO+", "CD62L-"]
)

In [None]:
# Ask the comparison widget to select the phenotype described by these eight
# marker states: CD3+ CD4+ CD8- CD19- CD11b- with CD45RA+ CD45RO- CD62L+.
# NOTE(review): presumably a naive-like CD4+ T-cell population -- confirm the
# naming with a domain expert.
comparison_ozette_vs_symphony_umap_with_ozette_labels.select(
    ["CD19-", "CD11b-", "CD3+", "CD4+", "CD8-", "CD45RA+", "CD45RO-", "CD62L+"]
)

In [None]:
# Ask the comparison widget to select the phenotype described by these eight
# marker states: CD3+ CD4+ CD8- CD19- CD11b- with CD45RA+ CD45RO- CD62L-.
# NOTE(review): presumably a CD45RA-re-expressing effector-like CD4+ T-cell
# population -- confirm the naming with a domain expert.
comparison_ozette_vs_symphony_umap_with_ozette_labels.select(
    ["CD19-", "CD11b-", "CD3+", "CD4+", "CD8-", "CD45RA+", "CD45RO-", "CD62L-"]
)

# Comparing the Ozette Against the Symphony Embedding using Symphony Labels

In [None]:
# The Symphony-labelled frames go through the plain `Embedding` constructor:
# an (n, 2) array of UMAP coordinates plus the label column. The label column
# is named `faustLabels` in these frames even though it holds the
# `liu_clustering` labels.
(
    ozette_umap_symphony_labels_embedding,
    symphony_umap_symphony_labels_embedding,
) = (
    Embedding(frame[["umapX", "umapY"]].values, frame.faustLabels)
    for frame in (df_ozette_umap_symphony_labels, df_symphony_umap_symphony_labels)
)

# Side-by-side comparison: Ozette layout first, Symphony layout second; the
# titles are listed in the same order as the embeddings.
comparison_ozette_vs_symphony_umap_with_symphony_labels = EmbeddingComparisonWidget(
    ozette_umap_symphony_labels_embedding,
    symphony_umap_symphony_labels_embedding,
    titles=[
        "Ozette Embedding with Symphony Labels",
        "Symphony Embedding with Symphony Labels",
    ],
    selection="synced",
    auto_zoom=True,
    row_height=400,
)

# Switch on the categorical legend in both panels, left first and then right.
for side in ("left", "right"):
    panel = getattr(comparison_ozette_vs_symphony_umap_with_symphony_labels, side)
    panel.categorical_scatter.legend(True)

# Last expression of the cell, so the notebook renders the widget.
comparison_ozette_vs_symphony_umap_with_symphony_labels