In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. a locally edited `embcomp`) are reloaded before each cell executes.
%load_ext autoreload
%autoreload 2

In [None]:
import pathlib
import sys

import pandas as pd

import embcomp as ec

### Ozette vs Regular

In [None]:
# File-name stem shared by both parquet exports used in this section.
name = "TISSUE_138_samples_FM96_OM138_035_CD45_live_fcs_110595"
# Data folder, located relative to the kernel's current working directory.
data_dir = pathlib.Path.cwd().joinpath("..", "data", "mair-2022-ismb")

# Two UMAP tables for the same stem: the plain export and the annotated one.
raw, annotated = (
    pd.read_parquet(data_dir / filename)
    for filename in (f"{name}_umap.parquet", f"{name}_umap_annotated.parquet")
)

# Wrap each table as an embedding (`a` from `raw`, `b` from `annotated`),
# passing robust_only=True in both cases.
a, b = (
    ec.widgets.Embedding.from_ozette(table, robust_only=True)
    for table in (raw, annotated)
)

In [None]:
# Compare the embedding built from the raw table (`a`) with the one built
# from the annotated table (`b`).
w = ec.widgets.compare(a, b)
# Bare trailing expression so the notebook displays the returned object.
w

In [None]:
# Inspect the comparison's `ilocs` attribute.
# NOTE(review): presumably the row positions currently selected in the
# widget — confirm against embcomp.
w.ilocs

## Mair 2022

In [None]:
# Build both sides of the comparison the same way: read the sample's
# embedding parquet and wrap it with robust_only=True. The dict keys are the
# keyword names (`a`, `b`) expected by `ec.widgets.compare`.
w2 = ec.widgets.compare(
    **{
        side: ec.widgets.Embedding.from_ozette(
            df=pd.read_parquet(parquet_path),
            robust_only=True,
        )
        for side, parquet_path in (
            ("a", "../data/mair-2022/OZEXPSMPL_26134_embedding_output.parquet"),
            ("b", "../data/mair-2022/OZEXPSMPL_26136_embedding_output.parquet"),
        )
    }
)

# Bare trailing expression so the notebook displays the returned object.
w2

In [None]:
# Inspect the `distances` attribute of the comparison's `right` member.
w2.right.distances

## IFN-γ-independent immune markers of Mycobacterium tuberculosis exposure


In [None]:
import pathlib

import pandas as pd


def load_ics_example(subset: pd.DataFrame, data_dir="~/data/ICS_external_analysis"):
    """Lazily yield one embedding for every row of ``subset``.

    For each row, the file ``<oz_id>_embedding_output.parquet`` inside
    ``data_dir`` is read and handed to ``ec.widgets.Embedding.from_ozette``
    with ``robust_only=True``. Rows are processed in the order in which
    ``subset`` stores them, and nothing is read until the generator is
    consumed.

    Parameters
    ----------
    subset : pandas.DataFrame
        Table with an ``oz_id`` column identifying the files to load.
    data_dir : str or path-like
        Directory containing the embedding parquet files.

    Yields
    ------
    The object returned by ``ec.widgets.Embedding.from_ozette`` for each row.
    """
    root = pathlib.Path(data_dir)
    for _, record in subset.iterrows():
        parquet_path = root / f"{record['oz_id']}_embedding_output.parquet"
        frame = pd.read_parquet(parquet_path)
        yield ec.widgets.Embedding.from_ozette(frame, robust_only=True)


def select_subset(
    metadata_file="../data/ics/ics_experiment_metadata_table.parquet",
    treatments=("Peptide Pool 1",),
    n=1,
    random_state=123,
):
    """Draw a reproducible random sample of experiments per treatment and cohort.

    The metadata table is read from ``metadata_file``, restricted to rows whose
    ``Treatment`` is one of ``treatments``, and ``n`` rows are then sampled
    from every ``(Treatment, arm_or_cohort_name)`` group.

    Parameters
    ----------
    metadata_file : str or path-like
        Parquet file with the experiment metadata. It must provide the
        ``Treatment`` and ``arm_or_cohort_name`` columns.
    treatments : str or iterable of str, default ``("Peptide Pool 1",)``
        Treatment label(s) to keep. Other labels previously listed as options
        here: ``"TB Lysate"``, ``"DMSO"``, ``"SEB"``, ``"Peptide Pool 2"``.
    n : int, default 1
        Number of rows to sample from each group.
    random_state : int, default 123
        Seed forwarded to the group-wise sampler so the draw is repeatable.

    Returns
    -------
    pandas.DataFrame
        The sampled rows, re-indexed from 0. To restrict the result to a
        single cohort (e.g. ``"LTBI"``), filter it on ``arm_or_cohort_name``.
    """
    # A bare string is itself iterable; wrap it so that "DMSO" is matched as
    # one label rather than as the characters "D", "M", "S", "O".
    if isinstance(treatments, str):
        treatments = [treatments]

    metadata = pd.read_parquet(metadata_file)
    selected = metadata[metadata["Treatment"].isin(list(treatments))]
    return (
        selected.groupby(["Treatment", "arm_or_cohort_name"])
        .sample(n=n, random_state=random_state)
        .reset_index(drop=True)
    )


# Sample the experiments to compare, using select_subset's default arguments.
subset = select_subset()
# Bare trailing expression so the notebook displays the sampled table.
subset

In [None]:
# Label the two sides by cohort: row 0 of `subset` is reported as "left" and
# row 1 as "right", i.e. the order in which load_ics_example yields them.
# NOTE(review): this reads rows 0 and 1 explicitly, so it assumes `subset`
# has (at least) two rows — confirm that exactly two are expected.
print(
    f"left: {subset.arm_or_cohort_name.loc[0]}, right: {subset.arm_or_cohort_name.loc[1]}"
)
# Unpack the lazily loaded embeddings as positional arguments.
# NOTE: this rebinds `w`, replacing the comparison created earlier in the
# notebook under the same name.
w = ec.widgets.compare(*load_ics_example(subset))
# Bare trailing expression so the notebook displays the returned object.
w

In [None]:
import numpy as np

# Two phenotype label strings; they differ only in the TNF and IFNg markers
# (both "+" in `phenoa`, both "-" in `phenob`).
phenoa = "CD4+CD3+CD8-TNF+CD107a-IL4-IFNg+IL2+CD154+IL17a-"
# NOTE(review): `phenob` is not referenced in the visible cells.
phenob = "CD4+CD3+CD8-TNF-CD107a-IL4-IFNg-IL2+CD154+IL17a-"

# Highlight a specific label: set the left scatter widget's selection to the
# positions at which the left-hand labels equal `phenoa`.
w.left.scatter.widget.selection = np.where(w.left.labels == phenoa)[0]