## seaborn

### 6-16

In [None]:
import os
import numpy as np
import anndata as ad
import pandas as pd

In [None]:
import sys
import os

# Directory that holds the project-local evaluation module imported below.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
main_folder = "/Users/apple/Desktop/KB/SCSeq_LineageBarcoding2/SCSeq_LineageBarcoding/SCLineage_ConstrativeLearning/main_semi_test"

# Make the module importable. The membership check keeps this cell idempotent:
# re-running it in the same kernel no longer stacks duplicate sys.path entries.
if main_folder not in sys.path:
    sys.path.append(main_folder)

from LCL_eval_final import LCL_Eval
from collections import Counter

In [None]:
# ───────────────────────────────────────────────────────────────────────────────
# Grid-search evaluation
#
# For every experiment folder under INPUT_DIR this cell:
#   1. parses the hyper-parameters (lambda, unlab, bs) from the folder name,
#   2. loads the train / test embedding arrays found in the folder,
#   3. computes adjusted-KNN statistics through LCL_Eval,
#   4. requests two plots (top-5 clones, test accuracy) with a savepath inside
#      the folder,
#   5. appends one summary row to `records`.
# The rows are finally printed as a DataFrame and written to a CSV in INPUT_DIR.
# ───────────────────────────────────────────────────────────────────────────────

# ───────────────────────────────────────────────────────────────────────────────
# Paths & constants
# ───────────────────────────────────────────────────────────────────────────────
INPUT_DIR  = "/Users/apple/Desktop/KB/data/feat_LCL_2025/Larry_top200/grid_search_testAsPenalty_1"
TRAIN_ANND = "/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_train.h5ad"
TEST_ANND  = "/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_test.h5ad"
num_knn = 30  # k handed to the KNN evaluation / plots; also part of output file names

# load once; every experiment below works on copies of these objects
adata_train = ad.read_h5ad(TRAIN_ANND)
adata_test  = ad.read_h5ad(TEST_ANND)

# flatten labels
train_labels = adata_train.obs["clone_id"].to_numpy()
test_labels  = adata_test.obs["clone_id"].to_numpy()

records = []

# rounding helper for the summary table (defined once instead of per iteration)
to_4 = lambda x: round(x, 4)

# ───────────────────────────────────────────────────────────────────────────────
# Loop over each hyperparam folder
# ───────────────────────────────────────────────────────────────────────────────
for exp_name in sorted(os.listdir(INPUT_DIR)):
    exp_path = os.path.join(INPUT_DIR, exp_name)
    if not os.path.isdir(exp_path):
        continue  # e.g. a previously written summary CSV

    # parse λ, unlab, bs from the folder name *before* any expensive work so a
    # malformed name fails immediately. The first three "_"-separated tokens are
    # used; starred unpacking tolerates trailing tokens such as "_testAsPenalty".
    # Fewer than three tokens still raises ValueError.
    lam_s, unlab_s, bs_s, *extra_tokens = exp_name.split("_")
    lam   = float(lam_s.replace("lambda",""))
    unlab = int(unlab_s.replace("unlab",""))
    bs    = int(bs_s.replace("bs",""))

    # find embeddings (sorted so the choice is deterministic if several match)
    files = sorted(os.listdir(exp_path))
    tr_file = next(
        (f for f in files if f.startswith("scBaseEncoderFeat_Z") and f.endswith(".npy")),
        None,
    )
    te_file = next(
        (f for f in files if "test_embedding" in f or f.startswith("scBaseEncoderFeat_test")),
        None,
    )
    if tr_file is None or te_file is None:
        # a bare next() would surface as an uninformative StopIteration
        raise FileNotFoundError(
            f"Missing embedding file(s) in {exp_path}: train={tr_file!r}, test={te_file!r}"
        )

    train_emb = np.load(os.path.join(exp_path, tr_file))
    test_emb  = np.load(os.path.join(exp_path, te_file))

    # build combined AnnData (copies keep the originals untouched across folders)
    tr = adata_train.copy()
    te = adata_test.copy()
    tr.obsm["LCL_embedding"] = train_emb
    te.obsm["LCL_embedding"] = test_emb
    tr.obs["dataset"] = "train"
    te.obs["dataset"] = "test"
    adata_all = ad.concat([tr, te], axis=0, join="outer")

    # instantiate helper
    plotter = LCL_Eval(
        adata       = adata_all,
        clone_key   = "clone_id",
        dataset_key = "dataset",
        num_top     = 5,
        palette     = None,
        umap_kwargs = {"random_state": 42}
    )

    # compute KNN stats
    stats = plotter.evaluate_adjusted_knn(
        train_embeddings = train_emb,
        train_labels     = train_labels,
        test_embeddings  = test_emb,
        test_labels      = test_labels,
        k                = num_knn
    )

    # unpack stats: `stats` is indexed by split ("train" / "test"), then by metric
    tr_acc = stats["train"]["overall_accuracy"]
    tr_rank= stats["train"]["overall_avg_rank"]
    tr_qs  = stats["train"]["rank_quantiles"]
    te_acc = stats["test"]["overall_accuracy"]
    te_rank= stats["test"]["overall_avg_rank"]
    te_qs  = stats["test"]["rank_quantiles"]

    # NOTE(review): the figures returned below are never closed; if they are
    # matplotlib figures, consider closing them per iteration to bound memory.

    # plot #1: top‐5 clones
    fig1, ax1 = plotter.plot_top_clones_umap(
        figsize  = (7,7),
        title    = f"Top 5 Clones // λ={lam}, unlab={unlab}, bs={bs}",
        savepath = os.path.join(exp_path, f"umap_top5clones_lambda{lam}_unlab{unlab}_bs{bs}_k{num_knn}.png")
    )

    # plot #2: test‐cell accuracy
    fig2, ax2 = plotter.plot_test_accuracy_umap(
        train_embeddings = train_emb,
        train_labels     = train_labels,
        test_embeddings  = test_emb,
        test_labels      = test_labels,
        k                = num_knn,
        figsize          = (7,7),
        title            = f"Test Accuracy // λ={lam}, unlab={unlab}, bs={bs}, k={num_knn}",
        savepath         = os.path.join(exp_path, f"umap_testAccuracy_lambda{lam}_unlab{unlab}_bs{bs}_k{num_knn}.png")
    )

    # record the three hyperparams + ten stats (5 train, 5 test), rounded to 4 dp
    records.append({
        "lambda":            lam,
        "unlabeled_per_batch":unlab,
        "batch_size":        bs,
        "train_overall_acc": to_4(tr_acc),
        "train_overall_rank":to_4(tr_rank),
        "train_q25":         to_4(tr_qs["q25"]),
        "train_q50":         to_4(tr_qs["q50"]),
        "train_q75":         to_4(tr_qs["q75"]),
        "test_overall_acc":  to_4(te_acc),
        "test_overall_rank": to_4(te_rank),
        "test_q25":          to_4(te_qs["q25"]),
        "test_q50":          to_4(te_qs["q50"]),
        "test_q75":          to_4(te_qs["q75"]),
    })

# build summary DataFrame (one row per experiment folder)
df = pd.DataFrame(records)
print(df)

# save next to the experiment folders; k is part of the file name
out_csv = os.path.join(INPUT_DIR, f"grid_search_summary_k{num_knn}.csv")
df.to_csv(out_csv, index=False)
print(f">>> Saved summary to {out_csv}")

### 7-7

In [1]:
import os
import numpy as np
import anndata as ad
import pandas as pd

In [2]:
import sys
import os

# Directory that holds the project-local evaluation module imported below.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
main_folder = "/Users/apple/Desktop/KB/SCSeq_LineageBarcoding2/SCSeq_LineageBarcoding/SCLineage_ConstrativeLearning/main_semi_test"

# Make the module importable. The membership check keeps this cell idempotent:
# re-running it in the same kernel no longer stacks duplicate sys.path entries.
if main_folder not in sys.path:
    sys.path.append(main_folder)

from LCL_eval_final_final_final import LCL_Eval
from collections import Counter

In [None]:

import scanpy as sc

# ───────────────────────────────────────────────────────────────────────────────
# Grid-search evaluation
#
# For every experiment folder under INPUT_DIR this cell:
#   1. parses the hyper-parameters (lambda, unlab, bs) from the folder name,
#   2. loads the train / test embedding arrays found in the folder,
#   3. computes adjusted-KNN statistics through LCL_Eval,
#   4. requests three plots (top-5 clones, test accuracy, lineage size vs.
#      accuracy) with a savepath inside the folder,
#   5. appends one summary row to `records`.
# The rows are finally printed as a DataFrame and written to a CSV in INPUT_DIR.
# ───────────────────────────────────────────────────────────────────────────────

# ───────────────────────────────────────────────────────────────────────────────
# Paths & constants
# ───────────────────────────────────────────────────────────────────────────────
INPUT_DIR     = "/Users/apple/Desktop/KB/data/feat_LCL_2025/Larry_top200/grid_search_testAsPenalty_1"
TRAIN_ANND    = "/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_train.h5ad"
TEST_ANND     = "/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_test.h5ad"
NUM_NEIGHBORS = 30  # k handed to the KNN evaluation / plots; also in the CSV file name

# load the two AnnDatas once; every experiment below works on copies
adata_train = sc.read_h5ad(TRAIN_ANND)
adata_test  = sc.read_h5ad(TEST_ANND)

train_labels = adata_train.obs["clone_id"].to_numpy()
test_labels  = adata_test.obs["clone_id"].to_numpy()

records = []

for exp_name in sorted(os.listdir(INPUT_DIR)):
    exp_path = os.path.join(INPUT_DIR, exp_name)
    if not os.path.isdir(exp_path):
        continue  # e.g. a previously written summary CSV

    # parse λ, unlab, bs from the folder name *before* any expensive work so a
    # malformed name fails immediately. Accepted layouts:
    #   "lambda0.01_unlab15_bs150"
    #   "lambda0.01_unlab15_bs150_testAsPenalty"   (trailing tokens are ignored)
    # Fewer than three "_"-separated tokens still raises ValueError.
    lam_s, unlab_s, bs_s, *extra_tokens = exp_name.split("_")
    lam   = float(lam_s.replace("lambda",""))
    unlab = int(unlab_s.replace("unlab",""))
    bs    = int(bs_s.replace("bs",""))

    # locate embeddings (sorted so the choice is deterministic if several match)
    files   = sorted(os.listdir(exp_path))
    tr_file = next(
        (f for f in files if f.startswith("scBaseEncoderFeat_Z") and f.endswith(".npy")),
        None,
    )
    te_file = next((f for f in files if "test_embedding" in f), None)
    if tr_file is None or te_file is None:
        # a bare next() would surface as an uninformative StopIteration
        raise FileNotFoundError(
            f"Missing embedding file(s) in {exp_path}: train={tr_file!r}, test={te_file!r}"
        )

    train_emb = np.load(os.path.join(exp_path, tr_file))
    test_emb  = np.load(os.path.join(exp_path, te_file))

    # stitch into one AnnData (copies keep the originals untouched across folders)
    tr = adata_train.copy()
    te = adata_test.copy()
    tr.obsm["LCL_embedding"] = train_emb
    te.obsm["LCL_embedding"]  = test_emb
    tr.obs["dataset"] = "train"
    te.obs["dataset"] = "test"
    adata_all = sc.concat([tr, te], axis=0, join="outer")

    # instantiate evaluator
    plotter = LCL_Eval(
        adata       = adata_all,
        clone_key   = "clone_id",
        dataset_key = "dataset",
        num_top     = 5,
        palette     = None,
        umap_kwargs = {"random_state": 42}
    )

    # compute stats: `stats` is indexed by split ("train" / "test"), then by metric
    stats = plotter.evaluate_adjusted_knn(
        train_embeddings = train_emb,
        train_labels     = train_labels,
        test_embeddings  = test_emb,
        test_labels      = test_labels,
        k                = NUM_NEIGHBORS
    )

    # unpack train stats
    tr_acc    = stats["train"]["accuracy"]
    tr_unique = stats["train"]["avg_unique"]

    # unpack test stats
    te_acc    = stats["test"]["accuracy"]
    te_cont   = stats["test"]["containment_rate"]
    te_rank   = stats["test"]["overall_avg_rank"]
    te_unique = stats["test"]["avg_unique"]
    uq        = stats["test"]["unique_quantiles"]    # read below via keys q0..q100
    aq        = stats["test"]["accuracy_quantiles"]  # read below via keys q0..q100

    # NOTE(review): the figures returned below are never closed; if they are
    # matplotlib figures, consider closing them per iteration to bound memory.

    # ─── Plot #1: Top‐5 clones UMAP ───────────────────────────────────────────────
    fig1, ax1 = plotter.plot_top_clones_umap(
        figsize  = (7,7),
        title    = f"Top‐5 Clones // λ={lam}, unlab={unlab}, bs={bs}",
        savepath = os.path.join(
            exp_path,
            f"umap_top5_λ{lam}_unlab{unlab}_bs{bs}.png"
        )
    )

    # ─── Plot #2: Test‐cell accuracy UMAP ─────────────────────────────────────────
    fig2, ax2 = plotter.plot_test_accuracy_umap(
        train_embeddings = train_emb,
        train_labels     = train_labels,
        test_embeddings  = test_emb,
        test_labels      = test_labels,
        k                = NUM_NEIGHBORS,
        figsize          = (7,7),
        title            = f"Test Acc // λ={lam}, unlab={unlab}, bs={bs}",
        savepath         = os.path.join(
            exp_path,
            f"umap_testAcc_λ{lam}_unlab{unlab}_bs{bs}.png"
        )
    )

    # ─── Plot #3: Lineage size vs. test accuracy ─────────────────────────────────
    fig3, ax3 = plotter.plot_lineage_size_vs_accuracy(
        train_embeddings = train_emb,
        train_labels     = train_labels,
        test_embeddings  = test_emb,
        test_labels      = test_labels,
        k                = NUM_NEIGHBORS,
        figsize          = (6,6),
        title            = f"Lineage Size vs Acc // λ={lam}, unlab={unlab}, bs={bs}",
        savepath         = os.path.join(
            exp_path,
            f"lineageSize_vs_acc_λ{lam}_unlab{unlab}_bs{bs}.png"
        )
    )

    # collect into records (scalar stats rounded to 4 dp; quantiles stored as returned)
    records.append({
        "lambda":               lam,
        "unlabeled_per_batch":  unlab,
        "batch_size":           bs,

        "train_overall_acc":    round(tr_acc,4),
        "train_overall_unique": round(tr_unique,4),

        "test_overall_acc":     round(te_acc,4),
        "test_containment":     round(te_cont,4),
        "test_overall_rank":    round(te_rank,4),
        "test_overall_unique":  round(te_unique,4),

        # unique‐quantiles
        "test_unique_q0":       uq["q0"],
        "test_unique_q25":      uq["q25"],
        "test_unique_q50":      uq["q50"],
        "test_unique_q75":      uq["q75"],
        "test_unique_q100":     uq["q100"],

        # accuracy‐quantiles
        "test_accuracy_q0":     aq["q0"],
        "test_accuracy_q25":    aq["q25"],
        "test_accuracy_q50":    aq["q50"],
        "test_accuracy_q75":    aq["q75"],
        "test_accuracy_q100":   aq["q100"],
    })

# build & save summary (one row per experiment folder)
df = pd.DataFrame(records)
print(df)

out_csv = os.path.join(INPUT_DIR, f"grid_search_summary_k{NUM_NEIGHBORS}.csv")
df.to_csv(out_csv, index=False)
print(f">>> Saved summary to {out_csv}")

  warn(
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



ValueError: not enough values to unpack (expected 4, got 3)