## seaborn

In [1]:
import os
import numpy as np
import anndata as ad
import pandas as pd

In [2]:
import sys
import os

# Folder added to the import path right before `LCL_eval_final` is imported
# (presumably where that module lives — confirm).
# NOTE(review): machine-specific absolute path — adjust when running elsewhere.
main_folder = "/Users/apple/Desktop/KB/SCSeq_LineageBarcoding2/SCSeq_LineageBarcoding/SCLineage_ConstrativeLearning/main_semi_test"

# Make the folder importable. Guarded so that re-running this cell does not
# keep appending duplicate entries to sys.path.
if main_folder not in sys.path:
    sys.path.append(main_folder)

from LCL_eval_final import LCL_Eval
from collections import Counter

In [None]:
# ───────────────────────────────────────────────────────────────────────────────
# Paths & constants
# ───────────────────────────────────────────────────────────────────────────────
# NOTE(review): machine-specific absolute paths — adjust when running elsewhere.
INPUT_DIR  = "/Users/apple/Desktop/KB/data/feat_LCL_2025/Larry_top200/grid_search_testAsPenalty_1"
TRAIN_ANND = "/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_train.h5ad"
TEST_ANND  = "/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_test.h5ad"
num_knn = 30  # passed as `k` to the KNN evaluation and the test-accuracy plot

# Load the train/test AnnData once; every experiment below works on copies.
adata_train = ad.read_h5ad(TRAIN_ANND)
adata_test  = ad.read_h5ad(TEST_ANND)

# Clone labels as plain NumPy arrays.
train_labels = adata_train.obs["clone_id"].to_numpy()
test_labels  = adata_test.obs["clone_id"].to_numpy()

records = []                   # one summary row per experiment folder
to_4 = lambda x: round(x, 4)   # rounding applied to every reported statistic

# ───────────────────────────────────────────────────────────────────────────────
# Loop over each hyperparam folder
# ───────────────────────────────────────────────────────────────────────────────
for exp_name in sorted(os.listdir(INPUT_DIR)):
    exp_path = os.path.join(INPUT_DIR, exp_name)
    if not os.path.isdir(exp_path):
        continue

    # Parse λ, unlab, bs from the folder name *before* the expensive evaluation,
    # so a folder that is not named "lambda<λ>_unlab<n>_bs<n>" fails fast with a
    # message that says which folder is at fault.
    try:
        lam_s, unlab_s, bs_s = exp_name.split("_")
        lam   = float(lam_s.replace("lambda", ""))
        unlab = int(unlab_s.replace("unlab", ""))
        bs    = int(bs_s.replace("bs", ""))
    except ValueError as err:
        raise ValueError(
            f"Cannot parse hyperparameters from folder {exp_name!r}; "
            "expected a name like 'lambda<λ>_unlab<n>_bs<n>'"
        ) from err

    # Find the embeddings. The listing is sorted so the pick is deterministic
    # when several files match, and both matchers require a .npy file.
    files = sorted(os.listdir(exp_path))
    tr_file = next(
        (f for f in files
         if f.startswith("scBaseEncoderFeat_Z") and f.endswith(".npy")),
        None,
    )
    te_file = next(
        (f for f in files
         if ("test_embedding" in f or f.startswith("scBaseEncoderFeat_test"))
         and f.endswith(".npy")),
        None,
    )
    if tr_file is None or te_file is None:
        raise FileNotFoundError(
            f"Missing train and/or test embedding .npy file in {exp_path!r} "
            f"(train={tr_file!r}, test={te_file!r})"
        )

    train_emb = np.load(os.path.join(exp_path, tr_file))
    test_emb  = np.load(os.path.join(exp_path, te_file))

    # Build the combined AnnData on copies so the loaded objects stay untouched.
    tr = adata_train.copy()
    te = adata_test.copy()
    tr.obsm["LCL_embedding"] = train_emb
    te.obsm["LCL_embedding"] = test_emb
    tr.obs["dataset"] = "train"
    te.obs["dataset"] = "test"
    adata_all = ad.concat([tr, te], axis=0, join="outer")

    # Instantiate the project-local evaluation/plotting helper.
    plotter = LCL_Eval(
        adata       = adata_all,
        clone_key   = "clone_id",
        dataset_key = "dataset",
        num_top     = 5,
        palette     = None,
        umap_kwargs = {"random_state": 42}
    )

    # Compute KNN stats; the result is indexed below by "train"/"test".
    stats = plotter.evaluate_adjusted_knn(
        train_embeddings = train_emb,
        train_labels     = train_labels,
        test_embeddings  = test_emb,
        test_labels      = test_labels,
        k                = num_knn
    )

    # Unpack stats.
    tr_acc = stats["train"]["overall_accuracy"]
    tr_rank= stats["train"]["overall_avg_rank"]
    tr_qs  = stats["train"]["rank_quantiles"]
    te_acc = stats["test"]["overall_accuracy"]
    te_rank= stats["test"]["overall_avg_rank"]
    te_qs  = stats["test"]["rank_quantiles"]

    # NOTE(review): two figures are created per experiment and never closed in
    # this cell; if LCL_Eval does not close them itself, a large grid may
    # accumulate open figures — confirm.

    # plot #1: top‐5 clones
    fig1, ax1 = plotter.plot_top_clones_umap(
        figsize  = (7,7),
        title    = f"Top 5 Clones // λ={lam}, unlab={unlab}, bs={bs}",
        savepath = os.path.join(exp_path, f"umap_top5clones_lambda{lam}_unlab{unlab}_bs{bs}_k{num_knn}.png")
    )

    # plot #2: test‐cell accuracy
    fig2, ax2 = plotter.plot_test_accuracy_umap(
        train_embeddings = train_emb,
        train_labels     = train_labels,
        test_embeddings  = test_emb,
        test_labels      = test_labels,
        k                = num_knn,
        figsize          = (7,7),
        title            = f"Test Accuracy // λ={lam}, unlab={unlab}, bs={bs}, k={num_knn}",
        savepath         = os.path.join(exp_path, f"umap_testAccuracy_lambda{lam}_unlab{unlab}_bs{bs}_k{num_knn}.png")
    )

    # Record the three hyperparameters plus the rounded train/test statistics.
    records.append({
        "lambda":            lam,
        "unlabeled_per_batch":unlab,
        "batch_size":        bs,
        "train_overall_acc": to_4(tr_acc),
        "train_overall_rank":to_4(tr_rank),
        "train_q25":         to_4(tr_qs["q25"]),
        "train_q50":         to_4(tr_qs["q50"]),
        "train_q75":         to_4(tr_qs["q75"]),
        "test_overall_acc":  to_4(te_acc),
        "test_overall_rank": to_4(te_rank),
        "test_q25":          to_4(te_qs["q25"]),
        "test_q50":          to_4(te_qs["q50"]),
        "test_q75":          to_4(te_qs["q75"]),
    })

# Build the summary DataFrame (one row per experiment folder).
df = pd.DataFrame(records)
print(df)

# Save the summary next to the experiment folders.
out_csv = os.path.join(INPUT_DIR, f"grid_search_summary_k{num_knn}.csv")
df.to_csv(out_csv, index=False)
print(f">>> Saved summary to {out_csv}")

In [None]:
# ───────────────────────────────────────────────────────────────────────────────
# Paths & constants (independent of k, so defined once outside the k loop)
# ───────────────────────────────────────────────────────────────────────────────
# NOTE(review): machine-specific absolute paths — adjust when running elsewhere.
INPUT_DIR  = "/Users/apple/Desktop/KB/data/feat_LCL_2025/Larry_top200/grid_search_testAsPenalty_1"
TRAIN_ANND = "/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_train.h5ad"
TEST_ANND  = "/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_test.h5ad"

# Load once: the AnnData objects are only copied below, never modified, so
# re-reading them for every k was redundant work.
adata_train = ad.read_h5ad(TRAIN_ANND)
adata_test  = ad.read_h5ad(TEST_ANND)

# Clone labels as plain NumPy arrays.
train_labels = adata_train.obs["clone_id"].to_numpy()
test_labels  = adata_test.obs["clone_id"].to_numpy()

to_4 = lambda x: round(x, 4)   # rounding applied to every reported statistic

# One summary CSV (and one pair of plots per experiment) for each k.
for num_k in [5, 10, 20, 30]:
    num_knn = num_k
    records = []   # one summary row per experiment folder, for this k

    # ───────────────────────────────────────────────────────────────────────────────
    # Loop over each hyperparam folder
    # ───────────────────────────────────────────────────────────────────────────────
    for exp_name in sorted(os.listdir(INPUT_DIR)):
        exp_path = os.path.join(INPUT_DIR, exp_name)
        if not os.path.isdir(exp_path):
            continue

        # Parse λ, unlab, bs from the folder name *before* the expensive
        # evaluation, so a folder that is not named "lambda<λ>_unlab<n>_bs<n>"
        # fails fast with a message that says which folder is at fault.
        try:
            lam_s, unlab_s, bs_s = exp_name.split("_")
            lam   = float(lam_s.replace("lambda", ""))
            unlab = int(unlab_s.replace("unlab", ""))
            bs    = int(bs_s.replace("bs", ""))
        except ValueError as err:
            raise ValueError(
                f"Cannot parse hyperparameters from folder {exp_name!r}; "
                "expected a name like 'lambda<λ>_unlab<n>_bs<n>'"
            ) from err

        # Find the embeddings. The listing is sorted so the pick is
        # deterministic when several files match, and both matchers require
        # a .npy file.
        files = sorted(os.listdir(exp_path))
        tr_file = next(
            (f for f in files
             if f.startswith("scBaseEncoderFeat_Z") and f.endswith(".npy")),
            None,
        )
        te_file = next(
            (f for f in files
             if ("test_embedding" in f or f.startswith("scBaseEncoderFeat_test"))
             and f.endswith(".npy")),
            None,
        )
        if tr_file is None or te_file is None:
            raise FileNotFoundError(
                f"Missing train and/or test embedding .npy file in {exp_path!r} "
                f"(train={tr_file!r}, test={te_file!r})"
            )

        train_emb = np.load(os.path.join(exp_path, tr_file))
        test_emb  = np.load(os.path.join(exp_path, te_file))

        # Build the combined AnnData on copies so the loaded objects stay untouched.
        tr = adata_train.copy()
        te = adata_test.copy()
        tr.obsm["LCL_embedding"] = train_emb
        te.obsm["LCL_embedding"] = test_emb
        tr.obs["dataset"] = "train"
        te.obs["dataset"] = "test"
        adata_all = ad.concat([tr, te], axis=0, join="outer")

        # Instantiate the project-local evaluation/plotting helper.
        plotter = LCL_Eval(
            adata       = adata_all,
            clone_key   = "clone_id",
            dataset_key = "dataset",
            num_top     = 5,
            palette     = None,
            umap_kwargs = {"random_state": 42}
        )

        # Compute KNN stats; the result is indexed below by "train"/"test".
        stats = plotter.evaluate_adjusted_knn(
            train_embeddings = train_emb,
            train_labels     = train_labels,
            test_embeddings  = test_emb,
            test_labels      = test_labels,
            k                = num_knn
        )

        # Unpack stats.
        tr_acc = stats["train"]["overall_accuracy"]
        tr_rank= stats["train"]["overall_avg_rank"]
        tr_qs  = stats["train"]["rank_quantiles"]
        tr_unique = stats["train"]["overall_avg_unique"]
        te_acc = stats["test"]["overall_accuracy"]
        te_rank= stats["test"]["overall_avg_rank"]
        te_unique = stats["test"]["overall_avg_unique"]
        te_qs  = stats["test"]["rank_quantiles"]

        # NOTE(review): two figures are created per experiment and per k and
        # never closed in this cell; if LCL_Eval does not close them itself,
        # the run may accumulate many open figures — confirm.

        # plot #1: top‐5 clones
        fig1, ax1 = plotter.plot_top_clones_umap(
            figsize  = (7,7),
            title    = f"Top 5 Clones // λ={lam}, unlab={unlab}, bs={bs}",
            savepath = os.path.join(exp_path, f"umap_top5clones_lambda{lam}_unlab{unlab}_bs{bs}_k{num_knn}.png")
        )

        # plot #2: test‐cell accuracy
        fig2, ax2 = plotter.plot_test_accuracy_umap(
            train_embeddings = train_emb,
            train_labels     = train_labels,
            test_embeddings  = test_emb,
            test_labels      = test_labels,
            k                = num_knn,
            figsize          = (7,7),
            title            = f"Test Accuracy // λ={lam}, unlab={unlab}, bs={bs}, k={num_knn}",
            savepath         = os.path.join(exp_path, f"umap_testAccuracy_lambda{lam}_unlab{unlab}_bs{bs}_k{num_knn}.png")
        )

        # Record the three hyperparameters plus the rounded train/test statistics.
        records.append({
            "lambda":            lam,
            "unlabeled_per_batch":unlab,
            "batch_size":        bs,
            "train_overall_acc": to_4(tr_acc),
            "train_overall_unique":to_4(tr_unique),
            "train_overall_rank":to_4(tr_rank),
            "train_q25":         to_4(tr_qs["q25"]),
            "train_q50":         to_4(tr_qs["q50"]),
            "train_q75":         to_4(tr_qs["q75"]),
            "test_overall_acc":  to_4(te_acc),
            "test_overall_unique": to_4(te_unique),
            "test_overall_rank": to_4(te_rank),
            "test_q25":          to_4(te_qs["q25"]),
            "test_q50":          to_4(te_qs["q50"]),
            "test_q75":          to_4(te_qs["q75"]),
        })

    # Build the summary DataFrame for this k (one row per experiment folder).
    df = pd.DataFrame(records)
    print(df)

    # Save the per-k summary next to the experiment folders.
    out_csv = os.path.join(INPUT_DIR, f"grid_search_summary_k{num_knn}.csv")
    df.to_csv(out_csv, index=False)
    print(f">>> Saved summary to {out_csv}")

### anndata loading

In [None]:
# Compare three KNN evaluations for every experiment under INPUT_DIR/<grid>/<experiment>
# and collect one summary row per experiment.
#
# NOTE(review): the following names are used here but are not defined or imported in any
# cell shown in this notebook, so this cell raises NameError on a fresh kernel unless they
# come from elsewhere — confirm: `LCL_eval`, `TRAIN_ADATA`, `TEST_ADATA`,
# `KNeighborsClassifier`, `adjusted_knn_predict_with_rank`,
# `adjusted_knn_predict_with_rank_v2`, `global_freq`.
# `INPUT_DIR`, `train_labels` and `test_labels` are read from earlier cells.
records = []

for grid_name in sorted(os.listdir(INPUT_DIR)):
    grid_path = os.path.join(INPUT_DIR, grid_name)
    if not os.path.isdir(grid_path): continue

    for exp_name in sorted(os.listdir(grid_path)):
        exp_path = os.path.join(grid_path, exp_name)
        if not os.path.isdir(exp_path): continue

        # find your two .npy files
        # (next() without a default raises StopIteration if no file matches;
        #  the test matcher does not require a .npy suffix)
        files = os.listdir(exp_path)
        tr_file = next(f for f in files if f.startswith("scBaseEncoderFeat_Z") and f.endswith(".npy"))
        te_file = next(f for f in files if "test_embedding" in f or f.startswith("scBaseEncoderFeat_test"))

        train_emb = np.load(os.path.join(exp_path, tr_file))
        test_emb = np.load(os.path.join(exp_path, te_file))

        # Original KNN evaluation
        # NOTE(review): train uses n_neighbors=5 while test uses n_neighbors=30 —
        # confirm the mismatch is intended.
        ev = LCL_eval.Eval(train_emb, TRAIN_ADATA)
        orig_tr_acc = ev.KNN_train(n_neighbors=5)
        orig_te_acc = ev.KNN_test(test_emb, TEST_ADATA, n_neighbors=30)

        # Modified KNN classifier (sklearn-based)
        knn_model = KNeighborsClassifier(n_neighbors=30)
        knn_model.fit(train_emb, train_labels)

        # Only the first and third returned values (predictions, average rank) are kept.
        preds_mod1_train, _, avg_rank_mod1_train, _ = adjusted_knn_predict_with_rank(
            knn_model, train_labels, train_emb, train_labels, global_freq, k=30)
        preds_mod1_test, _, avg_rank_mod1_test, _ = adjusted_knn_predict_with_rank(
            knn_model, train_labels, test_emb, test_labels, global_freq, k=30)

        # Accuracy = fraction of predictions equal to the true label.
        mod1_tr_acc = (preds_mod1_train == train_labels).mean()
        mod1_te_acc = (preds_mod1_test == test_labels).mean()

        # Modified KNN classifier v2 (distance-based)
        preds_mod2_train, _, avg_rank_mod2_train, _ = adjusted_knn_predict_with_rank_v2(
            train_emb, train_labels, train_emb, train_labels, global_freq, k=30)
        preds_mod2_test, _, avg_rank_mod2_test, _ = adjusted_knn_predict_with_rank_v2(
            train_emb, train_labels, test_emb, test_labels, global_freq, k=30)

        mod2_tr_acc = (preds_mod2_train == train_labels).mean()
        mod2_te_acc = (preds_mod2_test == test_labels).mean()

        # parse hyperparams out of exp_name
        # (expects exactly three "_"-separated parts: lambda<λ>_unlab<n>_bs<n>;
        #  any other folder name raises ValueError here, after the evaluation above)
        lam, unlab, bs = exp_name.split("_")
        lam = float(lam.replace("lambda", ""))
        unlab = int(unlab.replace("unlab", ""))
        bs = int(bs.replace("bs", ""))

        # Record results
        records.append({
            "grid": grid_name,
            "lambda": lam,
            "unlabeled": unlab,
            "batch_size": bs,

            # Original KNN accuracy
            "orig_train_acc": orig_tr_acc,
            "orig_test_acc": orig_te_acc,

            # Modified KNN v1 accuracy
            "mod1_train_acc": mod1_tr_acc,
            "mod1_test_acc": mod1_te_acc,
            "mod1_avg_rank_train": avg_rank_mod1_train,
            "mod1_avg_rank_test": avg_rank_mod1_test,

            # Modified KNN v2 accuracy
            "mod2_train_acc": mod2_tr_acc,
            "mod2_test_acc": mod2_te_acc,
            "mod2_avg_rank_train": avg_rank_mod2_train,
            "mod2_avg_rank_test": avg_rank_mod2_test
        })

# Create summary DataFrame (one row per experiment; overwrites any earlier `df`)
df = pd.DataFrame(records)
print(df)



In [None]:
# Display the most recently built summary DataFrame.
df

In [None]:
# Persist the summary; the relative path means the file lands in the kernel's
# current working directory, not under INPUT_DIR.
df.to_csv("knn_evaluation_summary.csv", index=False)

In [None]:
# Root of the semi-supervised grid-search outputs.
# NOTE: this rebinds INPUT_DIR, which earlier cells also read.
INPUT_DIR = "/Users/apple/Desktop/KB/data/feat_LCL_2025/Larry_top200/feat_semi_sup_grid_search"

# Fresh copies of the train/test AnnData objects.
adata_train, adata_test = (
    ad.read_h5ad('/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_train.h5ad'),
    ad.read_h5ad('/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_test.h5ad'),
)

# Train/test embeddings of the lambda0.3 / unlab15 / bs150 run in grid_search_3.
train_semi_10 = np.load(
    f"{INPUT_DIR}/grid_search_3/lambda0.3_unlab15_bs150/scBaseEncoderFeat_Z_bs150_tau0.5.npy"
)
test_semi_10 = np.load(
    f"{INPUT_DIR}/grid_search_3/lambda0.3_unlab15_bs150/test_embedding.npy"
)


In [None]:
# Root of the Larry top-200 feature outputs.
# NOTE: this rebinds INPUT_DIR, which earlier cells also read.
INPUT_DIR = "/Users/apple/Desktop/KB/data/feat_LCL_2025/Larry_top200"

# Fresh copies of the train/test AnnData objects.
adata_train, adata_test = (
    ad.read_h5ad('/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_train.h5ad'),
    ad.read_h5ad('/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_test.h5ad'),
)

# Train/test embeddings of the lambda0.01 / unlab15 / bs150 "testAsPenalty" run.
# These reuse the names train_semi_10 / test_semi_10, replacing earlier values.
train_semi_10 = np.load(
    f"{INPUT_DIR}/lambda0.01_unlab15_bs150_testAsPenalty/scBaseEncoderFeat_Z_bs150_tau0.5.npy"
)
test_semi_10 = np.load(
    f"{INPUT_DIR}/lambda0.01_unlab15_bs150_testAsPenalty/test_embedding.npy"
)


In [None]:
# Tag each split and attach its embedding (modifies adata_train / adata_test in place).
adata_train.obs["dataset"] = "train"
adata_train.obsm["LCL_embedding_semi_10"] = train_semi_10

adata_test.obs["dataset"] = "test"
adata_test.obsm["LCL_embedding_semi_10"] = test_semi_10

# Stack the two splits row-wise, train rows first, keeping the union of variables.
adata = ad.concat(
    [adata_train, adata_test],
    axis=0,
    join="outer",
)

In [None]:
import umap

# Project the embedding of all cells (train + test) with UMAP.
# The reducer is seeded so that Restart & Run All reproduces the same layout;
# 42 matches the `umap_kwargs` random_state passed to LCL_Eval earlier in this notebook.
# NOTE(review): umap-learn documents that a fixed random_state runs single-threaded,
# so this may be slower than the unseeded call.
reducer = umap.UMAP(random_state=42)

embedding_umap = reducer.fit_transform(adata.obsm["LCL_embedding_semi_10"])

# Store the coordinates under the key the plotting functions below read.
adata.obsm["X_umap"] = embedding_umap

In [None]:
import pandas as pd

# Cells per lineage; value_counts() orders the result from largest to smallest.
clone_counts = adata.obs["clone_id"].value_counts()

# The five largest lineages are therefore the first five index entries.
top_5_clones = clone_counts.index[:5]

# Keep the clone id for the top 5, collapse every other lineage into "Other",
# and store the result as a categorical column.
adata.obs["clone_group"] = (
    adata.obs["clone_id"]
    .apply(lambda clone: clone if clone in top_5_clones else "Other")
    .astype("category")
)

# Group sizes, for verification.
print(adata.obs["clone_group"].value_counts())

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_umap(adata, colormap="tab10"):
    """
    Scatter-plot the UMAP in ``adata.obsm["X_umap"]`` with clone groups highlighted.

    - Every group in ``adata.obs["clone_group"]`` except "Other" gets its own colour
      and a larger marker, and is drawn on top.
    - "Other" cells are drawn first, in gray with low opacity.
    - Train cells are dots, test cells are crosses (from ``adata.obs["dataset"]``).

    Parameters:
    - adata: object exposing ``obsm["X_umap"]`` plus ``obs["clone_group"]``
      (categorical) and ``obs["dataset"]`` (values "train" / "test").
    - colormap (str): Matplotlib colormap name used for the highlighted groups.

    Output:
    - Shows the figure via ``plt.show()``; returns nothing.
    """
    umap_coords = adata.obsm["X_umap"]
    clone_group = adata.obs["clone_group"]

    # Boolean masks as plain arrays, so they index the coordinate array positionally.
    is_train = (adata.obs["dataset"] == "train").to_numpy()
    is_test = (adata.obs["dataset"] == "test").to_numpy()

    # Highlighted groups = every category except "Other". Filter by name: pandas
    # sorts categories when it can, so "Other" is not guaranteed to be the last
    # one, and slicing off the last category could drop a real clone.
    highlighted = [c for c in clone_group.cat.categories if c != "Other"]

    # One colour per highlighted group, gray for the background group.
    colors = plt.get_cmap(colormap)(range(len(highlighted)))
    color_map = dict(zip(highlighted, colors))
    color_map["Other"] = "gray"

    plt.figure(figsize=(8, 6))

    # Step 1: "Other" cells first (background, low opacity).
    is_other = (clone_group == "Other").to_numpy()
    idx_train_other = is_other & is_train
    idx_test_other = is_other & is_test

    plt.scatter(umap_coords[idx_train_other, 0], umap_coords[idx_train_other, 1],
                color=color_map["Other"], s=8, marker=".", alpha=0.2, label="Train Other")

    plt.scatter(umap_coords[idx_test_other, 0], umap_coords[idx_test_other, 1],
                color=color_map["Other"], s=12, marker="x", alpha=0.2, label="Test Other")

    # Step 2: highlighted clones on top (larger markers).
    for clone in highlighted:
        is_clone = (clone_group == clone).to_numpy()
        idx_train = is_clone & is_train
        idx_test = is_clone & is_test

        # Train: dots
        plt.scatter(umap_coords[idx_train, 0], umap_coords[idx_train, 1],
                    color=color_map[clone], s=30, marker=".", alpha=0.8, label=f"Train {clone}")

        # Test: crosses
        plt.scatter(umap_coords[idx_test, 0], umap_coords[idx_test, 1],
                    color=color_map[clone], s=40, marker="x", alpha=0.9, label=f"Test {clone}")

    plt.xlabel("UMAP1")
    plt.ylabel("UMAP2")
    plt.title("UMAP Projection - Top 5 Clones Highlighted")
    plt.legend()
    plt.show()

# Run the function to plot (needs `adata.obsm["X_umap"]`, `adata.obs["clone_group"]`
# and `adata.obs["dataset"]`, which the function reads).
plot_umap(adata)

### Train 

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_umap_with_lineages(adata, n_top_lineages=5, colormap="tab10"):
    """
    Plots UMAP from `adata.obsm["X_umap"]` with:
    - Top N clones in distinct colors (plotted on top)
    - Other clones in gray with lower opacity
    - Train cells as dots, Test cells as crosses

    Parameters:
    - adata (AnnData): AnnData object with precomputed UMAP in `.obsm["X_umap"]`,
      `.obsm["LCL_embedding_semi_10"]`, and `.obs` columns `clone_id` and `dataset`
    - n_top_lineages (int): Number of largest lineages to highlight in the plot
    - colormap (str): Matplotlib colormap for the distinct top N lineages

    Raises:
    - ValueError: if any of the required `.obsm` / `.obs` fields is missing

    Output:
    - A UMAP scatter plot (does NOT modify `adata`; the clone grouping is kept
      in a local variable rather than written to `adata.obs`)
    """

    ### **1️⃣ Check If Required Fields Exist**
    if "X_umap" not in adata.obsm:
        raise ValueError("UMAP coordinates missing! Ensure `adata.obsm['X_umap']` is computed.")

    # The message now names the key that is actually checked.
    if "LCL_embedding_semi_10" not in adata.obsm:
        raise ValueError("Contrastive learning embeddings missing! Ensure `adata.obsm['LCL_embedding_semi_10']` exists.")

    if "clone_id" not in adata.obs:
        raise ValueError("Clone ID column missing! Ensure `adata.obs['clone_id']` exists.")

    if "dataset" not in adata.obs:
        raise ValueError("Dataset column missing! Ensure `adata.obs['dataset']` exists with 'train' and 'test' values.")

    ### **2️⃣ Identify the Top N Largest Lineages**
    print(f"Identifying the top {n_top_lineages} largest lineages...")
    # value_counts() orders clones from largest to smallest.
    clone_counts = adata.obs["clone_id"].value_counts()
    top_n_clones = clone_counts.index[:n_top_lineages]

    # Local grouping: top N clones keep their id, everything else becomes "Other".
    # Kept out of `adata.obs` so the function has no side effect on its input.
    clone_group = (
        adata.obs["clone_id"]
        .apply(lambda x: x if x in top_n_clones else "Other")
        .astype("category")
    )

    ### **3️⃣ Plot UMAP with Custom Formatting**
    print("Plotting UMAP with lineage-specific colors and train/test markers...")

    umap_coords = adata.obsm["X_umap"]
    # Boolean masks as plain arrays, so they index the coordinate array positionally.
    is_train = (adata.obs["dataset"] == "train").to_numpy()
    is_test = (adata.obs["dataset"] == "test").to_numpy()

    # Highlighted groups = every category except "Other". Filter by name: pandas
    # sorts categories when it can, so "Other" is not guaranteed to be the last
    # one, and slicing off the last category could drop a real clone.
    highlighted = [c for c in clone_group.cat.categories if c != "Other"]

    # One colour per highlighted clone, gray for the background group.
    colors = plt.get_cmap(colormap)(range(len(highlighted)))
    color_map = dict(zip(highlighted, colors))
    color_map["Other"] = "gray"

    plt.figure(figsize=(8, 6))

    # **Step 1**: Plot "Other" cells first (background with low opacity)
    is_other = (clone_group == "Other").to_numpy()
    idx_train_other = is_other & is_train
    idx_test_other = is_other & is_test

    plt.scatter(umap_coords[idx_train_other, 0], umap_coords[idx_train_other, 1],
                color=color_map["Other"], s=8, marker=".", alpha=0.2, label="Train Other")

    plt.scatter(umap_coords[idx_test_other, 0], umap_coords[idx_test_other, 1],
                color=color_map["Other"], s=8, marker="x", alpha=0.2, label="Test Other")

    # **Step 2**: Plot top N clones on top (larger size)
    for clone in highlighted:
        is_clone = (clone_group == clone).to_numpy()
        idx_train = is_clone & is_train
        idx_test = is_clone & is_test

        # Train: Dots
        plt.scatter(umap_coords[idx_train, 0], umap_coords[idx_train, 1],
                    color=color_map[clone], s=40, marker=".", alpha=0.8, label=f"Train {clone}")

        # Test: Crosses
        plt.scatter(umap_coords[idx_test, 0], umap_coords[idx_test, 1],
                    color=color_map[clone], s=40, marker="x", alpha=1, label=f"Test {clone}")

    plt.xlabel("UMAP1")
    plt.ylabel("UMAP2")
    plt.title(f"UMAP Projection - Top {n_top_lineages} Clones Highlighted")
    plt.legend()
    plt.show()

In [None]:
# Highlight only the single largest lineage.
plot_umap_with_lineages(adata, n_top_lineages=1)

In [None]:
import matplotlib.pyplot as plt
# Raw UMAP scatter of every row of `embedding_umap`: first column on x, second on y.
plt.scatter(embedding_umap[:,0],embedding_umap[:,1])

In [None]:
# UMAP scatter of the train cells only.
# Rows are selected by the `dataset` label rather than a hard-coded row count, so
# the split stays exact whatever the number of train cells is. `embedding_umap`
# was fitted on `adata`, so its rows line up with `adata.obs`.
is_train_cell = (adata.obs["dataset"] == "train").to_numpy()
plt.scatter(embedding_umap[is_train_cell, 0], embedding_umap[is_train_cell, 1])

In [None]:
# UMAP scatter of the test cells only.
# Rows are selected by the `dataset` label rather than a hard-coded start index, so
# no boundary row is skipped or misassigned. `embedding_umap` was fitted on
# `adata`, so its rows line up with `adata.obs`.
is_test_cell = (adata.obs["dataset"] == "test").to_numpy()
plt.scatter(embedding_umap[is_test_cell, 0], embedding_umap[is_test_cell, 1])

In [None]:
# Evaluate the train embedding: top-lineage UMAP, train KNN, Calinski-Harabasz score.
# NOTE(review): `LCL_eval` is not imported in any cell shown in this notebook (only
# `LCL_Eval` from `LCL_eval_final` is), so this raises NameError on a fresh kernel
# unless it is imported elsewhere — confirm.
# NOTE(review): the plot title says "5 unlabeled data", but `train_semi_10` is loaded
# above from a folder named `..._unlab15_...` — confirm the label.
eval_semi_10_train = LCL_eval.Eval(train_semi_10, adata_train)
eval_semi_10_train.plot_umap_top_lin("semi-supervised learning with 5 unlabeled data")
eval_semi_10_train.KNN_train()
score_10_train = eval_semi_10_train.calculate_calinski_harabasz_score()

In [None]:
# Evaluate the test embedding: top-lineage UMAP and Calinski-Harabasz score, then
# KNN on the test cells using the evaluator built from the train embedding.
# NOTE(review): `LCL_eval` is not imported in any cell shown in this notebook — confirm.
# NOTE(review): the last line uses `eval_semi_10_train`, so the train-evaluation cell
# must have been run first.
eval_semi_10_test = LCL_eval.Eval(test_semi_10, adata_test)
eval_semi_10_test.plot_umap_top_lin("semi-supervised learning with 5 unlabeled data")
score_semi_10_test = eval_semi_10_test.calculate_calinski_harabasz_score()
eval_semi_10_train.KNN_test(test_semi_10, adata_test)

In [None]:
# Root of the Larry top-200 feature outputs.
# NOTE: this rebinds INPUT_DIR, which earlier cells also read.
INPUT_DIR = "/Users/apple/Desktop/KB/data/feat_LCL_2025/Larry_top200"

# Fresh copies of the train/test AnnData objects.
adata_train, adata_test = (
    ad.read_h5ad('/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_train.h5ad'),
    ad.read_h5ad('/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_test.h5ad'),
)

# Train/test embeddings of the lambda0.01 / unlab15 / bs150 "testAsPenalty" run.
train_semi_001 = np.load(
    f"{INPUT_DIR}/lambda0.01_unlab15_bs150_testAsPenalty/scBaseEncoderFeat_Z_bs150_tau0.5.npy"
)
test_semi_001 = np.load(
    f"{INPUT_DIR}/lambda0.01_unlab15_bs150_testAsPenalty/test_embedding.npy"
)


In [None]:
# KNN accuracy on train and test cells with 5 neighbours for the lambda0.01 embeddings.
# NOTE(review): `LCL_eval` is not imported in any cell shown in this notebook (only
# `LCL_Eval` from `LCL_eval_final` is) — confirm where it comes from.
ev = LCL_eval.Eval(train_semi_001, adata_train)
tr_acc = ev.KNN_train(n_neighbors=5)
te_acc = ev.KNN_test(test_semi_001, adata_test, n_neighbors=5)