## KNN evaluation of LCL embeddings (semi-supervised grid search)

In [7]:
import os
import re

import anndata as ad
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier

In [8]:
import sys
import os

# Folder added to the import path; presumably where LCL_eval and
# LCL_eval_modifiedKNN (imported just below) live — TODO confirm.
main_folder = "/Users/apple/Desktop/KB/SCSeq_LineageBarcoding2/SCSeq_LineageBarcoding/SCLineage_ConstrativeLearning/main_semi_test"

# Add it to sys.path only once, so re-running this cell does not keep
# stacking duplicate entries.
if main_folder not in sys.path:
    sys.path.append(main_folder)

import LCL_eval
from LCL_eval_modifiedKNN import (
    adjusted_knn_predict_with_rank, 
    adjusted_knn_predict_with_rank_v2
)
from collections import Counter

In [9]:
# Root folder of the grid-search outputs that the sweep cells walk through.
INPUT_DIR = "/Users/apple/Desktop/KB/data/feat_LCL_2025/Larry_top200/feat_semi_sup_grid_search"

# Train / test AnnData splits, read once and shared by every experiment.
TRAIN_ADATA = ad.read_h5ad('/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_train.h5ad')
TEST_ADATA  = ad.read_h5ad('/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_test.h5ad')

# Clone label of every cell, per split.
train_labels = TRAIN_ADATA.obs['clone_id'].values
test_labels = TEST_ADATA.obs['clone_id'].values

# Pool both splits and turn the per-clone counts into relative frequencies
# (count / total number of cells); passed to the adjusted KNN functions later.
all_labels = np.concatenate((train_labels, test_labels))
total_cells = len(all_labels)
total_label_counts = Counter(all_labels)
global_freq = dict(
    (label, count / total_cells)
    for label, count in total_label_counts.items()
)


In [10]:
# Sweep every <grid>/<experiment> folder under INPUT_DIR and score its saved
# embeddings with three KNN variants; one dict per experiment goes into `records`.

K_ORIG = 5   # neighbours for the baseline LCL_eval KNN (train and test)
K_MOD = 30   # neighbours for both adjusted KNN variants

# Experiment folders are named like "lambda0.01_unlab10_bs100".  The pattern also
# accepts trailing "_suffix" tokens, which a plain 3-way split("_") unpack cannot.
EXP_NAME_RE = re.compile(
    r"^lambda(?P<lam>[^_]+)_unlab(?P<unlab>\d+)_bs(?P<bs>\d+)(?:_.*)?$"
)

records = []

for grid_name in sorted(os.listdir(INPUT_DIR)):
    grid_path = os.path.join(INPUT_DIR, grid_name)
    if not os.path.isdir(grid_path):
        continue

    for exp_name in sorted(os.listdir(grid_path)):
        exp_path = os.path.join(grid_path, exp_name)
        if not os.path.isdir(exp_path):
            continue

        # Parse the hyperparameters first: it is cheap, and a folder that does not
        # belong to the sweep is skipped before any embedding is loaded or scored.
        name_match = EXP_NAME_RE.match(exp_name)
        if name_match is None:
            print(f"[skip] {exp_path}: folder name does not encode lambda/unlab/bs")
            continue
        lam = float(name_match["lam"])
        unlab = int(name_match["unlab"])
        bs = int(name_match["bs"])

        # Locate the two embedding files.  Only .npy files are considered and the
        # listing is sorted, so the choice does not depend on os.listdir order.
        npy_files = sorted(f for f in os.listdir(exp_path) if f.endswith(".npy"))
        tr_file = next(
            (f for f in npy_files if f.startswith("scBaseEncoderFeat_Z")),
            None,
        )
        te_file = next(
            (f for f in npy_files
             if "test_embedding" in f or f.startswith("scBaseEncoderFeat_test")),
            None,
        )
        if tr_file is None or te_file is None:
            # An unfinished run must not abort the whole sweep with a bare StopIteration.
            print(f"[skip] {exp_path}: train or test embedding (.npy) not found")
            continue

        train_emb = np.load(os.path.join(exp_path, tr_file))
        test_emb = np.load(os.path.join(exp_path, te_file))

        # Original KNN evaluation
        ev = LCL_eval.Eval(train_emb, TRAIN_ADATA)
        orig_tr_acc = ev.KNN_train(n_neighbors=K_ORIG)
        orig_te_acc = ev.KNN_test(test_emb, TEST_ADATA, n_neighbors=K_ORIG)

        # Modified KNN classifier (sklearn-based)
        knn_model = KNeighborsClassifier(n_neighbors=K_MOD)
        knn_model.fit(train_emb, train_labels)

        preds_mod1_train, _, avg_rank_mod1_train, _ = adjusted_knn_predict_with_rank(
            knn_model, train_labels, train_emb, train_labels, global_freq, k=K_MOD)
        preds_mod1_test, _, avg_rank_mod1_test, _ = adjusted_knn_predict_with_rank(
            knn_model, train_labels, test_emb, test_labels, global_freq, k=K_MOD)

        # Accuracy = fraction of cells whose predicted clone equals the true clone.
        mod1_tr_acc = (preds_mod1_train == train_labels).mean()
        mod1_te_acc = (preds_mod1_test == test_labels).mean()

        # Modified KNN classifier v2 (distance-based)
        preds_mod2_train, _, avg_rank_mod2_train, _ = adjusted_knn_predict_with_rank_v2(
            train_emb, train_labels, train_emb, train_labels, global_freq, k=K_MOD)
        preds_mod2_test, _, avg_rank_mod2_test, _ = adjusted_knn_predict_with_rank_v2(
            train_emb, train_labels, test_emb, test_labels, global_freq, k=K_MOD)

        mod2_tr_acc = (preds_mod2_train == train_labels).mean()
        mod2_te_acc = (preds_mod2_test == test_labels).mean()

        # Record results
        records.append({
            "grid": grid_name,
            "lambda": lam,
            "unlabeled": unlab,
            "batch_size": bs,

            # Original KNN accuracy
            "orig_train_acc": orig_tr_acc,
            "orig_test_acc": orig_te_acc,

            # Modified KNN v1 accuracy
            "mod1_train_acc": mod1_tr_acc,
            "mod1_test_acc": mod1_te_acc,
            "mod1_avg_rank_train": avg_rank_mod1_train,
            "mod1_avg_rank_test": avg_rank_mod1_test,

            # Modified KNN v2 accuracy
            "mod2_train_acc": mod2_tr_acc,
            "mod2_test_acc": mod2_te_acc,
            "mod2_avg_rank_train": avg_rank_mod2_train,
            "mod2_avg_rank_test": avg_rank_mod2_test
        })

# Create summary DataFrame (one row per experiment, in folder-name order)
df = pd.DataFrame(records)
print(df)



KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 14.12%
KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 13.88%
KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 14.86%
KNN classifier training accuracy: 99.41%
KNN classifier testing accuracy: 15.51%
KNN classifier training accuracy: 99.70%
KNN classifier testing accuracy: 12.98%
KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 14.37%
KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 15.02%
KNN classifier training accuracy: 99.61%
KNN classifier testing accuracy: 13.06%
KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 14.45%
KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 13.14%
KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 12.16%
KNN classifier training accuracy: 99.66%
KNN classifier testing accuracy: 13.14%
KNN classifier train

In [11]:
# Display the summary table built by the sweep cell above (one row per experiment).
df

Unnamed: 0,grid,lambda,unlabeled,batch_size,orig_train_acc,orig_test_acc,mod1_train_acc,mod1_test_acc,mod1_avg_rank_train,mod1_avg_rank_test,mod2_train_acc,mod2_test_acc,mod2_avg_rank_train,mod2_avg_rank_test
0,grid_search_1,0.01,10,100,1.0,0.141224,1.0,0.141224,1.0,2.246531,1.0,0.141224,1.0,2.246531
1,grid_search_1,0.01,10,120,1.0,0.138776,1.0,0.138776,1.0,2.198367,1.0,0.138776,1.0,2.198367
2,grid_search_1,0.01,10,150,1.0,0.148571,1.0,0.145306,1.0,2.14449,1.0,0.145306,1.0,2.14449
3,grid_search_1,0.01,10,80,0.994089,0.155102,0.996551,0.155102,1.003942,2.369796,0.996551,0.155102,1.003942,2.369796
4,grid_search_1,0.01,15,100,0.997044,0.129796,0.997832,0.126531,1.002266,2.346122,0.997832,0.126531,1.002266,2.346122
5,grid_search_1,0.01,15,120,1.0,0.143673,1.0,0.142857,1.0,2.213878,1.0,0.142857,1.0,2.213061
6,grid_search_1,0.01,15,150,1.0,0.150204,1.0,0.149388,1.0,2.262857,1.0,0.149388,1.0,2.262857
7,grid_search_1,0.01,15,80,0.996059,0.130612,0.996453,0.133061,1.003449,2.273469,0.996453,0.133061,1.003449,2.273469
8,grid_search_1,0.01,5,100,1.0,0.14449,1.0,0.143673,1.0,2.226122,1.0,0.143673,1.0,2.226122
9,grid_search_1,0.01,5,120,1.0,0.131429,1.0,0.130612,1.0,2.374694,1.0,0.130612,1.0,2.374694


### Sweep re-run (baseline test KNN with k=30) and anndata loading

In [12]:
# Second pass over the grid search: same scoring as the first sweep, except that
# the baseline KNN is tested with 30 neighbours instead of 5.

K_ORIG_TRAIN = 5   # neighbours for the baseline KNN_train call
# NOTE(review): train uses 5 neighbours while test uses 30 — confirm this
# asymmetry is intended and how LCL_eval.Eval.KNN_test uses n_neighbors.
K_ORIG_TEST = 30   # neighbours for the baseline KNN_test call
K_MOD = 30         # neighbours for both adjusted KNN variants

# Experiment folders are named like "lambda0.01_unlab10_bs100".  The pattern also
# accepts trailing "_suffix" tokens, which a plain 3-way split("_") unpack cannot.
EXP_NAME_RE = re.compile(
    r"^lambda(?P<lam>[^_]+)_unlab(?P<unlab>\d+)_bs(?P<bs>\d+)(?:_.*)?$"
)

records = []

for grid_name in sorted(os.listdir(INPUT_DIR)):
    grid_path = os.path.join(INPUT_DIR, grid_name)
    if not os.path.isdir(grid_path):
        continue

    for exp_name in sorted(os.listdir(grid_path)):
        exp_path = os.path.join(grid_path, exp_name)
        if not os.path.isdir(exp_path):
            continue

        # Parse the hyperparameters first: it is cheap, and a folder that does not
        # belong to the sweep is skipped before any embedding is loaded or scored.
        name_match = EXP_NAME_RE.match(exp_name)
        if name_match is None:
            print(f"[skip] {exp_path}: folder name does not encode lambda/unlab/bs")
            continue
        lam = float(name_match["lam"])
        unlab = int(name_match["unlab"])
        bs = int(name_match["bs"])

        # Locate the two embedding files.  Only .npy files are considered and the
        # listing is sorted, so the choice does not depend on os.listdir order.
        npy_files = sorted(f for f in os.listdir(exp_path) if f.endswith(".npy"))
        tr_file = next(
            (f for f in npy_files if f.startswith("scBaseEncoderFeat_Z")),
            None,
        )
        te_file = next(
            (f for f in npy_files
             if "test_embedding" in f or f.startswith("scBaseEncoderFeat_test")),
            None,
        )
        if tr_file is None or te_file is None:
            # An unfinished run must not abort the whole sweep with a bare StopIteration.
            print(f"[skip] {exp_path}: train or test embedding (.npy) not found")
            continue

        train_emb = np.load(os.path.join(exp_path, tr_file))
        test_emb = np.load(os.path.join(exp_path, te_file))

        # Original KNN evaluation
        ev = LCL_eval.Eval(train_emb, TRAIN_ADATA)
        orig_tr_acc = ev.KNN_train(n_neighbors=K_ORIG_TRAIN)
        orig_te_acc = ev.KNN_test(test_emb, TEST_ADATA, n_neighbors=K_ORIG_TEST)

        # Modified KNN classifier (sklearn-based)
        knn_model = KNeighborsClassifier(n_neighbors=K_MOD)
        knn_model.fit(train_emb, train_labels)

        preds_mod1_train, _, avg_rank_mod1_train, _ = adjusted_knn_predict_with_rank(
            knn_model, train_labels, train_emb, train_labels, global_freq, k=K_MOD)
        preds_mod1_test, _, avg_rank_mod1_test, _ = adjusted_knn_predict_with_rank(
            knn_model, train_labels, test_emb, test_labels, global_freq, k=K_MOD)

        # Accuracy = fraction of cells whose predicted clone equals the true clone.
        mod1_tr_acc = (preds_mod1_train == train_labels).mean()
        mod1_te_acc = (preds_mod1_test == test_labels).mean()

        # Modified KNN classifier v2 (distance-based)
        preds_mod2_train, _, avg_rank_mod2_train, _ = adjusted_knn_predict_with_rank_v2(
            train_emb, train_labels, train_emb, train_labels, global_freq, k=K_MOD)
        preds_mod2_test, _, avg_rank_mod2_test, _ = adjusted_knn_predict_with_rank_v2(
            train_emb, train_labels, test_emb, test_labels, global_freq, k=K_MOD)

        mod2_tr_acc = (preds_mod2_train == train_labels).mean()
        mod2_te_acc = (preds_mod2_test == test_labels).mean()

        # Record results
        records.append({
            "grid": grid_name,
            "lambda": lam,
            "unlabeled": unlab,
            "batch_size": bs,

            # Original KNN accuracy
            "orig_train_acc": orig_tr_acc,
            "orig_test_acc": orig_te_acc,

            # Modified KNN v1 accuracy
            "mod1_train_acc": mod1_tr_acc,
            "mod1_test_acc": mod1_te_acc,
            "mod1_avg_rank_train": avg_rank_mod1_train,
            "mod1_avg_rank_test": avg_rank_mod1_test,

            # Modified KNN v2 accuracy
            "mod2_train_acc": mod2_tr_acc,
            "mod2_test_acc": mod2_te_acc,
            "mod2_avg_rank_train": avg_rank_mod2_train,
            "mod2_avg_rank_test": avg_rank_mod2_test
        })

# Create summary DataFrame (one row per experiment, in folder-name order)
df = pd.DataFrame(records)
print(df)



KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 14.12%
KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 13.80%
KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 14.53%
KNN classifier training accuracy: 99.41%
KNN classifier testing accuracy: 15.51%
KNN classifier training accuracy: 99.70%
KNN classifier testing accuracy: 12.65%
KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 14.37%
KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 14.94%
KNN classifier training accuracy: 99.61%
KNN classifier testing accuracy: 13.31%
KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 14.37%
KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 13.06%
KNN classifier training accuracy: 100.00%
KNN classifier testing accuracy: 12.24%
KNN classifier training accuracy: 99.66%
KNN classifier testing accuracy: 12.73%
KNN classifier train

In [13]:
# Display the summary table from the second sweep (baseline KNN tested with n_neighbors=30).
df

Unnamed: 0,grid,lambda,unlabeled,batch_size,orig_train_acc,orig_test_acc,mod1_train_acc,mod1_test_acc,mod1_avg_rank_train,mod1_avg_rank_test,mod2_train_acc,mod2_test_acc,mod2_avg_rank_train,mod2_avg_rank_test
0,grid_search_1,0.01,10,100,1.0,0.141224,1.0,0.141224,1.0,2.246531,1.0,0.141224,1.0,2.246531
1,grid_search_1,0.01,10,120,1.0,0.137959,1.0,0.138776,1.0,2.198367,1.0,0.138776,1.0,2.198367
2,grid_search_1,0.01,10,150,1.0,0.145306,1.0,0.145306,1.0,2.14449,1.0,0.145306,1.0,2.14449
3,grid_search_1,0.01,10,80,0.994089,0.155102,0.996551,0.155102,1.003942,2.369796,0.996551,0.155102,1.003942,2.369796
4,grid_search_1,0.01,15,100,0.997044,0.126531,0.997832,0.126531,1.002266,2.346122,0.997832,0.126531,1.002266,2.346122
5,grid_search_1,0.01,15,120,1.0,0.143673,1.0,0.142857,1.0,2.213878,1.0,0.142857,1.0,2.213061
6,grid_search_1,0.01,15,150,1.0,0.149388,1.0,0.149388,1.0,2.262857,1.0,0.149388,1.0,2.262857
7,grid_search_1,0.01,15,80,0.996059,0.133061,0.996453,0.133061,1.003449,2.273469,0.996453,0.133061,1.003449,2.273469
8,grid_search_1,0.01,5,100,1.0,0.143673,1.0,0.143673,1.0,2.226122,1.0,0.143673,1.0,2.226122
9,grid_search_1,0.01,5,120,1.0,0.130612,1.0,0.130612,1.0,2.374694,1.0,0.130612,1.0,2.374694


In [14]:
# Persist the current `df` (the table from the most recent sweep cell) to the
# working directory; index=False leaves the row index out of the file.
df.to_csv("knn_evaluation_summary.csv", index=False)

In [None]:
# Grid-search output root (same value the sweep cells use).
INPUT_DIR = "/Users/apple/Desktop/KB/data/feat_LCL_2025/Larry_top200/feat_semi_sup_grid_search"

# Train / test AnnData splits, read in that order.
adata_train, adata_test = (
    ad.read_h5ad(h5ad_path)
    for h5ad_path in (
        '/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_train.h5ad',
        '/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_test.h5ad',
    )
)

# Saved train / test embeddings of the grid_search_3 run lambda0.3_unlab15_bs150.
train_semi_10, test_semi_10 = (
    np.load(INPUT_DIR+rel_path)
    for rel_path in (
        '/grid_search_3/lambda0.3_unlab15_bs150/scBaseEncoderFeat_Z_bs150_tau0.5.npy',
        '/grid_search_3/lambda0.3_unlab15_bs150/test_embedding.npy',
    )
)


In [None]:
# Fresh copies of the train / test splits (later cells add .obs / .obsm fields to them).
adata_train = ad.read_h5ad('/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_train.h5ad')
adata_test  = ad.read_h5ad('/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_test.h5ad')

# This run lives outside the grid-search tree.  It gets its own name instead of
# re-binding INPUT_DIR, so re-running a sweep cell afterwards still scans the
# grid-search folder rather than this one.
TEST_AS_PENALTY_DIR = "/Users/apple/Desktop/KB/data/feat_LCL_2025/Larry_top200/lambda0.01_unlab15_bs150_testAsPenalty"

# NOTE(review): train_semi_10 / test_semi_10 are also assigned by the preceding
# cell (grid_search_3 run); whichever of the two cells ran last wins.
train_semi_10 = np.load(os.path.join(TEST_AS_PENALTY_DIR, 'scBaseEncoderFeat_Z_bs150_tau0.5.npy'))
test_semi_10 = np.load(os.path.join(TEST_AS_PENALTY_DIR, 'test_embedding.npy'))


In [None]:
# Tag each split and attach its learned embedding under a shared obsm key.
adata_train.obs["dataset"] = "train"
adata_train.obsm["LCL_embedding_semi_10"] = train_semi_10

adata_test.obs["dataset"] = "test"
adata_test.obsm["LCL_embedding_semi_10"] = test_semi_10

# Stack the cells of both splits (train rows first, then test rows).
adata = ad.concat([adata_train, adata_test], join='outer', axis=0)

In [None]:
import umap

# UMAP is stochastic; a fixed seed makes the 2-D layout identical on every run.
UMAP_SEED = 42
reducer = umap.UMAP(random_state=UMAP_SEED)

# Project the learned embedding of all cells (train + test) to 2-D.
embedding_umap = reducer.fit_transform(adata.obsm["LCL_embedding_semi_10"])

# Store the coordinates where the plotting functions read them.
adata.obsm["X_umap"] = embedding_umap

In [None]:
import pandas as pd

# Count number of cells per lineage (value_counts sorts by count, largest first)
clone_counts = adata.obs["clone_id"].value_counts()

# Get the top 5 largest lineages
top_5_clones = clone_counts.index[:5]

# Keep the clone id for cells in a top-5 lineage and relabel everything else "Other".
# Cast to object first so "Other" can be written even if clone_id is categorical.
clone_labels = adata.obs["clone_id"].astype(object)
clone_labels = clone_labels.where(clone_labels.isin(list(top_5_clones)), "Other")

# Convert to categorical with an explicit order: top clones by size, "Other" last.
# Leaving the order to inference can put "Other" anywhere (it depends on the type
# and spelling of the clone ids), which breaks code that slices off the last category.
adata.obs["clone_group"] = pd.Categorical(
    clone_labels, categories=[*top_5_clones, "Other"]
)

# Print for verification
print(adata.obs["clone_group"].value_counts())

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_umap(adata, colormap="tab10"):
    """
    Scatter-plot `adata.obsm["X_umap"]` with the clone groups highlighted.

    Reads `adata.obs["clone_group"]` (categorical; every category except
    "Other" is highlighted) and `adata.obs["dataset"]` ("train" / "test").

    - "Other" cells are drawn first, in gray with low opacity
    - Each highlighted clone gets its own color and larger markers, drawn on top
    - Train cells are dots, test cells are crosses

    Parameters:
    - adata (AnnData): object providing the three fields above
    - colormap (str): Matplotlib colormap name used for the highlighted clones

    Output:
    - Shows the figure; returns None
    """
    umap_coords = adata.obsm["X_umap"]
    clone_group = adata.obs["clone_group"]

    # Plain boolean arrays, used to index the coordinate array by position.
    is_train = (adata.obs["dataset"] == "train").to_numpy()
    is_test = (adata.obs["dataset"] == "test").to_numpy()

    # Pick the highlighted clones by name.  Dropping "the last category" is only
    # right when "Other" happens to come last, which depends on the clone-id values.
    highlighted = [c for c in clone_group.cat.categories if c != "Other"]

    # One color per highlighted clone, gray for everything else.
    palette = plt.get_cmap(colormap)(range(len(highlighted)))
    color_map = dict(zip(highlighted, palette))
    color_map["Other"] = "gray"

    plt.figure(figsize=(8, 6))

    # Step 1: background layer ("Other" cells, low opacity)
    is_other = (clone_group == "Other").to_numpy()
    other_train = is_other & is_train
    other_test = is_other & is_test

    plt.scatter(umap_coords[other_train, 0], umap_coords[other_train, 1],
                color=color_map["Other"], s=8, marker=".", alpha=0.2, label="Train Other")
    plt.scatter(umap_coords[other_test, 0], umap_coords[other_test, 1],
                color=color_map["Other"], s=12, marker="x", alpha=0.2, label="Test Other")

    # Step 2: highlighted clones on top (larger markers)
    for clone in highlighted:
        in_clone = (clone_group == clone).to_numpy()
        clone_train = in_clone & is_train
        clone_test = in_clone & is_test

        # Train: dots
        plt.scatter(umap_coords[clone_train, 0], umap_coords[clone_train, 1],
                    color=color_map[clone], s=30, marker=".", alpha=0.8, label=f"Train {clone}")

        # Test: crosses
        plt.scatter(umap_coords[clone_test, 0], umap_coords[clone_test, 1],
                    color=color_map[clone], s=40, marker="x", alpha=0.9, label=f"Test {clone}")

    plt.xlabel("UMAP1")
    plt.ylabel("UMAP2")
    plt.title("UMAP Projection - Top 5 Clones Highlighted")
    plt.legend()
    plt.show()

# Run the function to plot
plot_umap(adata)

### Train 

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_umap_with_lineages(adata, n_top_lineages=5, colormap="tab10"):
    """
    Plots UMAP from `adata.obsm["X_umap"]` with:
    - Top N clones (by cell count) in distinct colors, plotted on top
    - All other clones in gray with lower opacity
    - Train cells as dots, Test cells as crosses

    Parameters:
    - adata (AnnData): AnnData object with precomputed UMAP in `.obsm["X_umap"]`,
      the embedding in `.obsm["LCL_embedding_semi_10"]`, and the `.obs` columns
      "clone_id" and "dataset" ("train" / "test")
    - n_top_lineages (int): Number of largest lineages to highlight in the plot
    - colormap (str): Matplotlib colormap for the distinct top N lineages

    Raises:
    - ValueError: if any of the four required fields is missing

    Output:
    - A UMAP scatter plot (does NOT modify `adata`)
    """

    # 1. Check that the required fields exist
    if "X_umap" not in adata.obsm:
        raise ValueError("UMAP coordinates missing! Ensure `adata.obsm['X_umap']` is computed.")

    if "LCL_embedding_semi_10" not in adata.obsm:
        # The message names the key that is actually checked.
        raise ValueError("Contrastive learning embeddings missing! Ensure `adata.obsm['LCL_embedding_semi_10']` exists.")

    if "clone_id" not in adata.obs:
        raise ValueError("Clone ID column missing! Ensure `adata.obs['clone_id']` exists.")

    if "dataset" not in adata.obs:
        raise ValueError("Dataset column missing! Ensure `adata.obs['dataset']` exists with 'train' and 'test' values.")

    # 2. Identify the top N largest lineages
    print(f"Identifying the top {n_top_lineages} largest lineages...")
    clone_ids = adata.obs["clone_id"]
    # value_counts sorts by count (descending), so the first N labels are the largest clones.
    top_n_clones = list(clone_ids.value_counts().index[:n_top_lineages])

    # Group membership is kept in local masks; nothing is written back to `adata`,
    # which is what the docstring promises.
    is_top = clone_ids.isin(top_n_clones).to_numpy()
    is_other = ~is_top

    # 3. Plot UMAP with custom formatting
    print("Plotting UMAP with lineage-specific colors and train/test markers...")

    umap_coords = adata.obsm["X_umap"]
    is_train = (adata.obs["dataset"] == "train").to_numpy()
    is_test = (adata.obs["dataset"] == "test").to_numpy()

    # One color per highlighted clone, gray for everything else.
    palette = plt.get_cmap(colormap)(range(len(top_n_clones)))
    color_map = dict(zip(top_n_clones, palette))
    color_map["Other"] = "gray"

    plt.figure(figsize=(8, 6))

    # Step 1: background layer (cells outside the top N, low opacity)
    other_train = is_other & is_train
    other_test = is_other & is_test

    plt.scatter(umap_coords[other_train, 0], umap_coords[other_train, 1],
                color=color_map["Other"], s=8, marker=".", alpha=0.2, label="Train Other")
    plt.scatter(umap_coords[other_test, 0], umap_coords[other_test, 1],
                color=color_map["Other"], s=8, marker="x", alpha=0.2, label="Test Other")

    # Step 2: top N clones on top (larger markers).  Iterating the clone list
    # itself avoids assuming where "Other" falls in a category ordering.
    for clone in top_n_clones:
        in_clone = (clone_ids == clone).to_numpy()
        clone_train = in_clone & is_train
        clone_test = in_clone & is_test

        # Train: dots
        plt.scatter(umap_coords[clone_train, 0], umap_coords[clone_train, 1],
                    color=color_map[clone], s=40, marker=".", alpha=0.8, label=f"Train {clone}")

        # Test: crosses
        plt.scatter(umap_coords[clone_test, 0], umap_coords[clone_test, 1],
                    color=color_map[clone], s=40, marker="x", alpha=1, label=f"Test {clone}")

    plt.xlabel("UMAP1")
    plt.ylabel("UMAP2")
    plt.title(f"UMAP Projection - Top {n_top_lineages} Clones Highlighted")
    plt.legend()
    plt.show()

In [None]:
# Highlight only the single largest lineage; every other cell is drawn as "Other".
plot_umap_with_lineages(adata, n_top_lineages=1)

In [None]:
import matplotlib.pyplot as plt
# Whole UMAP (train + test cells), with labels so the figure stands on its own.
fig, ax = plt.subplots()
ax.scatter(embedding_umap[:, 0], embedding_umap[:, 1])
ax.set(xlabel="UMAP1", ylabel="UMAP2", title="UMAP - all cells")
plt.show()

In [None]:
# Training cells only.  Rows of embedding_umap follow the rows of `adata`, so the
# cells are selected through the "dataset" label instead of a hard-coded row
# count, which is easy to get off by one.
is_train = (adata.obs["dataset"] == "train").to_numpy()

fig, ax = plt.subplots()
ax.scatter(embedding_umap[is_train, 0], embedding_umap[is_train, 1])
ax.set(xlabel="UMAP1", ylabel="UMAP2", title="UMAP - train cells")
plt.show()

In [None]:
# Test cells only.  Rows of embedding_umap follow the rows of `adata`, so the
# cells are selected through the "dataset" label instead of a hard-coded start
# row, which is easy to get off by one.
is_test = (adata.obs["dataset"] == "test").to_numpy()

fig, ax = plt.subplots()
ax.scatter(embedding_umap[is_test, 0], embedding_umap[is_test, 1])
ax.set(xlabel="UMAP1", ylabel="UMAP2", title="UMAP - test cells")
plt.show()

In [None]:
# Wrap the training embedding and its AnnData in the project's evaluation helper.
eval_semi_10_train = LCL_eval.Eval(train_semi_10, adata_train)
# NOTE(review): the title says "5 unlabeled data", but the embedding loaded earlier
# comes from an "unlab15" run folder — confirm which is right.
eval_semi_10_train.plot_umap_top_lin("semi-supervised learning with 5 unlabeled data")
# The return value is not kept here; presumably this call also prepares the
# classifier that KNN_test uses in the next cell — TODO confirm.
eval_semi_10_train.KNN_train()
score_10_train = eval_semi_10_train.calculate_calinski_harabasz_score()

In [None]:
# Same evaluation helper, this time wrapping the test embedding.
eval_semi_10_test = LCL_eval.Eval(test_semi_10, adata_test)
# NOTE(review): the title says "5 unlabeled data", but the embedding loaded earlier
# comes from an "unlab15" run folder — confirm which is right.
eval_semi_10_test.plot_umap_top_lin("semi-supervised learning with 5 unlabeled data")
score_semi_10_test = eval_semi_10_test.calculate_calinski_harabasz_score()
# Test accuracy is computed through the *train* evaluator, so the cell that
# creates eval_semi_10_train must have been run before this one.
eval_semi_10_train.KNN_test(test_semi_10, adata_test)

In [None]:
# Fresh copies of the train / test splits for the lambda = 0.01 evaluation.
adata_train = ad.read_h5ad('/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_train.h5ad')
adata_test  = ad.read_h5ad('/Users/apple/Desktop/KB/data/LarryData/train_test/Larry_200_test.h5ad')

# This run lives outside the grid-search tree.  It gets its own name instead of
# re-binding INPUT_DIR, so re-running a sweep cell afterwards still scans the
# grid-search folder rather than this one.
TEST_AS_PENALTY_DIR = "/Users/apple/Desktop/KB/data/feat_LCL_2025/Larry_top200/lambda0.01_unlab15_bs150_testAsPenalty"

train_semi_001 = np.load(os.path.join(TEST_AS_PENALTY_DIR, 'scBaseEncoderFeat_Z_bs150_tau0.5.npy'))
test_semi_001 = np.load(os.path.join(TEST_AS_PENALTY_DIR, 'test_embedding.npy'))


In [None]:
# Baseline KNN (5 neighbours) on the train_semi_001 / test_semi_001 embeddings.
# NOTE: `ev` is also the evaluator name used inside the grid-search loops; this rebinds it.
ev = LCL_eval.Eval(train_semi_001, adata_train)
tr_acc = ev.KNN_train(n_neighbors=5)
te_acc = ev.KNN_test(test_semi_001, adata_test, n_neighbors=5)