In [None]:
# Enable IPython's autoreload extension in mode 2 so edited modules are
# re-imported before each cell runs (useful while developing the Concord package).
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import scanpy as sc
import time
from pathlib import Path
import torch
import Concord as ccd
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import matplotlib as mpl

from matplotlib import font_manager, rcParams
# Font override for plots; it is only defined here, not applied globally.
custom_rc = {'font.family': 'Arial'}

# Vector-export settings: leave SVG text as text and use font type 42 in PDFs.
mpl.rcParams.update({
    'svg.fonttype': 'none',
    'pdf.fonttype': 42,
})

In [None]:
# Run-level configuration: output directory, file-name suffix, compute device, seed.
proj_name = "human_pancreas_benchmarking"

# Take one timestamp so the directory name and the file suffix can never
# disagree when the cell happens to run across a minute/day boundary.
run_time = time.localtime()
save_dir = Path(f"../save/dev_{proj_name}-{time.strftime('%b%d', run_time)}/")
save_dir.mkdir(parents=True, exist_ok=True)
file_suffix = f"{proj_name}_{time.strftime('%b%d-%H%M', run_time)}"

# Prefer GPU 3, but fall back to GPU 0 on hosts with fewer devices and to
# the CPU when CUDA is unavailable, instead of failing later on 'cuda:3'.
preferred_gpu = 3
if torch.cuda.is_available():
    gpu_index = preferred_gpu if preferred_gpu < torch.cuda.device_count() else 0
    device = torch.device(f'cuda:{gpu_index}')
else:
    device = torch.device('cpu')

seed = 0
ccd.ul.set_seed(seed)

In [None]:
data_dir = Path("../data/pancreas/")

# Only one file needs to be read. The original cell read "dataset.h5ad" and
# immediately overwrote the result with the processed snapshot, which cost an
# extra full load. Flip the flag to start again from the raw dataset.
use_processed_snapshot = True
adata_file = (
    "adata_human_pancreas_benchmarking_Dec11-1918.h5ad"
    if use_processed_snapshot
    else "dataset.h5ad"
)
adata = sc.read(data_dir / adata_file)
adata

In [None]:
# Select 3000 features on the raw counts (seurat_v3 flavor, no normalisation/log here).
adata.X = adata.layers["counts"].copy()
feature_list = ccd.ul.select_features(adata, n_top_features=3000, flavor='seurat_v3', normalize=False, log1p=False)

# Subsetting returns a view of the parent AnnData; copy it so the assignments
# below modify a real object instead of triggering an implicit view-to-copy.
adata = adata[:, feature_list].copy()

# Downstream methods work on the log-normalised matrix.
adata.X = adata.layers["log_normalized"].copy()
adata.obs['log_ncounts'] = np.log1p(adata.obs['n_counts'])

Load the `obsm` embeddings saved by a previous run.

In [None]:
# Replace adata.obsm with the embeddings stored in the Oct10 HDF5 file.
prev_obsm_file = Path('../save/dev_human_pancreas_benchmarking-Oct10/') / "obsm_human_pancreas_benchmarking_Oct10-1845.h5"
adata.obsm = ccd.ul.load_obsm_from_hdf5(prev_obsm_file)
adata.obsm

### No correction

In [None]:
# Baseline without batch correction: 30-component PCA, also exposed under the
# "Unintegrated" obsm key (the same key appears in the benchmark's latent_keys).
sc.tl.pca(adata, n_comps=30)
adata.obsm["Unintegrated"] = adata.obsm["X_pca"]
# Neighbour graph on the 30 PCs, followed by a UMAP of that graph.
sc.pp.neighbors(adata, n_neighbors=30, n_pcs=30)
sc.tl.umap(adata, min_dist=0.1)

In [None]:
# Plot the UMAP computed above, one panel per column in show_cols, and save it.
show_basis = 'X_umap'
show_cols = ['log_ncounts', 'celltype', 'batch']
fig_path = save_dir / f"embeddings_{show_basis}_{file_suffix}.png"
ccd.pl.plot_embedding(
    adata,
    show_basis,
    show_cols,
    figsize=(10,3),
    dpi=300,
    ncols=3,
    font_size=5,
    point_size=5,
    legend_loc='on data',
    save_path=fig_path,
)

In [None]:
# Shared state for the integration runs below.
# NOTE: file_suffix is redefined here WITHOUT the proj_name prefix used in the
# configuration cell, so files written from here on carry only the timestamp.
file_suffix = f"{time.strftime('%b%d-%H%M')}"
ccd.set_verbose_mode(True)
# obs columns holding the cell-state label and the batch label.
state_key = 'cell_type'
batch_key= 'batch'
show_cols = [state_key, batch_key]
# One timer is reused for every method; each run stores timer.interval in time_log.
timer = ccd.ul.Timer()
time_log = {}

### Scanorama

In [None]:
# Run Scanorama under the shared timer, record the elapsed interval, and
# checkpoint adata.obsm to disk.
output_key = 'Scanorama'

with timer:
    ccd.ul.run_scanorama(
        adata,
        batch_key="batch",
        output_key=output_key,
        return_corrected=False,
    )

time_log[output_key] = timer.interval
obsm_path = save_dir / f"obsm_{file_suffix}.h5"
ccd.ul.save_obsm_to_hdf5(adata, obsm_path)

In [None]:
# UMAP of the Scanorama embedding, then a three-panel plot saved to save_dir.
output_key = 'Scanorama'
ccd.ul.run_umap(
    adata,
    source_key=output_key,
    result_key=f'{output_key}_UMAP',
    n_components=2,
    n_neighbors=30,
    min_dist=0.1,
    metric='euclidean',
    random_state=seed,
)
show_basis = f'{output_key}_UMAP'
show_cols = ['log_ncounts', 'celltype', 'batch']
fig_path = save_dir / f"embeddings_{show_basis}_{file_suffix}.png"
ccd.pl.plot_embedding(
    adata,
    show_basis,
    show_cols,
    figsize=(10,3),
    dpi=300,
    ncols=3,
    font_size=5,
    point_size=5,
    legend_loc='on data',
    save_path=fig_path,
)

In [None]:
import scib

# Leiden clustering on the Scanorama embedding (resolution 1.0), a plot of the
# clusters, and scib clustering / isolated-label / silhouette metrics.
output_key = 'Scanorama'
cluster_key = f'leiden_{output_key}'
label_key = 'cell_type'
sc.pp.neighbors(adata, n_neighbors=30, use_rep=output_key, metric='euclidean')
sc.tl.leiden(adata, resolution=1.0, key_added=cluster_key)
show_basis = f'{output_key}_UMAP'
show_cols = [cluster_key, label_key, 'batch']

fig_path = save_dir / f"embeddings_leiden_{show_basis}_{file_suffix}.png"
ccd.pl.plot_embedding(
    adata,
    show_basis,
    show_cols,
    figsize=(10,3),
    dpi=300,
    ncols=3,
    font_size=5,
    point_size=5,
    legend_loc='on data',
    save_path=fig_path,
)

# Cluster-vs-label agreement.
nmi_val = scib.metrics.nmi(adata, cluster_key, label_key)
ari_val = scib.metrics.ari(adata, cluster_key, label_key)
# Metrics computed directly on the embedding.
label_asw = scib.metrics.isolated_labels_asw(adata, label_key, batch_key, embed = output_key)
label_f1 = scib.metrics.isolated_labels_f1(adata, label_key, batch_key, embed = output_key)
silhouette = scib.me.silhouette(adata, label_key=label_key, embed=output_key)
print(f"NMI: {nmi_val:.3f}, ARI: {ari_val:.3f}, isolated_labels_asw: {label_asw:.3f}, isolated_labels_f1: {label_f1:.3f}, silhouette: {silhouette:.3f}")

In [None]:
import scib
# Leiden clustering (resolution 0.1) on the 'Concord' embedding, with NMI/ARI
# against the cell-type labels.
# NOTE(review): this cell sits in the Scanorama section, but adata.obsm['Concord']
# is only produced by the Concord section further down or by the earlier obsm
# load — confirm the key exists when running the notebook top to bottom.
output_key = 'Concord'
cluster_key = f'leiden_{output_key}'
label_key = 'cell_type'
sc.pp.neighbors(adata, n_neighbors=30, use_rep=output_key, metric='euclidean')
sc.tl.leiden(adata, resolution=0.1, key_added=cluster_key)
show_basis = f'{output_key}_UMAP'
show_cols = [cluster_key, label_key, 'batch']

ccd.pl.plot_embedding(
    adata, show_basis, show_cols, figsize=(10,3), dpi=300, ncols=3, font_size=5, point_size=5, legend_loc='on data',
    save_path=save_dir / f"embeddings_leiden_{show_basis}_{file_suffix}.png"
)
nmi_val = scib.metrics.nmi(adata, cluster_key, label_key)
ari_val = scib.metrics.ari(adata, cluster_key, label_key)
# Silhouette is intentionally disabled here; only NMI and ARI are reported.
#silhouette = scib.me.silhouette(adata, label_key=label_key, embed=output_key)
print(f"NMI: {nmi_val:.3f}, ARI: {ari_val:.3f}")

### Liger

In [None]:
# Run LIGER on the raw-count layer under the shared timer, then checkpoint obsm.
output_key = 'Liger'
with timer:
    ccd.ul.run_liger(
        adata,
        batch_key="batch",
        count_layer="counts",
        output_key=output_key,
        k=30,
        return_corrected=True,
    )

time_log[output_key] = timer.interval
obsm_path = save_dir / f"obsm_{file_suffix}.h5"
ccd.ul.save_obsm_to_hdf5(adata, obsm_path)

In [None]:
# Set the key explicitly, as the other plotting cells do, so this cell does not
# depend on whichever cell last assigned output_key.
output_key = 'Liger'
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.1, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
show_cols = ['log_ncounts', 'celltype', 'batch']
ccd.pl.plot_embedding(
    adata, show_basis, show_cols, figsize=(10,3), dpi=300, ncols=3, font_size=5, point_size=5, legend_loc='on data',
    save_path=save_dir / f"embeddings_{show_basis}_{file_suffix}.png"
)

### Harmony

In [None]:
# Run Harmony on the PCA coordinates under the shared timer, then checkpoint obsm.
output_key = 'Harmony'
with timer:
    ccd.ul.run_harmony(
        adata,
        batch_key="batch",
        input_key='X_pca',
        output_key=output_key,
    )

time_log[output_key] = timer.interval
obsm_path = save_dir / f"obsm_{file_suffix}.h5"
ccd.ul.save_obsm_to_hdf5(adata, obsm_path)

In [None]:
output_key = 'Harmony'
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.1, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
# State the plotted columns explicitly (same value a top-to-bottom run would
# inherit from the Liger cell) so the figure does not depend on execution order.
show_cols = ['log_ncounts', 'celltype', 'batch']
ccd.pl.plot_embedding(
    adata, show_basis, show_cols, figsize=(10,3), dpi=300, ncols=3, font_size=5, point_size=5, legend_loc='on data',
    save_path=save_dir / f"embeddings_{show_basis}_{file_suffix}.png"
)

### scVI

In [None]:
import scvi

# Train scVI under the shared timer. The returned model is kept in scvi_vae
# because the scANVI cell passes it in as scvi_model.
output_key = 'scVI'
scvi.settings.device = device
with timer:
    scvi_vae = ccd.ul.run_scvi(
        adata,
        batch_key="batch",
        output_key=output_key,
        return_model=True,
        return_corrected=False,
        transform_batch=None,
    )

time_log[output_key] = timer.interval
obsm_path = save_dir / f"obsm_{file_suffix}.h5"
ccd.ul.save_obsm_to_hdf5(adata, obsm_path)

In [None]:
# output_key must be assigned BEFORE run_umap: the original order computed the
# UMAP from whatever key the previously executed cell left behind and only then
# switched to 'scVI' for plotting.
output_key = 'scVI'
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.1, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
# Explicit columns (the value a top-to-bottom run inherits) to avoid hidden state.
show_cols = ['log_ncounts', 'celltype', 'batch']
ccd.pl.plot_embedding(
    adata, show_basis, show_cols, figsize=(10,3), dpi=300, ncols=3, font_size=5, point_size=5, legend_loc='on data',
    save_path=save_dir / f"embeddings_{show_basis}_{file_suffix}.png"
)

### scANVI

In [None]:
# Run scANVI with the cell-type labels, starting from the scVI model trained
# in the scVI cell (scvi_vae), then checkpoint obsm.
output_key = 'scANVI'
with timer:
    ccd.ul.run_scanvi(
        adata,
        batch_key="batch",
        labels_key='cell_type',
        output_key=output_key,
        scvi_model=scvi_vae,
        return_corrected=True,
        transform_batch=None,
    )

time_log[output_key] = timer.interval
obsm_path = save_dir / f"obsm_{file_suffix}.h5"
ccd.ul.save_obsm_to_hdf5(adata, obsm_path)

In [None]:
# Assign output_key before it is used; previously run_umap ran with the key left
# over from the last executed cell and the assignment came afterwards.
output_key = 'scANVI'
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.1, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
# Explicit columns (the value a top-to-bottom run inherits) to avoid hidden state.
show_cols = ['log_ncounts', 'celltype', 'batch']
ccd.pl.plot_embedding(
    adata, show_basis, show_cols, figsize=(10,3), dpi=300, ncols=3, font_size=5, point_size=5, legend_loc='on data',
    save_path=save_dir / f"embeddings_{show_basis}_{file_suffix}.png"
)

In [None]:
# Inspect which layers are present on adata after the runs above.
adata.layers

### Concord

In [None]:
# Dimensions of adata before training Concord.
adata.shape

In [None]:
# Number of cells per cell-type label.
adata.obs['cell_type'].value_counts()

In [None]:
# Keyword arguments shared by every ccd.Concord(...) constructor call below
# (plain, decoder, and classifier variants unpack this dict with **concord_args).
# NOTE(review): the dict includes 'adata' itself, so it cannot be passed where a
# parameter-only dict is expected — confirm against each consumer.
concord_args = {
    'adata': adata,
    'input_feature': None, 
    'min_p_intra_domain': 0.95,
    'n_epochs': 15,
    'p_intra_knn': 0.3,
    #'sampler_knn': 100,
    'augmentation_mask_prob': 0.4,
    'clr_temperature': 0.3,
    'latent_dim': 64,
    'domain_key': 'batch',  # obs column treated as the domain (batch)
    'seed': seed, # random seed
    'verbose': False, # print training progress
    'device': device, # device to run on
    'save_dir': save_dir # directory to save model checkpoints
}


In [None]:
# Encoder-only Concord model built from the shared arguments.
cur_ccd = ccd.Concord(use_decoder=False, **concord_args)

# Encode under the shared timer; the embedding is stored under the 'Concord' key.
output_key = 'Concord'
with timer:
    cur_ccd.encode_adata(
        input_layer_key='X_log1p',
        preprocess=True,
        output_key=output_key,
    )

time_log[output_key] = timer.interval
# Checkpoint the model's obsm so the embedding can be reloaded later.
obsm_path = save_dir / f"obsm_{file_suffix}.h5"
ccd.ul.save_obsm_to_hdf5(cur_ccd.adata, obsm_path)

In [None]:
# Assign output_key before run_umap uses it; the original order relied on the
# value left behind by the previously executed cell.
output_key = 'Concord'
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.1, metric='euclidean', random_state=seed)
show_cols = ['log_ncounts', 'celltype', 'batch']
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
    adata, show_basis, show_cols, figsize=(10,3), dpi=300, ncols=3, font_size=5, point_size=5, legend_loc='on data',
    save_path=save_dir / f"embeddings_{show_basis}_{file_suffix}.png"
)

In [None]:
output_key = 'Concord'
# Zoom in on six selected cell types. Copy the subset: run_umap stores its
# result on the object, and writing into an AnnData view forces an implicit copy.
subset_cell_types = ['activated_stellate', 'quiescent_stellate', 'schwann', 'macrophage', 'mast', 't_cell']
adata_sub = adata[adata.obs['cell_type'].isin(subset_cell_types)].copy()
ccd.ul.run_umap(adata_sub, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.1, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
show_cols = ['log_ncounts', 'celltype', 'batch']
ccd.pl.plot_embedding(
    adata_sub, show_basis, show_cols, figsize=(10,3), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data',
    save_path=save_dir / f"embeddings_sub_{show_basis}_{file_suffix}.png"
)

In [None]:
output_key = 'Concord'
# Zoom in on macrophage / mast / T cells. Copy the subset so run_umap writes
# into a real AnnData object rather than a view.
adata_sub = adata[adata.obs['cell_type'].isin(['macrophage', 'mast', 't_cell'])].copy()
ccd.ul.run_umap(adata_sub, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.1, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
show_cols = ['log_ncounts', 'celltype', 'batch']
# Use a distinct file name: the six-cell-type subset plot is saved as
# "embeddings_sub_<basis>_<suffix>.png", and reusing that name here would
# overwrite that figure.
ccd.pl.plot_embedding(
    adata_sub, show_basis, show_cols, figsize=(10,3), dpi=300, ncols=3, font_size=5, point_size=30, legend_loc='on data',
    save_path=save_dir / f"embeddings_sub_immune_{show_basis}_{file_suffix}.png"
)

In [None]:
import scib
# Leiden clustering (resolution 1.0) on the Concord embedding plus scib metrics.
output_key = 'Concord'
cluster_key = f'leiden_{output_key}'
label_key = 'cell_type'
sc.pp.neighbors(adata, n_neighbors=30, use_rep=output_key, metric='euclidean')
sc.tl.leiden(adata, resolution=1.0, key_added=cluster_key)
show_basis = f'{output_key}_UMAP'
show_cols = [cluster_key, label_key, 'batch']
nmi_val = scib.metrics.nmi(adata, cluster_key, label_key)
ari_val = scib.metrics.ari(adata, cluster_key, label_key)
# Compute the embedding-based metrics for THIS embedding. The original cell
# printed label_asw / label_f1 / silhouette without recomputing them, so it
# reported the values left over from the Scanorama cell as Concord results.
label_asw = scib.metrics.isolated_labels_asw(adata, label_key, batch_key, embed=output_key)
label_f1 = scib.metrics.isolated_labels_f1(adata, label_key, batch_key, embed=output_key)
silhouette = scib.me.silhouette(adata, label_key=label_key, embed=output_key)
print(f"NMI: {nmi_val:.3f}, ARI: {ari_val:.3f}, isolated_labels_asw: {label_asw:.3f}, isolated_labels_f1: {label_f1:.3f}, silhouette: {silhouette:.3f}")

In [None]:
import scib

# Leiden clustering (resolution 0.2) on the Concord embedding: plot the
# clusters, then report NMI, ARI and silhouette.
output_key = 'Concord'
cluster_key = f'leiden_{output_key}'
label_key = 'cell_type'
sc.pp.neighbors(adata, n_neighbors=30, use_rep=output_key, metric='euclidean')
sc.tl.leiden(adata, resolution=0.2, key_added=cluster_key)
show_basis = f'{output_key}_UMAP'
show_cols = [cluster_key, label_key, 'batch']

fig_path = save_dir / f"embeddings_leiden_{show_basis}_{file_suffix}.png"
ccd.pl.plot_embedding(
    adata,
    show_basis,
    show_cols,
    figsize=(10,3),
    dpi=300,
    ncols=3,
    font_size=5,
    point_size=5,
    legend_loc='on data',
    save_path=fig_path,
)
nmi_val = scib.metrics.nmi(adata, cluster_key, label_key)
ari_val = scib.metrics.ari(adata, cluster_key, label_key)
silhouette = scib.me.silhouette(adata, label_key=label_key, embed=output_key)
print(f"NMI: {nmi_val:.3f}, ARI: {ari_val:.3f}, silhouette: {silhouette:.3f}")

### Concord-decoder

In [None]:
# Concord variant with the decoder enabled, built from the same shared arguments.
decoder_ccd = ccd.Concord(use_decoder=True, **concord_args)

output_key = 'Concord-decoder'
with timer:
    decoder_ccd.encode_adata(
        input_layer_key='X_log1p',
        preprocess=True,
        output_key=output_key,
    )

time_log[output_key] = timer.interval
# Checkpoint the model's obsm so the embedding can be reloaded later.
obsm_path = save_dir / f"obsm_{file_suffix}.h5"
ccd.ul.save_obsm_to_hdf5(decoder_ccd.adata, obsm_path)

In [None]:
# UMAP of the Concord-decoder embedding and the standard three-panel plot.
output_key = 'Concord-decoder'
ccd.ul.run_umap(
    adata,
    source_key=output_key,
    result_key=f'{output_key}_UMAP',
    n_components=2,
    n_neighbors=30,
    min_dist=0.1,
    metric='euclidean',
    random_state=seed,
)
show_basis = f'{output_key}_UMAP'
show_cols = ['log_ncounts', 'celltype', 'batch']
fig_path = save_dir / f"embeddings_{show_basis}_{file_suffix}.png"
ccd.pl.plot_embedding(
    adata,
    show_basis,
    show_cols,
    figsize=(10,3),
    dpi=300,
    ncols=3,
    font_size=5,
    point_size=5,
    legend_loc='on data',
    save_path=fig_path,
)

In [None]:
output_key = 'Concord-decoder'
# Zoom in on six selected cell types. Copy the subset: run_umap stores its
# result on the object, and writing into an AnnData view forces an implicit copy.
subset_cell_types = ['activated_stellate', 'quiescent_stellate', 'schwann', 'macrophage', 'mast', 't_cell']
adata_sub = adata[adata.obs['cell_type'].isin(subset_cell_types)].copy()
ccd.ul.run_umap(adata_sub, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.1, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
show_cols = ['log_ncounts', 'celltype', 'batch']
ccd.pl.plot_embedding(
    adata_sub, show_basis, show_cols, figsize=(10,3), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data',
    save_path=save_dir / f"embeddings_sub_{show_basis}_{file_suffix}.png"
)

In [None]:
# Set the key explicitly; the original cell silently reused whatever
# output_key the last executed cell had assigned.
output_key = 'Concord-decoder'
# Zoom in on macrophage / mast / T cells. Copy the subset so run_umap writes
# into a real AnnData object rather than a view.
adata_sub = adata[adata.obs['cell_type'].isin(['macrophage', 'mast', 't_cell'])].copy()
ccd.ul.run_umap(adata_sub, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.1, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
show_cols = ['log_ncounts', 'celltype', 'batch']
# Distinct file name so this figure does not overwrite the six-cell-type
# subset plot, which is saved as "embeddings_sub_<basis>_<suffix>.png".
ccd.pl.plot_embedding(
    adata_sub, show_basis, show_cols, figsize=(10,3), dpi=300, ncols=3, font_size=5, point_size=30, legend_loc='on data',
    save_path=save_dir / f"embeddings_sub_immune_{show_basis}_{file_suffix}.png"
)

### Concord-class

In [None]:
# Concord variant with the classifier head enabled, supervised by 'cell_type'.
# Note that this rebinds cur_ccd, replacing the encoder-only model from the Concord section.
cur_ccd = ccd.Concord(use_classifier=True, # use classifier
                      class_key = 'cell_type', # key indicating cell type
                      train_frac=1.0, # fraction of data to use for training
                      **concord_args
                      ) 

# Encode data, saving the latent embedding under the 'Concord-class' key
# file_suffix is redefined here WITH the proj_name prefix, so files written
# from this point on are named differently from the earlier timestamp-only ones.
file_suffix = f"{proj_name}_{time.strftime('%b%d-%H%M')}"
output_key = 'Concord-class'
with timer:
    cur_ccd.encode_adata(input_layer_key='X_log1p', output_key=output_key, preprocess=True)

time_log[output_key] = timer.interval
# Save the latent embedding to a file, so that it can be loaded later
ccd.ul.save_obsm_to_hdf5(cur_ccd.adata, save_dir / f"obsm_{file_suffix}.h5")

In [None]:
# UMAP of the Concord-class embedding and the standard three-panel plot.
output_key = 'Concord-class'
ccd.ul.run_umap(
    adata,
    source_key=output_key,
    result_key=f'{output_key}_UMAP',
    n_components=2,
    n_neighbors=30,
    min_dist=0.1,
    metric='euclidean',
    random_state=seed,
)
show_basis = f'{output_key}_UMAP'
show_cols = ['log_ncounts', 'celltype', 'batch']
fig_path = save_dir / f"embeddings_{show_basis}_{file_suffix}.png"
ccd.pl.plot_embedding(
    adata,
    show_basis,
    show_cols,
    figsize=(10,3),
    dpi=300,
    ncols=3,
    font_size=5,
    point_size=5,
    legend_loc='on data',
    save_path=fig_path,
)

In [None]:
output_key = 'Concord-class'
# Zoom in on six selected cell types. Copy the subset: run_umap stores its
# result on the object, and writing into an AnnData view forces an implicit copy.
subset_cell_types = ['activated_stellate', 'quiescent_stellate', 'schwann', 'macrophage', 'mast', 't_cell']
adata_sub = adata[adata.obs['cell_type'].isin(subset_cell_types)].copy()
ccd.ul.run_umap(adata_sub, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.1, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
show_cols = ['log_ncounts', 'celltype', 'batch']
ccd.pl.plot_embedding(
    adata_sub, show_basis, show_cols, figsize=(10,3), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data',
    save_path=save_dir / f"embeddings_sub_{show_basis}_{file_suffix}.png"
)

In [None]:
# Persist every embedding in adata.obsm and the full AnnData object, then echo
# the suffix so the written files can be located later.
obsm_filename = save_dir / f"obsm_{file_suffix}.h5"
ccd.ul.save_obsm_to_hdf5(adata, obsm_filename)
adata_filename = data_dir / f"adata_{file_suffix}.h5ad"
adata.write_h5ad(adata_filename)
file_suffix

## Benchmarking

In [None]:
# obsm keys to benchmark. Each name must match the output_key under which the
# embedding was written; the LIGER run in this notebook uses 'Liger', so the
# previous "LIGER" entry did not match any embedding produced here.
latent_keys = ["Unintegrated", "Scanorama", "Liger", "Harmony", "scVI", "scANVI", "Concord", 'Concord-decoder', "Concord-class"]
#latent_keys = ["Unintegrated", "Concord", 'Concord-decoder', "Concord-class"]

# Fail early with a readable message instead of deep inside the benchmarker.
missing_latent_keys = [key for key in latent_keys if key not in adata.obsm]
if missing_latent_keys:
    raise KeyError(f"Embeddings missing from adata.obsm: {missing_latent_keys}")

In [None]:
from scib_metrics.benchmark import Benchmarker

# scib-metrics benchmark over every embedding listed in latent_keys.
benchmark_kwargs = dict(
    batch_key='batch',
    label_key='cell_type',
    embedding_obsm_keys=latent_keys,
    n_jobs=6,
)
bm = Benchmarker(adata, **benchmark_kwargs)
bm.benchmark()

In [None]:
import matplotlib.pyplot as plt
import os

# Draw the unscaled results table, resize the current figure, and save it as PDF.
bm.plot_results_table(min_max_scale=False, show=False)
fig = plt.gcf()
fig.set_size_inches(16, 6) 
table_pdf = os.path.join(save_dir, f'scibmetrics_results_{file_suffix}.pdf')
fig.savefig(table_pdf, facecolor='white', dpi=600)
plt.show()
plt.close(fig)

In [None]:
# Recompute NMI and ARI over a sweep of clustering resolutions
# (0.1 to 1.0 in steps of 0.1), following the approach described in the paper.
label_key = 'cell_type'
resolutions = np.arange(0.1, 1.1, 0.1)
nmi_df, ari_df = ccd.ul.benchmark_nmi_ari(
    adata,
    emb_keys=latent_keys,
    label_key=label_key,
    resolution_range=resolutions,
    n_neighbors=15,
    metric='euclidean',
    verbose=True,
)
nmi_df.to_csv(save_dir / f"nmi_{file_suffix}.csv")
ari_df.to_csv(save_dir / f"ari_{file_suffix}.csv")

In [None]:
import pandas as pd

# Export the raw scib-metrics table before it is modified.
scib_scores = bm.get_results(min_max_scale=False)
scib_scores.to_csv(save_dir / f"scib_scores_{file_suffix}.csv")

# Best NMI / ARI per embedding across the resolution sweep.
nmi_max = nmi_df.max()
ari_max = ari_df.max()

# Overwrite the KMeans-based columns with those best values ...
for kmeans_col, best_scores in (('KMeans NMI', nmi_max), ('KMeans ARI', ari_max)):
    scib_scores.loc[best_scores.index, kmeans_col] = best_scores

# ... and drop the 'KMeans ' prefix so the columns read 'NMI' and 'ARI'.
scib_scores.columns = scib_scores.columns.str.replace('KMeans ', '')
scib_scores


In [None]:
import pandas as pd

# Lift the 'Metric Type' row into the first level of a two-level column index
# (metric type, metric name), then remove that row from the table body.
metric_type = scib_scores.loc['Metric Type']
scib_scores = scib_scores.drop('Metric Type')
metric_names = list(scib_scores.columns)
scib_scores.columns = pd.MultiIndex.from_arrays(
    [[metric_type[name] for name in metric_names], metric_names]
)
scib_scores = ccd.ul.benchmark_stats_to_score(
    scib_scores,
    min_max_scale=False,
    one_minus=False,
    aggregate_score=False,
    rank=True,
    rank_col=('Aggregate score', 'Total'),
    name_exact=False,
)

# Font override applied only while this table is drawn.
custom_rc = {'font.family': 'Arial'}

with plt.rc_context(rc=custom_rc):
    table_pdf = save_dir / f"scib_results_{file_suffix}.pdf"
    ccd.pl.plot_benchmark_table(scib_scores, pal='PRGn', pal_agg='RdYlBu_r', cmap_method = 'minmax', save_path=table_pdf, figsize=(16, 6), dpi=300)

### Benchmarking hyperparameter tests

In [None]:
# Batch run
# `params` was never defined anywhere in the notebook, so this cell raised a
# NameError. Derive the base parameters from concord_args; 'adata' is dropped
# because it is already passed as the first positional argument.
# NOTE(review): assumes run_hyperparameter_tests expects a dict of Concord
# keyword arguments without 'adata' — confirm against its signature.
params = {key: value for key, value in concord_args.items() if key != 'adata'}

# One grid shared by both sweeps (the two original definitions were identical).
param_grid = {
    "augmentation_mask_prob": [0.0, 0.1, 0.3, 0.5, 0.7, 0.9],
    "p_intra_knn": [0.0, 0.05, 0.1, 0.3, 0.5],
    "min_p_intra_domain": [0.3, 0.5, 0.8, 1.0],
}
ccd.ul.run_hyperparameter_tests(adata, params, param_grid, output_key='Concord', return_decoded=False, trace_memory=True, trace_gpu_memory=True, save_dir=save_dir)

# Same sweep with the classifier enabled; results use the 'Concord-ant' prefix.
params_cdt = params.copy()
params_cdt['use_classifier'] = True
ccd.ul.run_hyperparameter_tests(adata, params_cdt, param_grid, output_key='Concord-ant', return_decoded=False, trace_memory=True, trace_gpu_memory=True, save_dir=save_dir)


In [None]:

# Benchmark every embedding in adata.obsm except the UMAP projections, then
# pickle the benchmarker and export its unscaled results to Excel.
file_suffix = f"{time.strftime('%b%d-%H%M')}"
non_umap_keys = [key for key in adata.obsm.keys() if "UMAP" not in key]
bm = Benchmarker(
    adata,
    batch_key="batch",
    label_key="cell_type",
    embedding_obsm_keys=non_umap_keys,
    n_jobs=3,
)
bm.benchmark()
ccd.ul.save_object(bm, save_dir / f"bm_{file_suffix}.pkl")
results_xlsx = save_dir / f"bm_results_{file_suffix}.xlsx"
bm.get_results(min_max_scale=False).to_excel(results_xlsx)


In [None]:

import matplotlib.pyplot as plt
import os

# Draw the unscaled results table for the hyperparameter sweep on a wide
# canvas and save it as PDF.
bm.plot_results_table(min_max_scale=False, show=False)
fig = plt.gcf()
fig.set_size_inches(25, 5) 
table_pdf = os.path.join(save_dir, f'scibmetrics_results_{file_suffix}.pdf')
fig.savefig(table_pdf, facecolor='white', dpi=300)
plt.show()
plt.close(fig)


In [None]:
# UMAP and plot for one embedding produced by the classifier hyperparameter sweep.
output_key = "Concord-ant_augmentation_mask_prob_0.5"
# Every other run_umap call in this notebook names the destination with
# `result_key`; this call used `umap_key`, which is inconsistent with that usage.
# NOTE(review): n_epochs / use_cuml are not used elsewhere in the notebook —
# confirm they are still accepted by the current run_umap signature.
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_epochs=500, n_neighbors=15, min_dist=0.1, metric='euclidean', random_state=seed, use_cuml=False)

show_emb = f'{output_key}_UMAP'

# show_cols is inherited from the last cell that assigned it.
ccd.pl.plot_embedding(
    adata, show_emb, show_cols, figsize=(10,8), dpi=600, ncols=2, font_size=8, point_size=2, legend_loc='on data',
    save_path=save_dir / f"embeddings_{show_emb}_{file_suffix}.png"
)
