# Method Benchmarking

- **Creator:** Sebastian Birk (<sebastian.birk@helmholtz-munich.de>)
- **Affiliation:** Helmholtz Munich, Institute of Computational Biology (ICB), Talavera-López Lab
- **Date of Creation:** 06.01.2023
- **Date of Last Modification:** 10.01.2023

## 1. Setup

### 1.1 Import Libraries

In [1]:
import sys
sys.path.append("../../autotalker")

In [2]:
from datetime import datetime

import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scib

from autotalker.benchmarking import compute_benchmarking_metrics

  res = Downloader(opt).maybe_download(
  res = Downloader(opt).maybe_download(
  res = Downloader(opt).maybe_download(
  res = Downloader(opt).maybe_download(
  return UNKNOWN_SERVER_VERSION
  warn(


### 1.2 Define Parameters

In [3]:
# Dataset to benchmark on
dataset = "squidpy_seqfish_mouse_organogenesis"

# Keys used to look up annotations and stored results
cell_type_key = "celltype_mapped_refined"
spatial_key = "spatial"
latent_key = "deeplinc_latent"
active_gp_names_key = "deeplinc_active_gp_names"

# Clustering resolution and random seed
leiden_resolution = 0.3
random_seed = 0

### 1.3 Run Notebook Setup

In [4]:
# Configure scanpy's figure parameters with a 6 x 6 figure size
sc.set_figure_params(figsize=(6, 6))

  IPython.display.set_matplotlib_formats(*ipython_format)


In [5]:
# Record when the notebook was executed; the formatted string
# (day, month, year, underscore, hour, minute, second) is meant for
# timestamping saved artifacts
now = datetime.now()
current_timestamp = f"{now:%d%m%Y_%H%M%S}"

### 1.4 Configure Paths and Directories

In [6]:
# Folder paths used by this notebook (relative to the notebook location)
data_folder_path = "../datasets/srt_data/gold/"
benchmark_data_folder_path = "../datasets/benchmark_data/"
# The figure folder is specific to the selected dataset
figure_folder_path = f"../figures/{dataset}/benchmarking/"

## 2. Data

In [7]:
# Read the dataset's "<dataset>_deeplinc.h5ad" file from the data folder
adata = sc.read_h5ad(f"{data_folder_path}{dataset}_deeplinc.h5ad")

## 3. Method Benchmarking

### 3.1 DeepLinc

In [None]:
# Compute benchmarking metrics for DeepLinc models.
#
# Every entry of ``n_neighbors_per_run`` corresponds to one run (run numbers
# start at 1); the run's latent representation is referenced via the key
# ``latent_key + "_run<run_number>"``. One dict of metrics is collected per
# run and the dicts are finally combined into ``df`` (one row per run).
#
# NOTE: currently restricted to the first two runs. The full configuration
# (10 runs) is:
# n_neighbors_per_run = [2, 2, 4, 4, 8, 8, 16, 16, 32, 32]
n_neighbors_per_run = [2, 2]

benchmarking_dict_list = []
for run_number, n_neighbors in enumerate(n_neighbors_per_run, start=1):
    # Key of the latent representation that belongs to this run
    run_latent_key = f"{latent_key}_run{run_number}"

    # autotalker metrics
    benchmarking_dict = compute_benchmarking_metrics(
        adata=adata,
        spatial_model=True,
        latent_key=run_latent_key,
        active_gp_names_key=active_gp_names_key,
        cell_type_key=cell_type_key,
        spatial_key=spatial_key,
        spatial_knng_key=f"deeplinc_spatial_{n_neighbors}nng",
        latent_knng_key=f"deeplinc_latent_{n_neighbors}nng",
        n_neighbors=n_neighbors)

    # scib metrics
    # Recompute the neighbor graph on this run's latent representation so
    # that the clustering below is based on it
    sc.pp.neighbors(adata=adata,
                    use_rep=run_latent_key)
    # ``force=True`` is required because the same cluster key is reused in
    # every iteration; without it scib refuses to overwrite the "cluster"
    # column written by the previous run and raises an error
    scib.me.cluster_optimal_resolution(adata=adata,
                                       cluster_key="cluster",
                                       label_key=cell_type_key,
                                       force=True)
    benchmarking_dict["ari"] = scib.me.ari(adata,
                                           cluster_key="cluster",
                                           label_key=cell_type_key)
    benchmarking_dict["clisi"] = scib.me.clisi_graph(adata=adata,
                                                     label_key=cell_type_key,
                                                     type_="embed",
                                                     use_rep=run_latent_key)
    benchmarking_dict["nmi"] = scib.me.nmi(adata=adata,
                                           cluster_key="cluster",
                                           label_key=cell_type_key)
    benchmarking_dict["asw"] = scib.me.silhouette(adata=adata,
                                                  label_key=cell_type_key,
                                                  embed=run_latent_key)
    benchmarking_dict["isolated_labels_asw"] = scib.me.isolated_labels_asw(
        adata=adata,
        batch_key="sample",
        label_key=cell_type_key,
        embed=run_latent_key)

    # Tag the metrics with model name and run number for later grouping
    benchmarking_dict["model"] = "deeplinc"
    benchmarking_dict["run"] = run_number
    benchmarking_dict_list.append(benchmarking_dict)
df = pd.DataFrame(benchmarking_dict_list)

### 3.2 Summary

In [None]:
# Average all columns over the rows (runs) that share the same model
mean_df = df.groupby(by="model").mean()

In [None]:
# Metric columns to visualize, in plotting order
columns = [
    # metrics that precede the scib metrics
    "gcd",
    "mlnmi",
    "cad",
    "arclisi",
    "germse",
    "cca",
    # metrics computed with scib
    "ari",
    "clisi",
    "nmi",
    "asw",
    "isolated_labels_asw",
]

In [None]:
# One row of subplots, one 4-inch-wide panel per metric column
n_metrics = len(columns)
fig, axs = plt.subplots(nrows=1,
                        ncols=n_metrics,
                        figsize=(4 * n_metrics, 4))

# Draw the per-model mean of every metric as a bar plot in its own panel
for metric_name, metric_ax in zip(columns, axs):
    sns.barplot(data=mean_df,
                x=mean_df.index,
                y=metric_name,
                ax=metric_ax)

plt.suptitle("Method Benchmarking Metrics")
plt.subplots_adjust(wspace=0.5, top=0.9)
plt.show()