In [None]:
import time
from datetime import datetime
from itertools import product

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import signaturescoring as ssc

In [None]:
# Path to the esophageal dataset (must be filled in before running the benchmark).
DATA_PATH = ""

# Names of the scoring methods to benchmark; these are also used as the
# column names of the timing table.
SCORING_METHODS = [
    'adjusted_neighborhood_scoring',
    'seurat_scoring',
    'seurat_ag_scoring',
    'seurat_lvg_scoring',
    'scanpy_scoring',
    'jasmine_scoring_likelihood',
    'jasmine_scoring_oddsratio',
    'ucell_scoring',
]

In [None]:
# Benchmark grid: every scoring method is timed `n_random_seeds` times for each
# combination of signature length and number of cells.
n_random_seeds = 5
sig_lengths = [(10 ** i) for i in range(4)]  # 1, 10, 100, 1000 genes
n_cells = [10_000 * i for i in range(1, 17, 3)]  # 10k, 40k, ..., 160k cells

# Seeded generator so the sampled cells and signature genes are identical on
# every run of the notebook.
RANDOM_SEED = 0
rng = np.random.default_rng(RANDOM_SEED)

adata = sc.read(DATA_PATH)
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

# Fail fast: sampling without replacement cannot draw more items than exist,
# and discovering that only after the smaller configurations have been timed
# would waste the whole run.
if max(n_cells) > adata.n_obs:
    raise ValueError(
        f"Largest requested sample ({max(n_cells)} cells) exceeds the dataset size ({adata.n_obs} cells)."
    )
if max(sig_lengths) > adata.n_vars:
    raise ValueError(
        f"Largest requested signature ({max(sig_lengths)} genes) exceeds the number of genes ({adata.n_vars})."
    )

# One row per (repetition, signature length, number of cells), one column per method.
index = pd.MultiIndex.from_tuples(
    list(product(range(n_random_seeds), sig_lengths, n_cells)),
    names=["random_seed", "sig_length", "n_cell"],
)
results = pd.DataFrame(columns=SCORING_METHODS, index=index)

for n_cell in n_cells:
    print(n_cell)  # coarse progress indicator
    # The same random subset of cells is reused for all signature lengths and methods.
    bdata = adata[rng.choice(adata.obs_names, size=n_cell, replace=False)].copy()
    for sig_length in sig_lengths:
        # One random signature per (n_cell, sig_length), shared by all methods and repetitions.
        sig = list(rng.choice(adata.var_names, size=sig_length, replace=False))
        for method, n_run in product(SCORING_METHODS, range(n_random_seeds)):
            # perf_counter is monotonic, so system clock adjustments cannot distort the timing.
            start = time.perf_counter()
            if method.startswith("jasmine_scoring"):
                # "jasmine_scoring_<variant>" -> method="jasmine_scoring", score_method="<variant>"
                base_method, score_method = method.rsplit("_", 1)
                ssc.score_signature(bdata, gene_list=sig, method=base_method, score_method=score_method, score_name="foo")
            else:
                ssc.score_signature(bdata, gene_list=sig, method=method, score_name="foo")
            elapsed_seconds = time.perf_counter() - start
            # Stored as a timedelta so the CSV keeps the same duration format as before.
            results.loc[(n_run, sig_length, n_cell), method] = pd.Timedelta(seconds=elapsed_seconds)

results.to_csv("results_scoring.csv")

In [None]:
# Load the raw timing table written by the benchmark. The per-method duration
# columns are read as plain strings and converted to seconds in a later step,
# so no date parsing is requested here: `infer_datetime_format` is deprecated
# in pandas 2.x and `parse_dates=True` only targets an index column, which
# this call does not define.
df = pd.read_csv("results_scoring.csv")

In [None]:
# Turn the raw timings into the mean run time in seconds per
# (sig_length, n_cell) combination and scoring method.
# NOTE: `df` is overwritten, so re-running this cell alone fails ("random_seed"
# is no longer a column afterwards); re-run the loading cell first.
df = (
    # Rows with random_seed == 0 are excluded from the average
    # (presumably treated as a warm-up repetition - TODO confirm).
    df[df["random_seed"] != 0]
    .set_index(["random_seed", "sig_length", "n_cell"])
    # Vectorized per-column conversion: duration strings -> timedeltas -> float seconds.
    .apply(lambda durations: pd.to_timedelta(durations).dt.total_seconds())
    # Average over the remaining repetitions.
    .groupby(level=["sig_length", "n_cell"])
    .mean()
)

In [None]:
# Assign every scoring method (column of `df`) a fixed colour and dash pattern
# so that all figures draw a given method the same way.
tab10_hex = sns.color_palette("tab10").as_hex()
color_map = {method: tab10_hex[pos] for pos, method in enumerate(df.columns)}

# Dash patterns in matplotlib's (offset, (on, off, ...)) form; the position in
# this list matches the column position in `df`.
linestyle_tuple = [
    (0, (1, 1)),
    (5, (10, 3)),
    (0, (5, 5)),
    (0, (5, 1)),
    (0, (3, 5, 1, 5)),
    (0, (3, 1, 1, 1)),
    (0, (3, 5, 1, 5, 1, 5)),
    (0, (3, 1, 1, 1, 1, 1)),
]
ls_map = {method: linestyle_tuple[pos] for pos, method in enumerate(df.columns)}

In [None]:
# One panel per signature length: run time against number of cells, with one
# line per scoring method. Panels are laid out on two rows.
sig_lens = df.index.get_level_values(0).unique()
n_panel_cols = len(sig_lens) // 2 + len(sig_lens) % 2  # ceil(len(sig_lens) / 2)
fig, axes = plt.subplots(2, n_panel_cols, figsize=(8, 8))
for n_plot, (ax, sig_len) in enumerate(zip(axes.flatten(), sorted(sig_lens))):
    subset = df[df.index.get_level_values(0) == sig_len]

    # Only the last panel labels its lines, so the shared figure legend lists
    # every method exactly once.
    is_last_panel = n_plot == len(sig_lens) - 1
    for col in subset.columns:
        legend_kwargs = {"label": col} if is_last_panel else {}
        ax.plot(
            subset.index.get_level_values(1),
            subset[col],
            color=color_map[col],
            linestyle=ls_map[col],
            **legend_kwargs,
        )
    ax.set_title(f"Run time for signature of length {sig_len}")
    ax.set_ylabel("Run time in seconds")
    ax.set_xlabel("Number of cells")
    ax.set_xticks([0, 50_000, 100_000, 150_000])
    ax.set_xticklabels([0, 50_000, 100_000, 150_000])

# With an odd number of signature lengths the grid has one surplus axes; hide
# it instead of leaving an empty frame in the figure.
for spare_ax in axes.flatten()[len(sig_lens):]:
    spare_ax.set_visible(False)

fig.legend(loc='center left', bbox_to_anchor=(1.0, .5))
fig.tight_layout()
fig.savefig("scoring_benchmark.pdf", bbox_inches="tight")

In [None]:
# Run time against signature length at the largest benchmarked number of cells
# (160,000 with the benchmark grid defined in this notebook), one line per
# scoring method.
largest_n_cell = int(df.index.get_level_values(1).max())
fig, ax = plt.subplots()
subset = df[df.index.get_level_values(1) == largest_n_cell]
for col in subset.columns:
    # Single panel: every line carries its label for the figure legend.
    ax.plot(
        subset.index.get_level_values(0),
        subset[col],
        color=color_map[col],
        label=col,
        linestyle=ls_map[col],
    )
# Title and axis labels do not depend on the method, so they are set once.
ax.set_title(f"Run time for {largest_n_cell:,} cells")
ax.set_ylabel("Run time in seconds")
ax.set_xlabel("Signature length")
fig.legend(loc='center left', bbox_to_anchor=(1.0, .5))
fig.tight_layout()