In [23]:
import json
import math

import decoupler as dc
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
from scipy.stats import rankdata
import seaborn as sns
from sklearn.metrics import roc_curve, auc

In [None]:
# Generate the simulated dataset by running the generator script with the
# settings in simulated_data/parameters.json.
# NOTE(review): presumably this produces the simulated_data/simulated_*.tsv
# files read by the cells below -- confirm against the generator script.
!uv run src/sim_data_generator_optimized.py simulated_data/parameters.json

In [2]:
# Run KALE without weights (--weighted False) on the simulated expression and
# prior files, using the rank_of_ranks method on 8 cores. The score and p-value
# output paths are the *_test1.tsv files named on the command line.
print("\nRunning KALE for test case")
!uv run src/kale.py --gene_exp_file simulated_data/simulated_scRNASeq_data.tsv --prior_file simulated_data/simulated_prior_data.tsv --output_file simulated_data/_kale_scores_test1.tsv --pvalue_output_file simulated_data/_kale_pvalues_test1.tsv --ignore_zeros False --cores 8 --method rank_of_ranks --min_targets 1 --weighted False


Running KALE for test case
Using raw gene expression as input for per-cell ranking...
Starting TF activity using 8 cores.
Running in parallel with CORES_USED=8.
Processing cells in parallel:   0%|                    | 0/2000 [00:00<?, ?it/s][Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
Processing cells in parallel:   1%|           | 16/2000 [00:01<02:08, 15.48it/s][Parallel(n_jobs=8)]: Done   34 out of 2000 | elapsed:    1.1s
Processing cells in parallel: 100%|███████| 2000/2000 [00:01<00:00, 1199.68it/s]
[Parallel(n_jobs=8)]: Done 1824 out of 2000 | elapsed:    2.2s
[Parallel(n_jobs=8)]: Done 1985 out of 2000 | elapsed:    2.3s remaining:    0.0s
[Parallel(n_jobs=8)]: Done 2000 out of 2000 | elapsed:    2.3s finished

Aggregating results...
kale completed
Kale TF activity scores and p-values have been saved.


In [3]:
# Run KALE with weights enabled (--weighted True) and a power factor of 1
# (--weighted_power_factor 1); all other flags match the unweighted run. The
# score and p-value output paths are the *_test2.tsv files.
# NOTE(review): the original note called this "weighted option 2"; the option
# number does not appear in the CLI flags -- confirm what it refers to.
print("\nRunning KALE with weights for test case")
!uv run src/kale.py --gene_exp_file simulated_data/simulated_scRNASeq_data.tsv --prior_file simulated_data/simulated_prior_data.tsv --output_file simulated_data/_kale_scores_test2.tsv --pvalue_output_file simulated_data/_kale_pvalues_test2.tsv --ignore_zeros False --cores 8 --method rank_of_ranks --min_targets 1 --weighted True --weighted_power_factor 1


Running KALE with weights for test case
Using raw gene expression as input for per-cell ranking...
Calculating weighted gene expression as input for per-cell ranking...
Starting TF activity using 8 cores.
Running in parallel with CORES_USED=8.
Processing cells in parallel:   0%|                    | 0/2000 [00:00<?, ?it/s][Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
Processing cells in parallel:   2%|▏          | 40/2000 [00:01<00:43, 45.23it/s][Parallel(n_jobs=8)]: Done   34 out of 2000 | elapsed:    1.1s
Processing cells in parallel:  52%|████▏   | 1040/2000 [00:04<00:03, 292.64it/s][Parallel(n_jobs=8)]: Done  976 out of 2000 | elapsed:    4.5s
Processing cells in parallel: 100%|████████| 2000/2000 [00:07<00:00, 263.20it/s]
[Parallel(n_jobs=8)]: Done 2000 out of 2000 | elapsed:    8.0s finished

Aggregating results...
kale completed
--------------------------------------------------------------------------------------------------------------

In [9]:
# 4. Gene expression data, wrapped in an AnnData object for decoupler
gene_exp_file = "simulated_data/simulated_scRNASeq_data.tsv"
gene_exp = pd.read_csv(gene_exp_file, sep="\t", index_col=0)
adata = sc.AnnData(gene_exp)


# 5. Prior knowledge network
# The first three columns of the prior file are read as (source, effect, target)
# and the effect label is translated into a signed weight.
# NOTE(review): `effect_map.get` yields None for any label not listed in
# `effect_map` (including a header row, if the file has one) -- confirm the
# file only contains these two labels.
net_file = "simulated_data/simulated_prior_data.tsv"
effect_map = {"upregulates-expression": 1, "downregulates-expression": -1}
net = pd.read_csv(
    net_file,
    sep="\t",
    usecols=[0, 1, 2],
    names=["source", "weight", "target"],
    converters={"weight": effect_map.get},
).loc[:, ["source", "target", "weight"]]


# Run the selected decoupler methods on the expression data with the network
methods_to_run = ["viper", "mlm"]
dc.mt.decouple(adata, net, methods=methods_to_run, tmin=1)

Saved viper scores to simulated_data/_viper_scores_simulated_data.tsv
Saved mlm scores to simulated_data/_mlm_scores_simulated_data.tsv


In [19]:
# Per-cell score tables for each decoupler method, read from `adata.obsm`
# (presumably populated by the `dc.mt.decouple` call in the previous cell).
# Plain string keys: the original f-strings had no placeholders.
viper_scores_df = adata.obsm["score_viper"]
mlm_scores_df = adata.obsm["score_mlm"]

In [27]:
# Load the KALE score tables written by the unweighted (test1) and weighted
# (test2) runs; the first column of each file becomes the index.
kale_scores_df1, kale_scores_df2 = (
    pd.read_csv(scores_path, sep="\t", index_col=0)
    for scores_path in (
        "simulated_data/_kale_scores_test1.tsv",
        "simulated_data/_kale_scores_test2.tsv",
    )
)

# Report the total number of missing values in each table
for _kale_df in (kale_scores_df1, kale_scores_df2):
    print(_kale_df.isna().sum().sum())

0
0


In [25]:
tfs = kale_scores_df1.columns.unique().tolist()
cells = kale_scores_df1.index.unique().tolist()


def rank_tf_scores(scores_df, tf, cells, marked=True):
    """Return one TF's scores for `cells`, sorted ascending, with rank columns.

    Parameters
    ----------
    scores_df : pd.DataFrame
        Score table with one column per TF.
    tf : str
        Name of the column of `scores_df` to extract.
    cells : list
        Labels used as the index of the returned frame.
    marked : bool or pd.Series, default True
        Value assigned to the ``marked`` column. A scalar flags every cell the
        same way; a boolean Series indexed by cell label is aligned on the
        index. The default reproduces the original behaviour (all cells marked).

    Returns
    -------
    pd.DataFrame
        Columns ``tf``, ``marked`` and ``rank``, where ``rank`` is
        ``(average rank - 0.5) / n_cells`` and therefore lies strictly
        between 0 and 1.
    """
    ranked = pd.DataFrame(scores_df[tf], index=cells).sort_values(by=tf, ascending=True)
    ranked["marked"] = marked
    # Rank the scores between 0 and 1
    ranked["rank"] = (rankdata(ranked[tf], method="average") - 0.5) / len(ranked)
    return ranked


def mean_marked_rank(ranked):
    """Return the mean of the ``rank`` column over rows flagged in ``marked``."""
    return ranked.loc[ranked["marked"], "rank"].mean()


# Mean rank of the marked cells per TF and method: {tf: {method label: mean rank}}
result = {}

for tf in tfs:
    # NOTE(review): every cell is marked here, so each mean rank below is
    # exactly 0.5 whatever the scores are (the recorded output shows this).
    # Pass the simulation's ground-truth mask via `marked=` to make the
    # comparison between methods informative.
    kale_scores = rank_tf_scores(kale_scores_df1, tf, cells)
    viper_scores = rank_tf_scores(viper_scores_df, tf, cells)
    mlm_scores = rank_tf_scores(mlm_scores_df, tf, cells)
    kale_scores2 = rank_tf_scores(kale_scores_df2, tf, cells)

    # Calculate the mean rank of the marked cells for each method
    mean_rank_kale = mean_marked_rank(kale_scores)
    mean_rank_viper = mean_marked_rank(viper_scores)
    mean_rank_mlm = mean_marked_rank(mlm_scores)
    mean_rank_kale2 = mean_marked_rank(kale_scores2)

    result[tf] = {
        "KALE": mean_rank_kale,
        "Viper": mean_rank_viper,
        "MLM": mean_rank_mlm,
        "KALE with weights": mean_rank_kale2,
    }

    print(f"{tf} ------------------------------")
    print(f"\tKALE Mean rank: {mean_rank_kale}")
    print(f"\tViper Mean rank: {mean_rank_viper}")
    print(f"\tMLM Mean rank: {mean_rank_mlm}")
    print(f"\tKALE with weights Mean rank: {mean_rank_kale2}")

    # NOTE(review): only the first TF is processed; remove this `break` to
    # fill `result` for every TF once the marked mask is in place.
    break

TF_1 ------------------------------
	KALE Mean rank: 0.5
	Viper Mean rank: 0.5
	MLM Mean rank: 0.5
	KALE with weights Mean rank: 0.5


In [None]:

# Display the ranked KALE frame left in scope by the per-TF loop above; that
# loop breaks after its first iteration, so this is the first TF only.
kale_scores