In [1]:
from SIENA.siena import *
from helper import *

To assess the mean AUC of SIENA, we repeat the described pipeline multiple times and average the AUC obtained in each run. This is the same procedure used in the paper to compute SIENA's mean AUCs.
Here we illustrate the procedure on the Islam dataset using only 2 runs.

In [5]:
# Estimate SIENA's AUC on the Islam dataset over several independent runs.
n_times = 2  # number of runs
n_epochs = 1000  # iterations passed to approximate_model in each run
path = './data/Islam'
# trueDEG.txt is presumably the ground-truth list of differentially expressed
# genes; its "SYMBOL" column is what calculate_auc compares against below.
trueDEG = pd.read_csv(path + '/trueDEG.txt', sep=",", header=0)
data = CSVDataset(path, "Islam_treated.csv", labels_file="Islam_labels.csv")
aucs = []  # one AUC value per run
for i in range(n_times):
    # A new model is built and fitted on every iteration, so each run
    # contributes its own AUC to the list.
    model = ModelDE(data.X,data.cell_types,data.labels, zero_inflation=False, dispersion=True, use_log=True)
    model.approximate_model(iters=n_epochs)
    # Score differential expression between the "MEF" and "ESC" cell types.
    scores = model.differential_expression_scores("MEF","ESC", n_samples=100)
    res = model.differentially_expressed_genes(data.gene_names,scores)
    auc_myModel = calculate_auc(res['factor'], res["Gene"], trueDEG["SYMBOL"])
    aucs.append(auc_myModel)
print(aucs)
# The purpose of this cell is the mean AUC, so report the average of the
# per-run values as well (NaN when no run was executed).
mean_auc = sum(aucs) / len(aucs) if aucs else float("nan")
print("Mean AUC:", mean_auc)

Using log scalings
Minisamples: 1
1000/1000 [100%] ██████████████████████████████ Elapsed: 107s | Loss: 17661.345
rho1_values shape: (100, 44, 6757)
rho2_values shape: (100, 48, 6757)
Using log scalings
Minisamples: 1
1000/1000 [100%] ██████████████████████████████ Elapsed: 107s | Loss: 17612.497
rho1_values shape: (100, 44, 6757)
rho2_values shape: (100, 48, 6757)
[0.6890521447545355, 0.6885060811683533]


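For the synthetic data the procedure is almost the same, with a few small changes: the data file is loaded with `gene_by_cell=True`, the model is built with `dispersion=False`, the two cell types being compared are "B" and "A", and the true DE genes are read from the "Gene" column of `trueDEG.txt`.
Here we illustrate the procedure with the 50-50-50-50 dataset.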

In [9]:
# Estimate SIENA's AUC on the 50-50-50-50 synthetic dataset over several
# independent runs.
n_times = 2  # number of runs
n_epochs = 1000  # iterations passed to approximate_model in each run
path = './data/Sinteticos/50-50-50-50-400-400'
# trueDEG.txt is presumably the ground-truth list of differentially expressed
# genes; its "Gene" column is what calculate_auc compares against below.
trueDEG = pd.read_csv(path + '/trueDEG.txt', sep=",", header=0)
data = CSVDataset(path, "synthetic.csv", gene_by_cell=True, labels_file="labels.csv")
aucs = []  # one AUC value per run
for i in range(n_times):
    # A new model is built and fitted on every iteration, so each run
    # contributes its own AUC to the list. Dispersion is turned off here.
    model = ModelDE(data.X,data.cell_types,data.labels, zero_inflation=False, dispersion=False, use_log=True)
    model.approximate_model(iters=n_epochs)
    # Score differential expression between the "B" and "A" cell types.
    scores = model.differential_expression_scores("B","A", n_samples=100)
    res = model.differentially_expressed_genes(data.gene_names,scores)
    auc_myModel = calculate_auc(res['factor'], res["Gene"], trueDEG["Gene"])
    aucs.append(auc_myModel)
print(aucs)
# The purpose of this cell is the mean AUC, so report the average of the
# per-run values as well (NaN when no run was executed).
mean_auc = sum(aucs) / len(aucs) if aucs else float("nan")
print("Mean AUC:", mean_auc)

Using log scalings
No Dispersion
Minisamples: 1
1000/1000 [100%] ██████████████████████████████ Elapsed: 82s | Loss: 24166.068
rho1_values shape: (100, 500, 1000)
rho2_values shape: (100, 500, 1000)
Using log scalings
No Dispersion
Minisamples: 1
1000/1000 [100%] ██████████████████████████████ Elapsed: 82s | Loss: 24208.380
rho1_values shape: (100, 500, 1000)
rho2_values shape: (100, 500, 1000)
[0.9232250000000001, 0.9193874999999999]
