In [None]:
import scanpy as sc
from tqdm import tqdm
import pandas as pd
import itertools
from scipy.stats import spearmanr
import random

In [None]:
# Input location: the directory holding the data and the name of the .h5ad file.
base_path = 'path/to/base-dir'
filename = 'filename.h5ad'

# Load the AnnData object stored as "uncorrelated_genes_<filename>" in base_path.
input_h5ad = f'{base_path}/uncorrelated_genes_{filename}'
adata = sc.read_h5ad(input_h5ad)

# Select simulation
# NOTE(review): this assigns the label 'sim_1' to *every* cell (overwriting any
# existing 'Batch' column); it does not filter cells. Confirm this is intended.
adata.obs['Batch'] = 'sim_1'

In [None]:
# Names of the adata.obs columns used for grouping cells:
# `sample` -> column identifying the sample, `run` -> column identifying the run.
sample, run = 'Sample', 'Batch'

In [None]:
# Pairwise Spearman correlations between cells that share both a sample and a run.
# Each entry is a tuple ('intra', <run label>, <correlation>).
result_intra = []

# Iterate over samples
for samp in adata.obs[sample].unique():
    adata_samp = adata[adata.obs[sample] == samp]
    print(samp, adata_samp.shape[0])
    # Iterate over runs
    for ct in adata_samp.obs[run].unique():
        adata_samp_ct = adata_samp[adata_samp.obs[run] == ct]
        print("\t", ct, adata_samp_ct.shape[0])
        # Fetch the expression matrix once per group: reading `.X` from an
        # AnnData view re-subsets the parent matrix on every access, which is
        # prohibitively slow inside the O(n^2) pair loop below.
        expr = adata_samp_ct.X
        # Sparse matrices are densified so that each row is a plain 1-D array
        # that spearmanr can consume; dense input is used unchanged.
        if hasattr(expr, "toarray"):
            expr = expr.toarray()
        # Every unordered pair of cells within this (sample, run) group.
        for i, j in itertools.combinations(range(expr.shape[0]), r=2):
            result_intra.append(('intra', ct, spearmanr(expr[i], expr[j]).statistic))


In [None]:
# Display how many intra-group correlation tuples were collected in result_intra.
len(result_intra)

In [None]:
# Spearman correlations between cells of *different* samples within the same run.
# In each resampling round one random cell is drawn per sample, and all pairs of
# the drawn cells are correlated. Each entry is ('inter', <run label>, <correlation>).
N_RESAMPLES = 1000  # number of random resampling rounds
INTER_SEED = 0      # fixed seed so the random draws are reproducible across re-runs
rng = random.Random(INTER_SEED)  # local RNG: leaves the global `random` state untouched

obs = adata.obs
X_all = adata.X

# The run/sample grouping does not change between rounds, so compute it once:
# for every run, the row positions (into adata) of the cells of each sample.
run_groups = []
for ct in obs[run].unique():
    in_run = obs[run] == ct
    rows_per_sample = []
    for samp in obs.loc[in_run, sample].unique():
        in_group = (in_run & (obs[sample] == samp)).to_numpy()
        rows_per_sample.append(in_group.nonzero()[0])
    run_groups.append((ct, rows_per_sample))

result_inter = []
for _ in tqdm(range(N_RESAMPLES)):
    for ct, rows_per_sample in run_groups:
        cells = []
        for rows in rows_per_sample:
            # Draw one cell uniformly at random from this (run, sample) group.
            picked = X_all[rows[rng.randint(0, len(rows) - 1)]]
            # A row taken from a sparse matrix is densified to a 1-D array for spearmanr.
            if hasattr(picked, "toarray"):
                picked = picked.toarray().ravel()
            cells.append(picked)
        for one, two in itertools.combinations(cells, r=2):
            result_inter.append(('inter', ct, spearmanr(one, two).statistic))

In [None]:
# Display how many inter-sample correlation tuples were collected in result_inter.
len(result_inter)

In [None]:
# Combine both sets of correlation tuples into one list: intra entries first, then inter.
result = [*result_intra, *result_inter]

In [None]:
# Display the total number of correlation tuples (intra + inter) in result.
len(result)

In [None]:
# Write all correlations to "<base_path>/correlations_<name>.tsv" as a tab-separated
# table with the columns type / run / correlation (no index column).
# rsplit removes only the final extension, so a file name that contains additional
# dots (e.g. "a.b.h5ad") keeps its full stem instead of being cut at the first dot.
output_stem = filename.rsplit(".", 1)[0]
pd.DataFrame(result, columns=['type', 'run', 'correlation']).to_csv(
    f'{base_path}/correlations_{output_stem}.tsv', sep='\t', index=False
)