# Use the method described in Jorstad et al. 2023 (*Science*)

In [1]:
import os
from pathlib import Path
import matplotlib.pyplot as plt
# Absolute, cluster-specific project locations. After the chdir below, every
# relative path used in later cells resolves inside <root_dir>/L4_IT.
root_dir = Path("/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/scenicplus/")
os.chdir(root_dir / "L4_IT")
# Figure output directory (judging by its path); not referenced by the cells
# visible in this part of the notebook.
plot_dir = Path("/tscc/projects/ps-epigen/users/biy022/biccn/analysis/scenicplus/L4_IT/figures/")

In [2]:
import pandas as pd
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the session.
pd.options.mode.chained_assignment = None 
import scanpy as sc
import decoupler as dc
import numpy as np
import scipy.stats as stats
import seaborn as sns

## RNA Spearman correlation / donors as replicates

In [3]:
# Load the L4_IT RNA AnnData (path is relative to the current working directory).
L4_IT_rna = sc.read_h5ad("L4_IT_rna_matrix.h5ad")
# Build a standalone AnnData from the `.raw` slot; presumably this holds the
# unnormalized counts that the pseudobulk summation needs — TODO confirm.
L4_IT_rna_counts = L4_IT_rna.raw.to_adata()

In [4]:
# Per-cell sample label "<PatientID>_<Region>": one label for each
# donor-by-region combination, used as the pseudobulk sample key.
donor_labels = L4_IT_rna_counts.obs["PatientID"].astype("str")
region_labels = L4_IT_rna_counts.obs["Region"].astype("str")
L4_IT_rna_counts.obs["Donor_Region"] = donor_labels + "_" + region_labels

In [5]:
# Sum counts over all cells sharing a "Donor_Region" label, giving one
# pseudobulk profile per donor-region sample. All cell/count/proportion
# thresholds are set to zero, so those filters drop nothing; only empty
# entries are removed via remove_empty=True.
#
# The result is copied explicitly: the in-place normalization applied to this
# object otherwise reports that it received an AnnData *view* and makes an
# implicit copy. Copying here makes the object a real AnnData up front.
L4_IT_rna_region_counts = dc.get_pseudobulk(
    adata=L4_IT_rna_counts,
    sample_col="Donor_Region",
    groups_col=None,
    min_cells=0,
    min_counts=0,
    min_prop=0.0,
    min_smpls=0,
    remove_empty=True,
    mode="sum"
).copy()

In [6]:
# Scale every pseudobulk sample to a total of 1e6 (counts per million), then
# apply log2(x + 1). Both calls modify L4_IT_rna_region_counts in place, so
# re-running this cell without rebuilding the pseudobulk transforms the data
# a second time.
sc.pp.normalize_total(L4_IT_rna_region_counts, target_sum=1e6)
sc.pp.log1p(L4_IT_rna_region_counts, base=2)

  view_to_actual(adata)


In [7]:
# Region order used as the rank variable for the Spearman correlation: the
# position of a region in this list becomes its integer category code.
region_align = ["A24", "A9", "FI", "M1C", "S1C", "MTG", "A1C", "AnG", "V1C"]
region_categorical = pd.Categorical(
    L4_IT_rna_region_counts.obs["Region"], categories=region_align
)
# A label missing from `region_align` is silently turned into NaN (code -1)
# and would enter the correlation as if it were the lowest-ranked region.
# Fail loudly instead of producing biased coefficients.
unmatched_mask = region_categorical.isna()
if unmatched_mask.any():
    unmatched_labels = sorted(
        set(L4_IT_rna_region_counts.obs["Region"][unmatched_mask].astype(str))
    )
    raise ValueError(
        "Region labels not listed in region_align: {}".format(unmatched_labels)
    )
L4_IT_rna_region_counts.obs["Region"] = region_categorical

In [8]:
# Sample labels ordered by the "Region" column, i.e. by the category order
# given when that column was converted to a Categorical.
sorted_index = L4_IT_rna_region_counts.obs.sort_values(by="Region").index

In [9]:
# Spearman rank correlation of every gene against the region order.
# `b` is the integer category code of each sample's region; the rows of
# `data_matrix` are taken in the same `sorted_index` order, so the two line up.
b = L4_IT_rna_region_counts.obs["Region"].loc[sorted_index].cat.codes.to_list()
data_matrix = L4_IT_rna_region_counts[sorted_index, :].X

n_genes = data_matrix.shape[1]
corrs = np.zeros(n_genes)
pvalues = np.zeros(n_genes)

# One test per gene (column); store coefficient and p-value side by side.
for gene_idx in range(n_genes):
    corrs[gene_idx], pvalues[gene_idx] = stats.spearmanr(data_matrix[:, gene_idx], b)

In [10]:
# Benjamini-Hochberg FDR adjustment over the p-values of all tested genes.
# NOTE(review): scipy.stats.false_discovery_control requires SciPy >= 1.11 and
# is expected to reject NaN p-values (e.g. from a constant gene) — confirm.
padjusted = stats.false_discovery_control(pvalues, method="bh")

In [11]:
# Per-gene result table for the correlation across all regions:
# gene name, Spearman coefficient, raw p-value and BH-adjusted p-value.
gene_names_all = L4_IT_rna_region_counts.var.index.to_list()
spearman_result_all = pd.DataFrame(
    {"gene": gene_names_all, "corr": corrs, "p_value": pvalues, "p_adjusted": padjusted}
)

In [12]:
# Repeat the analysis without the two regions at either end of the region
# order (A24 and V1C), keeping the seven regions in between.
is_end_region = L4_IT_rna_region_counts.obs["Region"].isin(["A24", "V1C"])
L4_IT_rna_region_counts_mid = L4_IT_rna_region_counts[~is_end_region].copy()

# Drop genes that are zero in every remaining sample: a constant vector has
# no defined rank correlation.
non_zero_genes = np.any(L4_IT_rna_region_counts_mid.X != 0, axis=0)
L4_IT_rna_region_counts_mid = L4_IT_rna_region_counts_mid[:, non_zero_genes].copy()

# Re-code the regions against the seven-region order so the category codes
# run 0..6 in that order.
region_align_mid = ["A9", "FI", "M1C", "S1C", "MTG", "A1C", "AnG"]
region_categorical_mid = pd.Categorical(
    L4_IT_rna_region_counts_mid.obs["Region"], categories=region_align_mid
)
# Any sample whose region is not in `region_align_mid` would get NaN
# (code -1) and enter the correlation as the lowest-ranked region; stop
# instead of computing biased coefficients.
if region_categorical_mid.isna().any():
    raise ValueError(
        "Samples remain whose Region is not listed in region_align_mid "
        "({} sample(s))".format(int(region_categorical_mid.isna().sum()))
    )
L4_IT_rna_region_counts_mid.obs["Region"] = region_categorical_mid

In [13]:
# Sample labels of the reduced (mid-region) data, ordered by the category
# order of its "Region" column. Overwrites the `sorted_index` of the
# all-region analysis.
sorted_index = L4_IT_rna_region_counts_mid.obs.sort_values(by="Region").index

In [14]:
# Spearman rank correlation of every retained gene against the region order,
# restricted to the mid-region samples. `b` is the integer category code of
# each sample's region; rows of `data_matrix` follow the same `sorted_index`.
# Overwrites `b`, `data_matrix`, `corrs` and `pvalues` from the all-region run.
b = L4_IT_rna_region_counts_mid.obs["Region"].loc[sorted_index].cat.codes.to_list()
data_matrix = L4_IT_rna_region_counts_mid[sorted_index, :].X

n_genes = data_matrix.shape[1]
corrs = np.zeros(n_genes)
pvalues = np.zeros(n_genes)

# One test per gene (column); store coefficient and p-value side by side.
for gene_idx in range(n_genes):
    corrs[gene_idx], pvalues[gene_idx] = stats.spearmanr(data_matrix[:, gene_idx], b)

In [15]:
# Benjamini-Hochberg FDR adjustment over the mid-region p-values.
# NOTE(review): scipy.stats.false_discovery_control requires SciPy >= 1.11 and
# is expected to reject NaN p-values (e.g. from a constant gene) — confirm.
padjusted = stats.false_discovery_control(pvalues, method="bh")

In [16]:
# Per-gene result table for the mid-region correlation:
# gene name, Spearman coefficient, raw p-value and BH-adjusted p-value.
gene_names_mid = L4_IT_rna_region_counts_mid.var.index.to_list()
spearman_result_mid = pd.DataFrame(
    {"gene": gene_names_mid, "corr": corrs, "p_value": pvalues, "p_adjusted": padjusted}
)

In [17]:
# Positively correlated genes: coefficient > 0.7 across all regions AND
# > 0.5 when the two end regions are left out.
corr_set1 = spearman_result_all[spearman_result_all["corr"] > 0.7]["gene"].to_list()
corr_set2 = spearman_result_mid[spearman_result_mid["corr"] > 0.5]["gene"].to_list()
# Sort the intersection: iterating a set of strings gives a run-dependent
# order, which would make the output file differ between identical runs.
corr_set = sorted(set(corr_set1) & set(corr_set2))

positive_genes_path = Path("rostral_caudal_spearman/positive_genes_all_7e-1_mid7_5e-1.tsv")
# Create the output directory on a fresh checkout instead of failing on open().
positive_genes_path.parent.mkdir(parents=True, exist_ok=True)
with open(positive_genes_path, "w") as fout:
    # One gene name per line.
    fout.writelines("{}\n".format(gene) for gene in corr_set)

In [18]:
# Negatively correlated genes: coefficient < -0.7 across all regions AND
# < -0.5 when the two end regions are left out.
corr_set1 = spearman_result_all[spearman_result_all["corr"] < -0.7]["gene"].to_list()
corr_set2 = spearman_result_mid[spearman_result_mid["corr"] < -0.5]["gene"].to_list()
# Sort the intersection: iterating a set of strings gives a run-dependent
# order, which would make the output file differ between identical runs.
corr_set = sorted(set(corr_set1) & set(corr_set2))

negative_genes_path = Path("rostral_caudal_spearman/negative_genes_all_7e-1_mid7_5e-1.tsv")
# Create the output directory on a fresh checkout instead of failing on open().
negative_genes_path.parent.mkdir(parents=True, exist_ok=True)
with open(negative_genes_path, "w") as fout:
    # One gene name per line.
    fout.writelines("{}\n".format(gene) for gene in corr_set)