# Correlation with distance from rostral end

In [1]:
import os
from pathlib import Path
import matplotlib.pyplot as plt
# Cluster-specific absolute locations: `root_dir` is the SCENIC+ data root and
# `plot_dir` the figure directory for the L4_IT analysis.
# NOTE(review): both paths are hard-coded for one user on one cluster; edit
# them here when running elsewhere.
root_dir = Path("/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/scenicplus/")
plot_dir = Path("/tscc/projects/ps-epigen/users/biy022/biccn/analysis/scenicplus/L4_IT/figures/")

# Every relative path used further down (the .h5ad input, the gene-list
# outputs) is resolved against this working directory.
os.chdir(root_dir.joinpath("L4_IT"))

In [2]:
import pandas as pd
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the session.
pd.set_option("mode.chained_assignment", None)
import scanpy as sc
import decoupler as dc
import numpy as np
import scipy.stats as stats
import seaborn as sns

In [3]:
# Cortical regions paired with the value correlated against expression below
# (per the notebook title: distance from the rostral end; units are not stated
# in this notebook — TODO confirm). Keeping each region next to its distance
# makes it impossible for the two lists to drift out of step.
_region_distance_pairs = [
    ("A9", 35),
    ("A24", 60.5),
    ("FI", 69),
    ("M1C", 87),
    ("MTG", 90.5),
    ("S1C", 94),
    ("A1C", 96.5),
    ("AnG", 133),
    ("V1C", 151),
]

# Parallel lists used by the rest of the notebook: region_dist[k] belongs to
# region_align[k].
region_align = [region for region, _ in _region_distance_pairs]
region_dist = [distance for _, distance in _region_distance_pairs]

In [4]:
# Load the L4_IT RNA AnnData; the relative path is resolved against the
# working directory set by os.chdir() in the first cell.
L4_IT_rna = sc.read_h5ad("L4_IT_rna_matrix.h5ad")
# Build a standalone AnnData from the `.raw` slot.
# NOTE(review): presumably `.raw` holds the unnormalised counts (they are
# summed and CPM-normalised below) — confirm against how the file was written.
L4_IT_rna_counts = L4_IT_rna.raw.to_adata()

In [5]:
# Collapse cells into one pseudobulk profile per value of obs["Region"] by
# summing. Every min_* threshold is set to zero.
# NOTE(review): with all thresholds at 0 presumably no region or gene is
# dropped by decoupler's quality filters beyond `remove_empty` — confirm
# against the decoupler version in use.
pseudobulk_options = dict(
    sample_col="Region",
    groups_col=None,
    min_cells=0,
    min_counts=0,
    min_prop=0.0,
    min_smpls=0,
    remove_empty=True,
    mode="sum",
)
L4_IT_rna_region_counts = dc.get_pseudobulk(adata=L4_IT_rna_counts, **pseudobulk_options)

In [6]:
# The pseudobulk object can come back as a view of another AnnData; scanpy then
# warns and copies it implicitly before modifying it in place. Materialise it
# explicitly so the in-place steps below act on a real object and the warning
# disappears.
if L4_IT_rna_region_counts.is_view:
    L4_IT_rna_region_counts = L4_IT_rna_region_counts.copy()

# Counts-per-million scaling followed by log2(x + 1), both applied IN PLACE.
# This cell is therefore not idempotent: re-running it without first re-running
# the pseudobulk cell would normalise and log-transform the data a second time.
sc.pp.normalize_total(L4_IT_rna_region_counts, target_sum=1e6)
sc.pp.log1p(L4_IT_rna_region_counts, base=2)

  view_to_actual(adata)


In [7]:
# Pull the expression matrix with rows selected (and ordered) by region_align,
# so that row k of data_matrix lines up with region_dist[k].
# NOTE(review): assumes the pseudobulk obs names are the Region labels and that
# .X is a dense array (its columns are passed straight to pearsonr below) —
# confirm.
data_matrix = L4_IT_rna_region_counts[region_align, :].X

In [8]:
import scipy.stats as stats

In [9]:
# Predictor: one value per region, in region_align order.
# (An alternative that was tried is the ordinal position of each region,
# i.e. np.arange(0, len(region_align)), instead of the measured distance.)
b = np.array(region_dist)

# One Pearson r and its p-value per gene (column of data_matrix); each test
# uses only len(region_align) data points.
n_genes = data_matrix.shape[1]
corrs = np.zeros(n_genes)
pvalues = np.zeros(n_genes)

for gene_idx in range(n_genes):
    corrs[gene_idx], pvalues[gene_idx] = stats.pearsonr(data_matrix[:, gene_idx], b)

In [10]:
# Benjamini-Hochberg FDR correction across all genes.
# pearsonr yields NaN for a gene whose expression is constant across regions,
# and false_discovery_control rejects any input that is not within [0, 1]
# (NaN included) with a ValueError. Correct only the finite p-values and leave
# NaN for the rest; a NaN adjusted p-value never passes a `< 0.05` filter.
# When every p-value is finite the result is identical to correcting the full
# array in one call.
finite_mask = np.isfinite(pvalues)
padjusted = np.full(pvalues.shape, np.nan)
if finite_mask.any():
    padjusted[finite_mask] = stats.false_discovery_control(pvalues[finite_mask], method="bh")

In [11]:
# Per-gene result table: gene name, Pearson r against the region distances,
# raw p-value and BH-adjusted p-value. Row order follows the gene (var) order
# of the pseudobulk object, which is the column order of data_matrix.
pearson_result = pd.DataFrame(
    dict(
        gene=L4_IT_rna_region_counts.var.index.to_list(),
        corr=corrs,
        p_value=pvalues,
        p_adjusted=padjusted,
    )
)

In [12]:
# (rows, columns) of the table restricted to genes with BH-adjusted p < 0.05;
# the first number is the count of significant genes.
pearson_result.loc[pearson_result["p_adjusted"] < 0.05].shape

(0, 4)

In [13]:
# Genes positively correlated with distance (r > 0) at BH-adjusted p < 0.05.
positive_mask = (pearson_result["corr"] > 0) & (pearson_result["p_adjusted"] < 0.05)
positive_sig_genes = pearson_result.loc[positive_mask, "gene"].to_list()

# open(..., "w") does not create missing directories, so make sure the output
# folder exists (relative to the current working directory) before writing.
positive_out = Path("rostral_caudal_dist_pearson/positive_genes_bh_5e-2.tsv")
positive_out.parent.mkdir(parents=True, exist_ok=True)

# One gene name per line; an empty gene list produces an empty file.
with open(positive_out, "w") as fout:
    fout.writelines("{}\n".format(gene) for gene in positive_sig_genes)

In [14]:
# Genes negatively correlated with distance (r < 0) at BH-adjusted p < 0.05.
negative_mask = (pearson_result["corr"] < 0) & (pearson_result["p_adjusted"] < 0.05)
negative_sig_genes = pearson_result.loc[negative_mask, "gene"].to_list()

# open(..., "w") does not create missing directories, so make sure the output
# folder exists (relative to the current working directory) before writing.
negative_out = Path("rostral_caudal_dist_pearson/negative_genes_bh_5e-2.tsv")
negative_out.parent.mkdir(parents=True, exist_ok=True)

# One gene name per line; an empty gene list produces an empty file.
with open(negative_out, "w") as fout:
    fout.writelines("{}\n".format(gene) for gene in negative_sig_genes)

In [15]:
# Recast obs["Region"] as a categorical whose category order follows
# region_align. Any Region value not listed in region_align becomes NaN.
# This overwrites the column in place on the pseudobulk object.
# NOTE(review): presumably done so downstream grouping/plots order the regions
# as in region_align — confirm against the cells that follow.
L4_IT_rna_region_counts.obs["Region"] = pd.Categorical(
    L4_IT_rna_region_counts.obs["Region"], categories=region_align)