In [1]:
import os              
os.environ['PYTHONHASHSEED'] = '0'
import pandas as pd                                                    
import numpy as np                                                     
import scanpy as sc                                                                                 
from time import time                                                       
import sys
import matplotlib

import matplotlib.pyplot as plt
from anndata import AnnData, read_h5ad, concat
from tqdm import tqdm
import scipy
import scipy.stats as ss

In [2]:
# Load the expression AnnData from the working directory
# (file name suggests normalized expression — TODO confirm).
adata = read_h5ad("norm_exp.h5ad")
# Reading this file emits anndata's duplicate-observation-name warning;
# de-duplicate the obs names in place so label-based obs indexing is unambiguous.
adata.obs_names_make_unique()

  utils.warn_names_duplicates("obs")


In [3]:
def change_name(name):
    """Drop everything up to and including the first "-" in ``name``.

    Names without a "-" are returned unchanged; only the first "-" is
    treated as the separator, so later dashes are kept.
    """
    if "-" not in name:
        return name
    # maxsplit=1 -> [prefix, remainder]; keep the remainder.
    return name.split("-", 1)[1]

# Strip any "<prefix>-" from the area labels (see change_name).
adata.obs['area'] = [change_name(label) for label in adata.obs['area']]

# Keep only cells from the six listed areas ...
in_selected_area = adata.obs['area'].isin(['PFC', 'V2', 'Par', 'V1', 'M1', 'Temp'])
adata = adata[in_selected_area]

# ... and, of those, only cells whose 'gw' value is gw20 or gw22.
in_selected_gw = adata.obs['gw'].isin(['gw20', 'gw22'])
adata = adata[in_selected_gw]

# Independent copy restricted to the EN-IT / EN-ET annotations.
is_en_it_or_et = adata.obs['H1_annotation'].isin(['EN-IT', 'EN-ET'])
adata_sub = adata[is_en_it_or_et].copy()

In [4]:
def expr_tot(adata):
    """Mean of ``adata.X`` per gene within each area.

    Parameters
    ----------
    adata : AnnData-like
        Object exposing ``obs['area']``, ``X`` (rows = observations,
        columns = variables; dense array or scipy sparse matrix) and
        ``var.index``, and supporting row selection with a boolean mask.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``adata.var.index`` and one column per unique
        area (in ``np.unique`` order), holding the column-wise mean of ``X``
        over the observations of that area.
    """
    area_uniq = np.unique(adata.obs['area'])
    area_means = []
    for area in area_uniq:
        adata_area = adata[adata.obs['area'] == area]
        # A scipy sparse matrix returns a 1 x n_vars np.matrix from .mean();
        # flatten so dense and sparse inputs both yield a 1-D vector and the
        # stacked result stays 2-D.
        area_means.append(np.asarray(adata_area.X.mean(axis=0)).ravel())
    # Stack as (n_areas, n_vars), then transpose to variables x areas.
    return pd.DataFrame(
        np.array(area_means).T,
        index=adata.var.index,
        columns=area_uniq,
    )

def prop_tot(adata):
    """Fraction of observations with a non-zero value, per gene and area.

    Parameters
    ----------
    adata : AnnData-like
        Object exposing ``obs['area']``, ``X`` (rows = observations,
        columns = variables; dense array or scipy sparse matrix) and
        ``var.index``, and supporting row selection with a boolean mask.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``adata.var.index`` and one column per unique
        area (in ``np.unique`` order), holding the share of that area's
        observations whose entry in ``X`` is not 0.
    """
    area_uniq = np.unique(adata.obs['area'])
    area_props = []
    for area in area_uniq:
        adata_area = adata[adata.obs['area'] == area]
        # Mean of the boolean "non-zero" indicator = proportion of non-zeros.
        nonzero_frac = (adata_area.X != 0).mean(axis=0)
        # Sparse input yields a 1 x n_vars np.matrix here; flatten so dense
        # and sparse inputs both give a 1-D vector and the stack stays 2-D.
        area_props.append(np.asarray(nonzero_frac).ravel())
    # Stack as (n_areas, n_vars), then transpose to variables x areas.
    return pd.DataFrame(
        np.array(area_props).T,
        index=adata.var.index,
        columns=area_uniq,
    )

In [8]:
# Column order shared by both summary tables.
area_order = ['PFC', 'M1', 'Par', 'Temp', 'V2', 'V1']

# Per-area mean of adata_sub.X for every gene, columns in area_order.
zs_tot = expr_tot(adata_sub)[area_order]

# Per-area fraction of cells with a non-zero value, same column order.
perc_tot = prop_tot(adata_sub)[area_order]

In [85]:
# Label cells "A" (PFC, M1) or "P" (Par, V2); every other area keeps the
# "-1" placeholder and is excluded from the comparison below.
adata_sub.obs['direction'] = "-1"
for group_label, group_areas in (("A", ['PFC', 'M1']), ("P", ['Par', 'V2'])):
    in_group = adata_sub.obs['area'].isin(group_areas)
    adata_sub.obs.loc[in_group, 'direction'] = group_label

# Work on an independent copy holding only the labelled cells.
has_direction = adata_sub.obs['direction'] != '-1'
adata_sub_de = adata_sub[has_direction].copy()

# Rank genes between the 'direction' groups with a t-test; results are
# stored in adata_sub_de.uns['rank_genes_groups'].
sc.tl.rank_genes_groups(adata_sub_de, groupby='direction', method='t-test')

In [48]:
# Top 50 ranked gene names per 'direction' group, as a DataFrame.
n_top_ap = 50
ap_ranked_names = adata_sub_de.uns['rank_genes_groups']['names']
result_ap = pd.DataFrame(np.array(ap_ranked_names[:n_top_ap]))

In [49]:
# Relabel every cell as "Temp" (area == 'Temp') or "N" (any other area).
# This overwrites the 'direction' column on adata_sub.
is_temp = adata_sub.obs['area'].isin(['Temp'])
adata_sub.obs['direction'] = np.where(is_temp, "Temp", "N")

# Every cell now carries a label, so this '-1' filter keeps all rows; the
# result is still an independent copy of adata_sub.
has_direction = adata_sub.obs['direction'] != '-1'
adata_sub_de = adata_sub[has_direction].copy()

# Rank genes between the 'direction' groups with a t-test; results are
# stored in adata_sub_de.uns['rank_genes_groups'].
sc.tl.rank_genes_groups(adata_sub_de, groupby='direction', method='t-test')

In [50]:
# Top 20 ranked gene names per 'direction' group, as a DataFrame.
n_top_t = 20
t_ranked_names = adata_sub_de.uns['rank_genes_groups']['names']
result_t = pd.DataFrame(np.array(t_ranked_names[:n_top_t]))

In [54]:
# Write, for each list of top-ranked genes, the matching rows of the
# proportion table (perc_tot) and the mean-expression table (zs_tot).
os.makedirs("result/DEG", exist_ok=True)

deg_exports = [
    (perc_tot, result_ap['A'], "result/DEG/prop_A.csv"),
    (perc_tot, result_ap['P'], "result/DEG/prop_P.csv"),
    (zs_tot, result_ap['A'], "result/DEG/expr_A.csv"),
    (zs_tot, result_ap['P'], "result/DEG/expr_P.csv"),
    (perc_tot, result_t['Temp'], "result/DEG/prop_T.csv"),
    (zs_tot, result_t['Temp'], "result/DEG/expr_T.csv"),
]
for table, gene_names, csv_path in deg_exports:
    # Select the gene rows by label, keeping every area column.
    table.loc[gene_names, :].to_csv(csv_path)

In [19]:
# For each listed H1 annotation class, export the per-area mean-expression
# and non-zero-proportion tables computed over all cells of that class.
h1_class = ["EN-Mig", "RG", "IPC", "IN"]
area_order = ['PFC', 'M1', 'Par', 'Temp', 'V2', 'V1']

# to_csv does not create missing directories; make sure the output folder
# exists so this cell does not depend on another cell having created it.
os.makedirs("result", exist_ok=True)

for h1_label in h1_class:
    # Subset once and reuse the view for both summaries.
    adata_class = adata[adata.obs['H1_annotation'] == h1_label]

    # NOTE: zs_tot / perc_tot are rebound on every iteration (as in the
    # original cell), replacing the tables built from adata_sub.
    zs_tot = expr_tot(adata_class)[area_order]
    perc_tot = prop_tot(adata_class)[area_order]

    zs_tot.to_csv(f"result/expr_{h1_label}.csv")
    perc_tot.to_csv(f"result/prop_{h1_label}.csv")