In [None]:
import pandas as pd
import os,sys
import matplotlib.pyplot as plt
# Put the parent of the current working directory on the import path so the
# `wholebrain_tools` imports that follow can be resolved.
sys.path.append(os.path.abspath(os.pardir))

from wholebrain_tools import aba, genes, dataIO
import wholebrain_tools.stats as st 
import wholebrain_tools.graphics as gt

# Object exposing the project's data locations as attributes; the cells below
# read `paths.structures`, `paths.abaIsh_datasets`, `paths.alldata` and
# `paths.corrdata_genes` from it.
paths = dataIO.pathParser()

# Build the atlas object from the location stored in `paths.structures`
# (presumably the brain-structure ontology file -- confirm in `aba.Atlas`).
nodes_path = paths.structures
A = aba.Atlas(nodes = nodes_path)
# GM is used in the plotting cells to translate gene acronyms into gene IDs
# (`GM.acronyms_to_ids`).
GM = genes.GeneManager()
# DFM is bound to the atlas `A` and provides the dataframe conversions used
# when loading the staining data (`multiIndexDf_from_fineDf`,
# `regionsDf_to_mid`, `midDf_to_avgVector`).
DFM = aba.AnatomyDataFrameManager(A)

# Load the data

In [None]:
# --------------------------------------------------------------------
# Input locations (all taken from the project path configuration).
ish_path = paths.abaIsh_datasets
searchPath = paths.alldata
correlationPath = paths.corrdata_genes
# --------------------------------------------------------------------


def load_channel_mid(channelName):
    """Load one staining channel and convert it with the DFM pipeline.

    Runs, in order:
      1. ``dataIO.allMiceRegions`` on ``searchPath`` for ``channelName``
         (with ``normCellIntens=True``),
      2. ``DFM.multiIndexDf_from_fineDf``,
      3. ``DFM.regionsDf_to_mid`` (with ``normalize=True``).

    Parameters
    ----------
    channelName : str
        Channel identifier forwarded to ``dataIO.allMiceRegions``
        (this notebook uses ``'wfa'`` and ``'pv'``).

    Returns
    -------
    The object returned by ``DFM.regionsDf_to_mid``.
    """
    fine_df = dataIO.allMiceRegions(searchPath=searchPath, channelName=channelName, normCellIntens=True)
    multi_df = DFM.multiIndexDf_from_fineDf(fine_df, verbose=False)
    return DFM.regionsDf_to_mid(multi_df, verbose=False, normalize=True)


# Load WFA data
wfa = load_channel_mid('wfa')

# Average vectors of the 'energy' and 'diffuseFluo' measurements.
# NOTE(review): `exclude_last=5` presumably drops the last five entries before
# averaging -- confirm against `DFM.midDf_to_avgVector`.
wfa_en = DFM.midDf_to_avgVector(wfa, 'energy', exclude_last=5)
wfa_diff = DFM.midDf_to_avgVector(wfa, 'diffuseFluo', exclude_last=5)

# Load PV data
pv = load_channel_mid('pv')

pv_en = DFM.midDf_to_avgVector(pv, 'energy', exclude_last=5)

# LOAD GENE EXPRESSION DATA
# Rows are indexed by the first CSV column (used as gene ID by the plotting
# cells via `ish_en.loc[geneId]`).
ish_en = pd.read_csv(os.path.join(ish_path, "gene_expression_ABA_energy.csv"), index_col=0)
# CSV headers are read as strings; convert them to numbers so that a row of
# `ish_en` can be index-aligned with the staining vectors in `pd.concat` later.
ish_en.columns = pd.to_numeric(ish_en.columns)

# LOAD CORRELATION DFs
# One table per staining measurement, each indexed by its first CSV column.
corr_wfa_en = pd.read_csv(os.path.join(correlationPath, "pnnEn_ishEn_corrdata.csv"), index_col=0)
corr_wfa_diff = pd.read_csv(os.path.join(correlationPath, "wfaDiff_ishEn_corrdata.csv"), index_col=0)
corr_pv_en = pd.read_csv(os.path.join(correlationPath, "pvEn_ishEn_corrdata.csv"), index_col=0)

# Correlation with marker genes

## Marker genes PNN - PNN Energy

In [None]:
# One panel per PNN marker gene on a 2x3 grid. The figure handle is kept
# explicitly so that later adjustments do not depend on pyplot's current-figure
# state and IPython's `_` (last output) is not overwritten.
n_rows, n_cols = 2, 3
fig, axs = plt.subplots(n_rows, n_cols, figsize = (10,7))
axs = axs.flatten()
# Only the first panel of the bottom row (index 3 on a 2x3 grid) gets axis labels.
labelled_panel = (n_rows - 1) * n_cols

# Select correlation data for all the marker genes
markersCorrData = corr_wfa_en.loc[GM.acronyms_to_ids(genes.pnn_markers)]
assert len(markersCorrData) <= axs.size, (
    f"{len(markersCorrData)} marker genes do not fit in {axs.size} panels; enlarge the grid")

for i, (geneId, row) in enumerate(markersCorrData.iterrows()):
    # Select ISH data for this gene
    expression = ish_en.loc[geneId]
    # Align ISH expression with PNN energy on their shared index and keep only
    # the entries that have a value in both.
    mergedDf = pd.concat([expression, wfa_en], axis=1).dropna(how='any', axis=0)
    show_labels = i == labelled_panel
    # Plot correlation (x: gene expression, y: PNN energy); the p-value and
    # Spearman coefficient come from the precomputed correlation table.
    gt.correlationWithGene(A,
        x = mergedDf.iloc[:,0],
        y = mergedDf.iloc[:,1],
        pval = row['p_spearman_fdr'],
        corr_spearman = row['corr_spearman'],
        fitLine = False,
        ax = axs[i],
        title = row['gene_acronym'],
        xlabel = 'Gene Expression\nEnergy (A.U.)' if show_labels else '',
        ylabel = 'PNN Energy (A.U.)' if show_labels else '',
        fontScaling=.8)

# Hide any panel that did not receive a gene, so no empty frame is drawn.
for unused_ax in axs[len(markersCorrData):]:
    unused_ax.set_visible(False)

# Set the vertical spacing between the two rows of panels
fig.subplots_adjust(hspace=0.35)
# Uncomment to export the figure:
# fig.savefig("pnnMarkers_Energy.svg", bbox_inches="tight")

## Marker genes PNN - WFA Diffuse Fluorescence

In [None]:
# One panel per PNN marker gene on a 2x3 grid. The figure handle is kept
# explicitly so that later adjustments do not depend on pyplot's current-figure
# state and IPython's `_` (last output) is not overwritten.
n_rows, n_cols = 2, 3
fig, axs = plt.subplots(n_rows, n_cols, figsize = (10,7))
axs = axs.flatten()
# Only the first panel of the bottom row (index 3 on a 2x3 grid) gets axis labels.
labelled_panel = (n_rows - 1) * n_cols

# Select correlation data for all the marker genes
markersCorrData = corr_wfa_diff.loc[GM.acronyms_to_ids(genes.pnn_markers)]
assert len(markersCorrData) <= axs.size, (
    f"{len(markersCorrData)} marker genes do not fit in {axs.size} panels; enlarge the grid")

for i, (geneId, row) in enumerate(markersCorrData.iterrows()):
    # Select ISH data for this gene
    expression = ish_en.loc[geneId]
    # Align ISH expression with WFA diffuse fluorescence on their shared index
    # and keep only the entries that have a value in both.
    mergedDf = pd.concat([expression, wfa_diff], axis=1).dropna(how='any', axis=0)
    show_labels = i == labelled_panel
    # Plot correlation (x: gene expression, y: WFA diffuse fluorescence); the
    # p-value and Spearman coefficient come from the precomputed correlation table.
    gt.correlationWithGene(A,
        x = mergedDf.iloc[:,0],
        y = mergedDf.iloc[:,1],
        pval = row['p_spearman_fdr'],
        corr_spearman = row['corr_spearman'],
        fitLine = False,
        ax = axs[i],
        title = row['gene_acronym'],
        xlabel = 'Gene Expression\nEnergy (A.U.)' if show_labels else '',
        ylabel = 'WFA Diffuse\nFluorescence (A.U.)' if show_labels else '',
        fontScaling=.85)

# Hide any panel that did not receive a gene, so no empty frame is drawn.
for unused_ax in axs[len(markersCorrData):]:
    unused_ax.set_visible(False)

# Set the vertical spacing between the two rows of panels
fig.subplots_adjust(hspace=0.35)
# Uncomment to export the figure:
# fig.savefig("pnnMarkers_diffuseFluorescence.svg", bbox_inches="tight")

## Marker genes PV - PV Energy

In [None]:
# One panel per PV marker gene on a 2x3 grid. The figure handle is kept
# explicitly so that later adjustments do not depend on pyplot's current-figure
# state and IPython's `_` (last output) is not overwritten.
n_rows, n_cols = 2, 3
fig, axs = plt.subplots(n_rows, n_cols, figsize = (10,7))
axs = axs.flatten()
# Only the first panel of the bottom row (index 3 on a 2x3 grid) gets axis labels.
labelled_panel = (n_rows - 1) * n_cols

# Select correlation data for all the marker genes
markersCorrData = corr_pv_en.loc[GM.acronyms_to_ids(genes.pv_markers)]
assert len(markersCorrData) <= axs.size, (
    f"{len(markersCorrData)} marker genes do not fit in {axs.size} panels; enlarge the grid")

for i, (geneId, row) in enumerate(markersCorrData.iterrows()):
    # Select ISH data for this gene
    expression = ish_en.loc[geneId]
    # Align ISH expression with PV energy on their shared index and keep only
    # the entries that have a value in both.
    mergedDf = pd.concat([expression, pv_en], axis=1).dropna(how='any', axis=0)
    show_labels = i == labelled_panel
    # Plot correlation (x: gene expression, y: PV energy); the p-value and
    # Spearman coefficient come from the precomputed correlation table.
    gt.correlationWithGene(A,
        x = mergedDf.iloc[:,0],
        y = mergedDf.iloc[:,1],
        pval = row['p_spearman_fdr'],
        corr_spearman = row['corr_spearman'],
        fitLine = False,
        ax = axs[i],
        title = row['gene_acronym'],
        xlabel = 'Gene Expression\nEnergy (A.U.)' if show_labels else '',
        ylabel = 'PV Energy (A.U.)' if show_labels else '',
        fontScaling=.8)

# Hide any panel that did not receive a gene, so no empty frame is drawn.
for unused_ax in axs[len(markersCorrData):]:
    unused_ax.set_visible(False)

# Set the vertical spacing between the two rows of panels
fig.subplots_adjust(hspace=0.35)
# Uncomment to export the figure:
# fig.savefig("pvMarkers_Energy.svg", bbox_inches="tight")