In [None]:
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import colors, cm
from scipy.stats import spearmanr, zscore
from statsmodels.stats.multitest import multipletests
from matplotlib.ticker import MaxNLocator
import sys, os
import pandas as pd 
import numpy as np


# Folder containing the project-local analysis modules and their JSON resources.
# It is defined once here so that relocating the project means editing one line.
analysisDir = r'C:\Users\Valentino\Documents\MATLAB\PNN_wholeBrain\analysis'

# The project-local modules imported below are looked up in analysisDir, so it
# has to be on the import path before the imports run.
sys.path.append(analysisDir)
import GraphicTool as gt
from geneTool import GeneManager, pnn_markers, pv_markers
from AbaTool import Atlas, AnatomyDataFrameManager
import dataIO

# You can provide a "structures.json" file. If you don't, one will be downloaded.
structuresFile = os.path.join(analysisDir, 'structures.json')
genes = os.path.join(analysisDir, 'genes.json')

# Shared helper objects used by every cell below:
#   A   - atlas built from the structures file
#   DFM - DataFrame manager bound to that atlas
#   GM  - gene manager built from the genes file
A = Atlas(nodes=structuresFile)
DFM = AnatomyDataFrameManager(A)
GM = GeneManager(path = genes)

# Load the data

In [None]:
# --------------------------------------------------------------------
# Input locations. Edit these three lines to point at your copy of the data.
ish_path = r"D:\proj_PNN-Atlas\ABAgeneData"
searchPath = r'D:\proj_PNN-Atlas\RESULTS\allData'
correlationPath = r"D:\proj_PNN-Atlas\RESULTS\genecorrelationISH\correlations"
# --------------------------------------------------------------------


def _loadChannelMidDf(channelName):
    """Load the all-mice region table of one staining channel as a "mid" DataFrame.

    Runs the same three-step pipeline for every channel:
    dataIO.allMiceRegions -> DFM.multiIndexDf_from_fineDf -> DFM.regionsDf_to_mid.

    Parameters
    ----------
    channelName : str
        Channel identifier forwarded to dataIO.allMiceRegions (e.g. 'wfa', 'pv').

    Returns
    -------
    The object returned by DFM.regionsDf_to_mid(..., normalize=True).
    """
    fineDf = dataIO.allMiceRegions(searchPath=searchPath, channelName=channelName, normCellIntens=True)
    regionsDf = DFM.multiIndexDf_from_fineDf(fineDf, verbose=False)
    return DFM.regionsDf_to_mid(regionsDf, verbose=False, normalize=True)


def _loadCorrelationDf(fileName):
    """Read one precomputed correlation table from correlationPath (first column is the index)."""
    return pd.read_csv(os.path.join(correlationPath, fileName), index_col=0)


# Load WFA data
wfa = _loadChannelMidDf('wfa')

# Per-region average vectors for the 'CTR' group.
# NOTE(review): exclude_last=5 is forwarded as-is; presumably it drops the last
# five regions/columns before averaging - confirm against AbaTool.
wfa_en = DFM.midDf_to_avgVector(wfa,'energy',exclude_last=5,group='CTR')
wfa_diff = DFM.midDf_to_avgVector(wfa,'diffuseFluo', exclude_last=5, group='CTR')

# Load PV data
pv = _loadChannelMidDf('pv')

pv_en = DFM.midDf_to_avgVector(pv,'energy', exclude_last=5, group='CTR')

# LOAD GENE EXPRESSION DATA
ish_en =  pd.read_csv(os.path.join(ish_path,"gene_expression_ABA_energy.csv"), index_col=0)
# Column labels are read from the CSV header as strings; convert them to numbers
# so they can be aligned with the staining vectors when both are concatenated
# in the plotting cells below.
ish_en.columns = pd.to_numeric(ish_en.columns)

# LOAD CORRELATION DFs
# All three tables live in correlationPath; the paths are built from it instead
# of repeating the folder literal.
corr_wfa_en = _loadCorrelationDf("pnnEn_ishEn_corrdata.csv")
corr_wfa_diff = _loadCorrelationDf("wfaDiff_ishEn_corrdata.csv")
corr_pv_en = _loadCorrelationDf("pvEn_ishEn_corrdata.csv")

# Correlation with marker genes

## Marker genes PNN - PNN Energy

In [None]:
# 2x3 grid of panels, flattened so panel k is simply axs[k]
_, axs = plt.subplots(2,3, figsize = (10,7))
axs = axs.flatten()
# Precomputed correlation statistics (vs. PNN energy) for each PNN marker gene
markersCorrData = corr_wfa_en.loc[GM.acronyms_to_ids(pnn_markers)]

for i, (geneId, row) in enumerate(markersCorrData.iterrows()):
    # Axis labels are drawn only on panel 3 (first panel of the second row);
    # every other panel gets empty labels.
    if i == 3:
        xlabel = 'Gene Expression\nEnergy (A.U.)'
        ylabel = 'PNN Energy (A.U.)'
    else:
        xlabel = ''
        ylabel = ''

    # Put this gene's ISH expression next to PNN energy (aligned on the index)
    # and keep only the rows that have a value in both columns.
    expression = ish_en.loc[geneId]
    mergedDf = pd.concat([expression, wfa_en], axis=1).dropna()
    geneValues = mergedDf.iloc[:,0]
    pnnValues = mergedDf.iloc[:,1]

    # Scatter of expression vs. PNN energy, annotated with the stored statistics
    gt.correlationWithGene(
        A,
        x = geneValues,
        y = pnnValues,
        pval = row['p_spearman_fdr'],
        corr_spearman = row['corr_spearman'],
        fitLine = False,
        ax = axs[i],
        title = row['gene_acronym'],
        xlabel = xlabel,
        ylabel = ylabel,
        fontScaling = .8,
    )

# Extra vertical room between the two rows of panels
plt.subplots_adjust(hspace=0.35)
# plt.savefig(f"pnnMarkers_Energy.svg", bbox_inches="tight")

## Marker genes PNN - WFA Diffuse Fluorescence

In [None]:
# 2x3 grid of panels, flattened so panel k is simply axs[k]
_, axs = plt.subplots(2,3, figsize = (10,7))
axs = axs.flatten()
# Precomputed correlation statistics (vs. WFA diffuse fluorescence) for each PNN marker gene
markersCorrData = corr_wfa_diff.loc[GM.acronyms_to_ids(pnn_markers)]

for i, (geneId, row) in enumerate(markersCorrData.iterrows()):
    # Axis labels are drawn only on panel 3 (first panel of the second row);
    # every other panel gets empty labels.
    if i == 3:
        xlabel = 'Gene Expression\nEnergy (A.U.)'
        ylabel = 'WFA Diffuse\nFluorescence (A.U.)'
    else:
        xlabel = ''
        ylabel = ''

    # Put this gene's ISH expression next to WFA diffuse fluorescence (aligned
    # on the index) and keep only the rows that have a value in both columns.
    expression = ish_en.loc[geneId]
    mergedDf = pd.concat([expression, wfa_diff], axis=1).dropna()
    geneValues = mergedDf.iloc[:,0]
    diffuseValues = mergedDf.iloc[:,1]

    # Scatter of expression vs. diffuse fluorescence, annotated with the stored statistics
    gt.correlationWithGene(
        A,
        x = geneValues,
        y = diffuseValues,
        pval = row['p_spearman_fdr'],
        corr_spearman = row['corr_spearman'],
        fitLine = False,
        ax = axs[i],
        title = row['gene_acronym'],
        xlabel = xlabel,
        ylabel = ylabel,
        fontScaling = .85,
    )


# Extra vertical room between the two rows of panels
plt.subplots_adjust(hspace=0.35)
# plt.savefig(f"pnnMarkers_diffuseFluorescence.svg", bbox_inches="tight")

## Marker genes PV - PV Energy

In [None]:
# 2x3 grid of panels, flattened so panel k is simply axs[k]
_, axs = plt.subplots(2,3, figsize = (10,7))
axs = axs.flatten()
# Precomputed correlation statistics (vs. PV energy) for each PV marker gene
markersCorrData = corr_pv_en.loc[GM.acronyms_to_ids(pv_markers)]

for i, (geneId, row) in enumerate(markersCorrData.iterrows()):
    # Axis labels are drawn only on panel 3 (first panel of the second row);
    # every other panel gets empty labels.
    if i == 3:
        xlabel = 'Gene Expression\nEnergy (A.U.)'
        ylabel = 'PV Energy (A.U.)'
    else:
        xlabel = ''
        ylabel = ''

    # Put this gene's ISH expression next to PV energy (aligned on the index)
    # and keep only the rows that have a value in both columns.
    expression = ish_en.loc[geneId]
    mergedDf = pd.concat([expression, pv_en], axis=1).dropna()
    geneValues = mergedDf.iloc[:,0]
    pvValues = mergedDf.iloc[:,1]

    # Scatter of expression vs. PV energy, annotated with the stored statistics
    gt.correlationWithGene(
        A,
        x = geneValues,
        y = pvValues,
        pval = row['p_spearman_fdr'],
        corr_spearman = row['corr_spearman'],
        fitLine = False,
        ax = axs[i],
        title = row['gene_acronym'],
        xlabel = xlabel,
        ylabel = ylabel,
        fontScaling = .8,
    )

# Extra vertical room between the two rows of panels
plt.subplots_adjust(hspace=0.35)
# plt.savefig(f"pvMarkers_Energy.svg", bbox_inches="tight")

# Correlation with all genes

In [None]:
# Histogram of the gene-vs-PNN-energy Spearman correlations, stacked by
# significance class (correlated / anticorrelated / uncorrelated).

# Bar colors for the two significant classes (RGB rescaled to the 0-1 range)
pos_col = np.array([107, 5, 4])/255
neg_col = np.array([73, 88, 103])/255

# pos_col = cm.RdGy_r(.8)
# neg_col = cm.RdGy_r(.2)

# FDR-corrected p-value below which a correlation counts as significant
fdrThreshold = 0.01

pvals = corr_wfa_en['p_spearman_fdr']
rho = corr_wfa_en['corr_spearman']
isPositive = (pvals < fdrThreshold) & (rho > 0)
isNegative = (pvals < fdrThreshold) & (rho < 0)
positiveN = isPositive.sum()
negativeN = isNegative.sum()

# Classify every gene in one pass. 'uncorrelated' is the default so that rows
# matching neither mask (p exactly at the threshold, NaN p-value, rho == 0) are
# still labelled; with three separate masks they were left as NaN and seaborn
# dropped them from the histogram without warning.
corr_wfa_en['status'] = np.select(
    [isPositive, isNegative],
    ['correlated', 'anticorrelated'],
    default='uncorrelated')

f, ax = plt.subplots(figsize=(7, 5), dpi=120)
ax.set_title("Correlation with PNN Energy")
sns.histplot(
    corr_wfa_en,
    x="corr_spearman", hue="status",
    multiple="stack",
    palette={'uncorrelated':'lightgray', 'anticorrelated':neg_col, 'correlated':pos_col},
    edgecolor=".5",
    linewidth=0,
    legend=False,
    alpha=.9,bins = 100,
    ax=ax)

# Customize
# Both count annotations sit at the same height: 80% of the y-range below the
# top of the axis, i.e. 20% above the bottom.
yMin, yMax = ax.get_ylim()
Y = yMax - 0.8*(yMax-yMin)
ax.text(0.5, Y, f'Correlated:\n{positiveN} genes', fontsize=12, horizontalalignment='center')
ax.text(-0.5, Y, f'Anticorrelated:\n{negativeN} genes', fontsize=12, horizontalalignment='center')
ax.set_xlim([-.7,.7])
ax.set_xlabel('Spearman r')
ax.set_ylabel('Number of genes')
sns.despine(ax=ax, top = True, right = True)

# plt.savefig("Corr_PNNenergy.svg", bbox_inches="tight")

In [None]:
# Histogram of the gene-vs-WFA-diffuse-fluorescence Spearman correlations,
# stacked by significance class (correlated / anticorrelated / uncorrelated).

# Bar colors for the two significant classes (RGB rescaled to the 0-1 range)
pos_col = np.array([107, 5, 4])/255
neg_col = np.array([73, 88, 103])/255

# pos_col = cm.RdGy_r(1)
# neg_col = cm.RdGy_r(.2)

# FDR-corrected p-value below which a correlation counts as significant
fdrThreshold = 0.01

pvals = corr_wfa_diff['p_spearman_fdr']
rho = corr_wfa_diff['corr_spearman']
isPositive = (pvals < fdrThreshold) & (rho > 0)
isNegative = (pvals < fdrThreshold) & (rho < 0)
positiveN = isPositive.sum()
negativeN = isNegative.sum()

# Classify every gene in one pass. 'uncorrelated' is the default so that rows
# matching neither mask (p exactly at the threshold, NaN p-value, rho == 0) are
# still labelled; with three separate masks they were left as NaN and seaborn
# dropped them from the histogram without warning.
corr_wfa_diff['status'] = np.select(
    [isPositive, isNegative],
    ['correlated', 'anticorrelated'],
    default='uncorrelated')

f, ax = plt.subplots(figsize=(7, 5), dpi=120)
ax.set_title("Correlation with WFA Diffuse Fluorescence ")
sns.histplot(
    corr_wfa_diff,
    x="corr_spearman", hue="status",
    multiple="stack",
    palette={'uncorrelated':'lightgray', 'anticorrelated':neg_col, 'correlated':pos_col},
    edgecolor=".5",
    linewidth=0,
    legend=False,
    alpha=.9,bins = 100,
    ax=ax)

# Customize
# Both count annotations sit at the same height: 80% of the y-range below the
# top of the axis, i.e. 20% above the bottom.
yMin, yMax = ax.get_ylim()
Y = yMax - 0.8*(yMax-yMin)
ax.text(0.5, Y, f'Correlated:\n{positiveN} genes', fontsize=12, horizontalalignment='center')
ax.text(-0.5, Y, f'Anticorrelated:\n{negativeN} genes', fontsize=12, horizontalalignment='center')
ax.set_xlim([-.7,.7])
ax.set_xlabel(r'Spearman $\rho$')
ax.set_ylabel('Number of Genes')
sns.despine(ax=ax, top = True, right = True)

# plt.savefig("Corr_PNNdiffuse.svg", bbox_inches="tight")

In [None]:
# Histogram of the gene-vs-PV-energy Spearman correlations, stacked by
# significance class (correlated / anticorrelated / uncorrelated).

# Bar colors for the two significant classes (RGB rescaled to the 0-1 range)
pos_col = np.array([107, 5, 4])/255
neg_col = np.array([73, 88, 103])/255

# pos_col = cm.RdGy_r(.8)
# neg_col = cm.RdGy_r(.2)

# FDR-corrected p-value below which a correlation counts as significant
fdrThreshold = 0.01

pvals = corr_pv_en['p_spearman_fdr']
rho = corr_pv_en['corr_spearman']
isPositive = (pvals < fdrThreshold) & (rho > 0)
isNegative = (pvals < fdrThreshold) & (rho < 0)
positiveN = isPositive.sum()
negativeN = isNegative.sum()

# Classify every gene in one pass. 'uncorrelated' is the default so that rows
# matching neither mask (p exactly at the threshold, NaN p-value, rho == 0) are
# still labelled; with three separate masks they were left as NaN and seaborn
# dropped them from the histogram without warning.
corr_pv_en['status'] = np.select(
    [isPositive, isNegative],
    ['correlated', 'anticorrelated'],
    default='uncorrelated')

f, ax = plt.subplots(figsize=(7, 5), dpi=120)
ax.set_title("Correlation with PV Energy")
sns.histplot(
    corr_pv_en,
    x="corr_spearman", hue="status",
    multiple="stack",
    palette={'uncorrelated':'lightgray', 'anticorrelated':neg_col, 'correlated':pos_col},
    edgecolor=".5",
    linewidth=0,
    legend=False,
    alpha=.9,bins = 100,
    ax=ax)

# Customize
# Both count annotations sit at the same height: 80% of the y-range below the
# top of the axis, i.e. 20% above the bottom.
yMin, yMax = ax.get_ylim()
Y = yMax - 0.8*(yMax-yMin)
ax.text(0.5, Y, f'Correlated:\n{positiveN} genes', fontsize=12, horizontalalignment='center')
ax.text(-0.5, Y, f'Anticorrelated:\n{negativeN} genes', fontsize=12, horizontalalignment='center')
ax.set_xlim([-.7,.7])
ax.set_xlabel(r'Spearman $\rho$')
ax.set_ylabel('Number of genes')
sns.despine(ax=ax, top = True, right = True)

# plt.savefig("Corr_PVenergy.svg", bbox_inches="tight")