In [1]:
%%capture
# Move into the scripts folder: the DATA/RESULTS/PLOTS paths defined in the next
# cell are built from the current working directory.
# NOTE(review): the target is a relative path, so re-running this cell without
# restarting the kernel presumably fails, and %%capture hides the message — confirm.
%cd "Compound GRN ENC Analysis/scripts"

In [2]:
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from statannotations.Annotator import Annotator

# Params
# Input, result and plot locations, each anchored at the current working directory
# (os.path.abspath('') resolves to it).
DATA_FOLDER, RESULTS_FOLDER, PLOTS_FOLDER = (
    os.path.join(os.path.abspath(''), relative_path)
    for relative_path in ('../../data', '../results', '../plots')
)

# Style
sns.set_theme(context='talk', style='white', palette='Accent')
# Applied after set_theme so the font settings are not overwritten by the theme
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})


# Data

In [3]:
# Cohort -> diseases available for that cohort and the delimiter of its GRN files
#   CMC:         SCZ       (tsv)
#   UCLA_ASD:    ASD       (csv)
#   Urban_DLPFC: BPD, SCZ  (tsv)
COHORTS = {
    'CMC': {'diseases': ('SCZ',), 'delimiter': '\t'},
    'UCLA_ASD': {'diseases': ('ASD',), 'delimiter': ','},
    'Urban_DLPFC': {'diseases': ('BPD', 'SCZ'), 'delimiter': '\t'},
}

# Selection: edit these two values only
group = 'UCLA_ASD'
disease = 'ASD'

# Fail early on an inconsistent selection instead of silently mis-parsing the files later
if group not in COHORTS:
    raise ValueError(f'Unknown cohort {group!r}; expected one of {sorted(COHORTS)}')
if disease not in COHORTS[group]['diseases']:
    raise ValueError(f'Cohort {group!r} has no {disease!r} data; expected one of {COHORTS[group]["diseases"]}')

# The delimiter follows from the cohort, so it cannot get out of sync with `group`
delimiter = COHORTS[group]['delimiter']

In [4]:
# Get AD, BPD, and SCZ labels: one gene list per .txt file, keyed by the file name
# without its extension
gene_dir = os.path.join(DATA_FOLDER, 'new_labels')
gene_fnames = [fname for fname in os.listdir(gene_dir) if fname.endswith('.txt')]
gene_lists = {
    # ndmin=1 keeps a file holding a single gene as a 1-element array rather than a
    # 0-d array, which could not be iterated or indexed like the other lists
    fname[:-len('.txt')]: np.loadtxt(os.path.join(gene_dir, fname), dtype=str, ndmin=1)
    for fname in gene_fnames
}
gene_lists['BPD'] = gene_lists.pop('BD')  # Stored as 'BD' on disk, referred to as 'BPD' in this notebook

# Get ASD labels
sfari = pd.read_csv(os.path.join(DATA_FOLDER, 'sfari/SFARI-Gene_genes_01-16-2024release_03-21-2024export.csv'))
gene_score_threshold = -1
# Threshold by score (strictly greater than the threshold)
# NOTE(review): any row with a missing gene-score compares False here and is dropped — confirm this is intended
sfari = sfari.loc[sfari['gene-score'] > gene_score_threshold]
gene_lists['ASD'] = sfari['gene-symbol'].to_numpy()

# Set positive genes
positive_genes = gene_lists[disease]

# Get files for contrast
base_dir = os.path.join(DATA_FOLDER, 'merged_GRNs_v2', group)
disease_folder = os.path.join(base_dir, disease)
control_folder = os.path.join(base_dir, 'ctrl')
# os.listdir returns entries in arbitrary order; sort so the panel / cell-type order
# is the same on every run and machine
grn_fnames = sorted(os.listdir(control_folder))  # Should be the same names in either folder


def get_cell_type(fname):
    """Return the cell type encoded in a GRN file name.

    The cell type is everything before the last underscore-separated token
    of `fname`; a name without any underscore yields an empty string.
    """
    return '_'.join(fname.split('_')[:-1])


def get_result_name(fname):
    """Return the name of the prioritized-genes result file matching a GRN file.

    Uses the notebook-level `group` and `disease` selections at call time.
    """
    return f'{group}_{disease}_{get_cell_type(fname)}_prioritized_genes.csv'

# Network Analyses

In [5]:
# Select the non-interactive Agg backend: the plotting cells below write their
# figures to PLOTS_FOLDER with savefig and then close them.
%matplotlib agg

## In and Out Degrees

In [6]:
# Create figures: one per degree direction, each with one panel per GRN file plus a
# trailing panel that only hosts the colorbar
num_panels = len(grn_fnames)
fig, ax = {}, {}
for key in ('out', 'in'):
    fig[key], ax[key] = plt.subplots(1, num_panels+1, figsize=(3*(num_panels+1), 3), sharex=True, sharey=True)
    ax[key] = ax[key].flatten()

# Construct consistent objects: a single 0-1 color scale shared by every panel and by
# both colorbars (assumes the 'mean' scores lie in [0, 1] — TODO confirm)
sm = plt.cm.ScalarMappable(cmap='Reds', norm=plt.Normalize(0, 1))

# (figure key, node column of the edge list, panel title)
degree_specs = [('out', 'TF', 'Out Degree'), ('in', 'target', 'In Degree')]

# Plot network analyses
for i, fname in enumerate(grn_fnames):
    # Load graph
    disease_graph = pd.read_csv(os.path.join(disease_folder, fname), index_col=False, delimiter=delimiter)
    disease_graph['disease'] = 'Disease'
    control_graph = pd.read_csv(os.path.join(control_folder, fname), index_col=False, delimiter=delimiter)
    control_graph['disease'] = 'Control'
    combined_graph = pd.concat((disease_graph, control_graph), axis=0)

    # Load scores
    scores = pd.read_csv(os.path.join(RESULTS_FOLDER, get_result_name(fname)), index_col=0)[['label', 'mean', 'std']]

    for key, node_col, title in degree_specs:
        panel = ax[key][i]

        # Degree of every node in each graph = number of edges listing it in `node_col`;
        # nodes absent from one graph get degree 0 there
        degree_df = (
            combined_graph
            .groupby([node_col, 'disease'])
            .size()
            .unstack('disease', fill_value=0)
            # Guarantee both columns exist even if one graph has no edges
            .reindex(columns=['Control', 'Disease'], fill_value=0)
            .join(scores['mean'])
        )

        # One point per (control degree, disease degree) pair, colored by the average score
        # of the nodes sharing that pair
        degree_df = degree_df.groupby(['Control', 'Disease'], as_index=False)['mean'].mean()

        # hue_norm ties the point colors to the same 0-1 scale as the colorbar; without it
        # seaborn rescales the colors to each panel's own score range.
        # legend=False replaces get_legend().remove(), which fails when no legend was drawn.
        sns.scatterplot(
            data=degree_df, x='Control', y='Disease', hue='mean', palette='Reds',
            hue_norm=sm.norm, legend=False, ax=panel)
        sns.despine(ax=panel)
        # NOTE(review): every panel carries the same title, so the cell type of a panel is
        # not visible in the figure — consider using get_cell_type(fname)
        panel.set(xlabel='Control', ylabel='Disease', title=title)

# Insert colorbars
for key in fig:
    ax[key][-1].axis('off')
    fig[key].colorbar(sm, ax=ax[key][-1])

# Save figures
fig['out'].savefig(os.path.join(PLOTS_FOLDER, f'DegreeOut_{group}_{disease}.pdf'), bbox_inches='tight')
fig['in'].savefig(os.path.join(PLOTS_FOLDER, f'DegreeIn_{group}_{disease}.pdf'), bbox_inches='tight')

# Close figs: a bare plt.close() only closes the current figure, so close both explicitly
for key in fig:
    plt.close(fig[key])

## Score by Label

In [7]:
# Create figure (width scales with the number of cell types)
fig, ax = plt.subplots(1, 1, figsize=(2*len(grn_fnames), 6))

# Create df: one frame of gene scores per cell type, concatenated once at the end
label_names = {0: 'Control', 1: 'Disease'}
score_frames = []
for fname in grn_fnames:
    # Load scores
    scores = pd.read_csv(os.path.join(RESULTS_FOLDER, get_result_name(fname)), index_col=0)[['label', 'mean', 'std']]

    # Format df
    # Build the text labels as a new column instead of writing strings into the numeric
    # column in place; values other than 0/1 are passed through unchanged
    df = scores.assign(label=scores['label'].map(lambda value: label_names.get(value, value)))
    df = df.rename(columns={'label': 'Label', 'mean': 'Score'})
    df['Cell Type'] = get_cell_type(fname)
    score_frames.append(df)
df_all = pd.concat(score_frames, axis=0)

# Params
hue_order = ['Control', 'Disease']

# Plot: split violins, Control vs Disease label, per cell type
sns.violinplot(data=df_all, x='Cell Type', y='Score', hue='Label', hue_order=hue_order, split=True, inner='quart', density_norm='count', ax=ax)
sns.despine(ax=ax)

# Annotate significance: Control vs Disease within each cell type
pairs = [((ct, hue_order[0]), (ct, hue_order[1])) for ct in df_all['Cell Type'].unique()]
annotator = Annotator(ax, pairs, data=df_all, x='Cell Type', y='Score', hue='Label', hue_order=hue_order)
annotator.configure(test='Mann-Whitney', text_format='star', loc='outside')
results = annotator.apply_test().annotate()

# Save figure
fig.savefig(os.path.join(PLOTS_FOLDER, f'DistributionScore_{group}_{disease}.pdf'), bbox_inches='tight')
plt.close(fig)

p-value annotation legend:
      ns: 5.00e-02 < p <= 1.00e+00
       *: 1.00e-02 < p <= 5.00e-02
      **: 1.00e-03 < p <= 1.00e-02
     ***: 1.00e-04 < p <= 1.00e-03
    ****: p <= 1.00e-04

endo_Control vs. endo_Disease: Mann-Whitney-Wilcoxon test two-sided, P_val:3.305e-19 U_stat=5.933e+05
astro_Control vs. astro_Disease: Mann-Whitney-Wilcoxon test two-sided, P_val:1.950e-08 U_stat=5.384e+05
excitatory_Control vs. excitatory_Disease: Mann-Whitney-Wilcoxon test two-sided, P_val:1.796e-06 U_stat=3.531e+05
micro_Control vs. micro_Disease: Mann-Whitney-Wilcoxon test two-sided, P_val:9.003e-29 U_stat=4.805e+05
oligo_Control vs. oligo_Disease: Mann-Whitney-Wilcoxon test two-sided, P_val:3.740e-16 U_stat=5.983e+05
opc_Control vs. opc_Disease: Mann-Whitney-Wilcoxon test two-sided, P_val:2.265e-18 U_stat=4.654e+05
vlmc_Control vs. vlmc_Disease: Mann-Whitney-Wilcoxon test two-sided, P_val:7.735e-15 U_stat=8.582e+05


## Score by Graph

In [8]:
# Create figure (width scales with the number of cell types)
fig, ax = plt.subplots(1, 1, figsize=(2*len(grn_fnames), 6))

# Create df: for every cell type, the scored genes present in the control graph and
# the scored genes present in the disease graph
graph_frames = []
for fname in grn_fnames:
    # Load graphs
    graphs = {
        'Control': pd.read_csv(os.path.join(control_folder, fname), index_col=False, delimiter=delimiter),
        'Disease': pd.read_csv(os.path.join(disease_folder, fname), index_col=False, delimiter=delimiter),
    }

    # Load scores
    scores = pd.read_csv(os.path.join(RESULTS_FOLDER, get_result_name(fname)), index_col=0)[['label', 'mean', 'std']]
    scored_genes = set(scores.index)

    # Format df
    for graph_name, graph in graphs.items():
        # Genes appearing as TF or target in this graph that also have a score.
        # A gene present in both graphs contributes one row to each group.
        genes = list((set(graph['TF']) | set(graph['target'])) & scored_genes)
        df = pd.DataFrame({'genes': genes})
        df['Graph'] = graph_name
        df = df.join(scores, on='genes')
        df = df.rename(columns={'mean': 'Score'})
        df['Cell Type'] = get_cell_type(fname)
        graph_frames.append(df)
df_all = pd.concat(graph_frames, axis=0, ignore_index=True)

# Params
hue_order = ['Control', 'Disease']

# Plot: split violins, Control vs Disease graph, per cell type
sns.violinplot(data=df_all, x='Cell Type', y='Score', hue='Graph', hue_order=hue_order, split=True, inner='quart', density_norm='count', ax=ax)
sns.despine(ax=ax)

# Annotate significance: Control vs Disease graph within each cell type
pairs = [((ct, hue_order[0]), (ct, hue_order[1])) for ct in df_all['Cell Type'].unique()]
annotator = Annotator(ax, pairs, data=df_all, x='Cell Type', y='Score', hue='Graph', hue_order=hue_order)
annotator.configure(test='Mann-Whitney', text_format='star', loc='outside')
results = annotator.apply_test().annotate()

# Save figure
fig.savefig(os.path.join(PLOTS_FOLDER, f'DistributionDisease_{group}_{disease}.pdf'), bbox_inches='tight')
plt.close(fig)

p-value annotation legend:
      ns: 5.00e-02 < p <= 1.00e+00
       *: 1.00e-02 < p <= 5.00e-02
      **: 1.00e-03 < p <= 1.00e-02
     ***: 1.00e-04 < p <= 1.00e-03
    ****: p <= 1.00e-04

endo_Control vs. endo_Disease: Mann-Whitney-Wilcoxon test two-sided, P_val:3.004e-18 U_stat=4.030e+06
astro_Control vs. astro_Disease: Mann-Whitney-Wilcoxon test two-sided, P_val:1.711e-45 U_stat=2.642e+06
excitatory_Control vs. excitatory_Disease: Mann-Whitney-Wilcoxon test two-sided, P_val:8.070e-13 U_stat=2.338e+06
micro_Control vs. micro_Disease: Mann-Whitney-Wilcoxon test two-sided, P_val:4.191e-73 U_stat=2.980e+06
oligo_Control vs. oligo_Disease: Mann-Whitney-Wilcoxon test two-sided, P_val:7.588e-32 U_stat=3.992e+06
opc_Control vs. opc_Disease: Mann-Whitney-Wilcoxon test two-sided, P_val:5.479e-49 U_stat=2.936e+06
vlmc_Control vs. vlmc_Disease: Mann-Whitney-Wilcoxon test two-sided, P_val:1.045e-82 U_stat=5.033e+06


## Drug Targets

In [9]:
# Parameters
# Drug name -> list of target gene symbols. The next cell tags every score row whose
# gene is in a drug's list with that drug's name and compares it to the background.
# NOTE(review): a symbol that does not occur in the score tables is silently skipped
# by the isin() filter, so spelling must match the result files exactly — verify
# (e.g. 'MARCH3', 'HDAC2-AS2', 'ZEB1-AS1').
drug_targets = {
    'Drug A': ['ATXN1', 'HMGA1', 'TBX15', 'ETV7', 'ZBTB14', 'EPHA4', 'C2CD2', 'MDM2', 'KLF11', 'PTAFR'],
    'Drug B': ['OPRK1', 'HDAC2-AS2', 'HES1', 'S1PR1', 'ADAM33', 'SMAD7', 'ZNF607', 'CDKN2D', 'ADK', 'MAPK8IP1'],
    'Drug C': ['ANPEP', 'PDE11A', 'RSAD2', 'RASSF5', 'ONECUT2', 'PDIA5', 'CNGB1', 'ZNF124', 'GTF2I', 'HS3ST5'],
    'Drug D': ['IL18', 'SLFN12L', 'OMG', 'DOK6', 'RBBP9', 'IGFBP7', 'MARCH3', 'NIPA1', 'ZEB1-AS1', 'RGS18'],
    'Drug E': ['BACH1', 'CYFIP2', 'FMNL3', 'TEKT3', 'HMG20A', 'MTUS2', 'SLITRK5', 'ZNF687', 'HCFC1', 'GDPD4'],
    'Drug F': ['TRMO', 'KITLG', 'ARHGAP10', 'ZNF655', 'CPTP', 'KCNF1', 'VSX2', 'MAP1LC3B2', 'TENM4', 'ZNF711'],
}

In [10]:
# Create figure
fig, ax = plt.subplots(1, 1, figsize=(12, 6))

# Create df: gene scores of every cell type, concatenated once at the end
label_names = {0: 'Control', 1: 'Disease'}
score_frames = []
for fname in grn_fnames:
    # Load scores
    scores = pd.read_csv(os.path.join(RESULTS_FOLDER, get_result_name(fname)), index_col=0)[['label', 'mean', 'std']]

    # Format df
    # Build the text labels as a new column instead of writing strings into the numeric
    # column in place; values other than 0/1 are passed through unchanged
    df = scores.assign(label=scores['label'].map(lambda value: label_names.get(value, value)))
    df = df.rename(columns={'label': 'Label', 'mean': 'Score'})
    df['Cell Type'] = get_cell_type(fname)
    score_frames.append(df)

# Background = every scored gene in every cell type (drug targets included).
# reset_index exposes the gene names as a column; the filter below expects it to be
# called 'gene', i.e. the result files' index column must carry that name.
background = pd.concat(score_frames, axis=0).reset_index()
background['Drug'] = 'Background'

# Annotate drug targets: each drug gets a copy of its targets' background rows.
# Targets are always selected from the background only, so a gene shared by two drugs
# is not picked up a second time from rows already tagged for an earlier drug.
drug_frames = [background]
for name, targets in drug_targets.items():
    drug_frames.append(background.loc[background['gene'].isin(targets)].assign(Drug=name))
df_all = pd.concat(drug_frames, axis=0, ignore_index=True)

# Plot
sns.violinplot(data=df_all, x='Score', y='Drug', orient='h', ax=ax)
sns.despine(ax=ax)

# Annotate significance: each drug's targets vs the background
# NOTE(review): one test per drug with no multiple-comparison correction configured —
# confirm whether a correction should be applied before interpreting the stars
pairs = [('Background', name) for name in drug_targets]
annotator = Annotator(ax, pairs, data=df_all, x='Score', y='Drug', orient='h')
annotator.configure(test='Mann-Whitney', text_format='star')
results = annotator.apply_test().annotate()

# Save figure
fig.savefig(os.path.join(PLOTS_FOLDER, f'DistributionDrug_{group}_{disease}.pdf'), bbox_inches='tight')
plt.close(fig)

p-value annotation legend:
      ns: 5.00e-02 < p <= 1.00e+00
       *: 1.00e-02 < p <= 5.00e-02
      **: 1.00e-03 < p <= 1.00e-02
     ***: 1.00e-04 < p <= 1.00e-03
    ****: p <= 1.00e-04

Background vs. Drug A: Mann-Whitney-Wilcoxon test two-sided, P_val:2.447e-01 U_stat=7.083e+05
Background vs. Drug B: Mann-Whitney-Wilcoxon test two-sided, P_val:2.354e-02 U_stat=4.322e+05
Background vs. Drug C: Mann-Whitney-Wilcoxon test two-sided, P_val:3.782e-01 U_stat=5.587e+05
Background vs. Drug D: Mann-Whitney-Wilcoxon test two-sided, P_val:2.652e-01 U_stat=7.120e+05
Background vs. Drug E: Mann-Whitney-Wilcoxon test two-sided, P_val:4.743e-01 U_stat=5.677e+05
Background vs. Drug F: Mann-Whitney-Wilcoxon test two-sided, P_val:1.233e-01 U_stat=5.634e+05
