# TWAS tissue comparison

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib_venn import venn3, venn3_circles

## Prepare data

In [None]:
def limiting_features(set_dict, f1, f2):
    """Print the overlap between two named sets.

    Two lines are printed: the percentage of ``set_dict[f2]`` that is also
    in ``set_dict[f1]``, and the number of shared elements.

    Parameters
    ----------
    set_dict : mapping of name -> set
    f1, f2 : keys of ``set_dict``; ``f2`` is the denominator of the percentage.
    """
    shared = set_dict[f1] & set_dict[f2]
    reference_size = len(set_dict[f2])
    # An empty reference set has no defined percentage; report NaN rather
    # than failing with ZeroDivisionError.
    if reference_size:
        xx = len(shared) / reference_size * 100
    else:
        xx = float('nan')
    print("Comparing %s with %s: %0.2f%%" % (f1, f2, xx))
    print("Features in common: %d" % len(shared))

### Load PGC2+CLOZUK

In [None]:
# Tab-separated SNP table; the first column becomes the index.
pgc2_file = ('/ceph/projects/v4_phase3_paper/inputs/sz_gwas/pgc2_clozuk/'
             'map_phase3/_m/libd_hg38_pgc2sz_snps.tsv')
pgc2_df = pd.read_csv(pgc2_file, index_col=0, sep='\t', low_memory=False)

### Load TWAS associations

In [None]:
caudate_file = ('/ceph/projects/v4_phase3_paper/analysis/twas_ea/'
                'gene_weights/fusion_pgc2/summary_stats/_m/fusion_associations.txt')
# caudate0 holds every association; caudate keeps only rows with FDR <= 0.05.
caudate0 = pd.read_csv(caudate_file, sep='\t')
caudate = caudate0.loc[caudate0['FDR'].le(0.05)].copy()
print(len(caudate))

In [None]:
dlpfc_file = '/ceph/users/jbenja13/phase3_paper/phase2/twas/extract_twas/_m/dlpfc_twas_assocations_fusion.csv'
# Keep gene-level DLPFC rows, drop the incoming FILE column, then rename so
# that ID -> FILE, genesymbol -> ID and TWAS.FDR -> FDR.
dlpfc0 = (
    pd.read_csv(dlpfc_file, low_memory=False)
    .loc[lambda d: d['feature'].eq('gene') & d['region'].eq('DLPFC')]
    .drop(columns='FILE')
    .copy()
    .loc[:, [
        'ID', 'genesymbol', 'CHR', 'P0', 'P1', 'HSQ',
        'BEST.GWAS.ID', 'BEST.GWAS.Z',
        'EQTL.ID', 'EQTL.R2', 'EQTL.Z', 'EQTL.GWAS.Z',
        'NSNP', 'NWGT', 'MODEL', 'MODELCV.R2', 'MODELCV.PV',
        'TWAS.Z', 'TWAS.P', 'TWAS.FDR', 'TWAS.Bonf',
        'BEST.GWAS.pos_hg19', 'BEST.GWAS.pos_hg38',
    ]]
    .rename(columns={'ID': 'FILE', 'genesymbol': 'ID', 'TWAS.FDR': 'FDR'})
)
# Strip everything from the first '.' onwards in FILE.
dlpfc0['FILE'] = dlpfc0['FILE'].str.replace(r'\..*', '', regex=True)
dlpfc = dlpfc0[dlpfc0['FDR'].le(0.05)]
print(len(dlpfc))

In [None]:
hippo_file = '/ceph/users/jbenja13/phase3_paper/phase2/twas/extract_twas/_m/hippo_twas_assocations_fusion.csv'
# Keep gene-level HIPPO rows, drop the incoming FILE column, then rename so
# that ID -> FILE, genesymbol -> ID and TWAS.FDR -> FDR.
hippo0 = (
    pd.read_csv(hippo_file, low_memory=False)
    .loc[lambda d: d['feature'].eq('gene') & d['region'].eq('HIPPO')]
    .drop(columns='FILE')
    .copy()
    .loc[:, [
        'ID', 'genesymbol', 'CHR', 'P0', 'P1', 'HSQ',
        'BEST.GWAS.ID', 'BEST.GWAS.Z',
        'EQTL.ID', 'EQTL.R2', 'EQTL.Z', 'EQTL.GWAS.Z',
        'NSNP', 'NWGT', 'MODEL', 'MODELCV.R2', 'MODELCV.PV',
        'TWAS.Z', 'TWAS.P', 'TWAS.FDR', 'TWAS.Bonf',
        'BEST.GWAS.pos_hg19', 'BEST.GWAS.pos_hg38',
    ]]
    .rename(columns={'ID': 'FILE', 'genesymbol': 'ID', 'TWAS.FDR': 'FDR'})
)
# Strip everything from the first '.' onwards in FILE.
hippo0['FILE'] = hippo0['FILE'].str.replace(r'\..*', '', regex=True)
hippo = hippo0[hippo0['FDR'].le(0.05)]
print(len(hippo))

In [None]:
# Attach the GWAS table to each tissue through its BEST.GWAS.ID, then label
# each row 'Risk SNP' when P <= 5e-8 and 'Other' otherwise.
# Caudate joins on our_snp_id; DLPFC and hippocampus join on rsid.

## Caudate
new_caudate0 = caudate0.merge(pgc2_df, left_on='BEST.GWAS.ID',
                              right_on='our_snp_id',
                              suffixes=['_TWAS', '_PGC2'])
new_caudate0['GWAS.SNP'] = np.where(new_caudate0['P'] <= 5e-8,
                                    'Risk SNP', 'Other')
## DLPFC
new_dlpfc0 = dlpfc0.merge(pgc2_df, left_on='BEST.GWAS.ID',
                          right_on='rsid',
                          suffixes=['_TWAS', '_PGC2'])
new_dlpfc0['GWAS.SNP'] = np.where(new_dlpfc0['P'] <= 5e-8,
                                  'Risk SNP', 'Other')
## Hippocampus
new_hippo0 = hippo0.merge(pgc2_df, left_on='BEST.GWAS.ID',
                          right_on='rsid',
                          suffixes=['_TWAS', '_PGC2'])
new_hippo0['GWAS.SNP'] = np.where(new_hippo0['P'] <= 5e-8,
                                  'Risk SNP', 'Other')

In [None]:
# One row per (FILE, ID) pair with tissue-prefixed TWAS.Z / FDR / GWAS.SNP
# columns, combined with outer merges on the index.
tt = new_caudate0[['FILE', 'ID', 'TWAS.Z', 'FDR', 'GWAS.SNP']]\
    .set_index(['FILE', 'ID']).add_prefix('Caudate_')
tt = tt.merge(
    new_dlpfc0[['FILE', 'ID', 'TWAS.Z', 'FDR', 'GWAS.SNP']]
    .set_index(['FILE', 'ID']).add_prefix('DLPFC_'),
    how='outer', left_index=True, right_index=True,
)
tt = tt.merge(
    new_hippo0[['FILE', 'ID', 'TWAS.Z', 'FDR', 'GWAS.SNP']]
    .set_index(['FILE', 'ID']).add_prefix('HIPPO_'),
    how='outer', left_index=True, right_index=True,
)
tt = tt.reset_index().rename(columns={'FILE': 'Geneid', 'ID': 'Symbol'})
# The CSV is written sorted by caudate FDR; tt itself keeps the merge order.
tt.sort_values("Caudate_FDR").to_csv('TWAS_gene_tissue_summary.csv', index=False, header=True)
tt.head(2)

## Plot Venn

### Genes

#### Heritable comparison

In [None]:
# Unique FILE values per tissue, taken from the unfiltered tables.
tissues = dict(
    Caudate=set(caudate0['FILE']),
    DLPFC=set(dlpfc0['FILE']),
    Hippocampus=set(hippo0['FILE']),
)

In [None]:
# Three-way Venn diagram of the per-tissue sets, saved as PNG, PDF and SVG.
plt.rcParams.update({'font.size': 32, 'font.weight': 'normal'})
plt.figure(figsize=(12,12))

v = venn3([tissues['Caudate'], tissues['DLPFC'], tissues['Hippocampus']],
          ('Caudate', 'DLPFC', 'Hippocampus'))

# '100', '010' and '001' are the tissue-exclusive regions. get_patch_by_id
# returns None for a region with no members, so skip those rather than fail
# with AttributeError.
for region_id in ('100', '010', '001'):
    patch = v.get_patch_by_id(region_id)
    if patch is not None:
        patch.set_alpha(0.7)

for ext in ('png', 'pdf', 'svg'):
    plt.savefig('twas_tissue_comparison_allFeatures.' + ext)
plt.show()

In [None]:
# Pairwise overlap report; the second name is the denominator set.
for first, second in (('Caudate', 'Hippocampus'),
                      ('DLPFC', 'Caudate'),
                      ('DLPFC', 'Hippocampus')):
    limiting_features(tissues, first, second)

#### TWAS significant

In [None]:
# Unique FILE values per tissue, taken from the FDR <= 0.05 tables.
tissues = dict(
    Caudate=set(caudate['FILE']),
    DLPFC=set(dlpfc['FILE']),
    Hippocampus=set(hippo['FILE']),
)

In [None]:
# Three-way Venn diagram of the per-tissue sets, saved as PNG, PDF and SVG.
plt.rcParams.update({'font.size': 32, 'font.weight': 'normal'})
plt.figure(figsize=(12,12))

v = venn3([tissues['Caudate'], tissues['DLPFC'], tissues['Hippocampus']],
          ('Caudate', 'DLPFC', 'Hippocampus'))

# '100', '010' and '001' are the tissue-exclusive regions. get_patch_by_id
# returns None for a region with no members, so skip those rather than fail
# with AttributeError.
for region_id in ('100', '010', '001'):
    patch = v.get_patch_by_id(region_id)
    if patch is not None:
        patch.set_alpha(0.7)

for ext in ('png', 'pdf', 'svg'):
    plt.savefig('twas_tissue_comparison_fdr05.' + ext)
plt.show()

In [None]:
# Rows of the GWAS-annotated caudate table for FILE values present in all
# three FDR-filtered tables. pandas >= 2.0 rejects a set as a .loc indexer,
# so pass a sorted list; this also makes the row order reproducible.
overlapping_twas = new_caudate0.set_index('FILE')\
    .loc[sorted(set(caudate.FILE) & set(dlpfc.FILE) & set(hippo.FILE)), :]
overlapping_twas.to_csv('overlapping_tissue_twasList.txt', sep='\t')

In [None]:
# Caudate rows for FILE values present in all three unfiltered tables.
# pandas >= 2.0 rejects a set as a .loc indexer, so pass a sorted list;
# this also makes the row order reproducible.
overlapping_twas2 = caudate0.set_index('FILE')\
    .loc[sorted(set(caudate0.FILE) & set(dlpfc0.FILE) & set(hippo0.FILE)), :]
overlapping_twas2.to_csv('overlapping_tissue_twasList_allFeatures.txt', sep='\t')
overlapping_twas2.shape

In [None]:
# Pairwise overlap report; the second name is the denominator set.
for first, second in (('Caudate', 'Hippocampus'),
                      ('DLPFC', 'Caudate'),
                      ('DLPFC', 'Hippocampus')):
    limiting_features(tissues, first, second)

In [None]:
# Rows of the GWAS-annotated caudate table whose FILE is in the caudate set
# but in neither the DLPFC nor the hippocampus set. pandas >= 2.0 rejects a
# set as a .loc indexer, so pass a sorted list (also gives a stable order).
caudate_only_genes = new_caudate0.set_index('FILE')\
                                 .loc[sorted(tissues['Caudate'] - tissues['DLPFC'] - tissues['Hippocampus']), :]
caudate_only_genes.to_csv('caudate_only_twasList_genes.txt', sep='\t')
print(caudate_only_genes.shape)
caudate_only_genes.head()

## Z score comparison

In [None]:
# NOTE(review): filterwarnings('ignore') suppresses every warning category
# for the rest of the session, including deprecation warnings from pandas;
# consider narrowing it to specific categories or modules.
import warnings
warnings.filterwarnings('ignore')

# ggplot, aes, geom_point, labs, theme_light, theme and element_text used in
# the cells below are not defined elsewhere in this notebook -- presumably
# they all come from this star import.
from plotnine import *
from scipy.stats import spearmanr

In [None]:
def save_plot(p, fn):
    """Save ``p`` three times: as ``fn.png``, ``fn.pdf`` and ``fn.svg``.

    ``p`` must provide a ``save(filename)`` method; ``fn`` is the output
    path without an extension.
    """
    targets = (fn + '.' + suffix for suffix in ('png', 'pdf', 'svg'))
    for target in targets:
        p.save(target)

### DLPFC and Caudate

In [None]:
# Outer merge keeps FILE values found in either table; fillna(0) then
# replaces every missing value in the merged frame with 0.
df1 = pd.merge(dlpfc0, caudate0, on='FILE', how='outer',
               suffixes=('_dlpfc', '_caudate')).fillna(0)

pp = ggplot(df1, aes(x='TWAS.Z_caudate', y='TWAS.Z_dlpfc'))
pp = pp + geom_point(alpha=0.5, size=1.25)
pp = pp + labs(x='TWAS Zscore Caudate', y='TWAS Zscore DLPFC')
pp = pp + theme_light()
pp = pp + theme(axis_text=element_text(size=14),
                axis_title=element_text(size=16, face='bold'))
pp

In [None]:
# Write the current plot as PNG, PDF and SVG.
save_plot(p=pp, fn='twas_zscore_comparison_dlpfc_caudate_allFeatures')

In [None]:
# Inner merge: only FILE values present in both tables are correlated.
df1 = pd.merge(dlpfc0, caudate0, on='FILE', how='inner',
               suffixes=('_dlpfc', '_caudate'))
spearmanr(df1.loc[:, 'TWAS.Z_caudate'], df1.loc[:, 'TWAS.Z_dlpfc'])

In [None]:
# Scatter of caudate (x) against DLPFC (y) TWAS Z-scores from df1.
pp = ggplot(df1, aes(x='TWAS.Z_caudate', y='TWAS.Z_dlpfc'))
pp = pp + geom_point()
pp = pp + labs(x='TWAS Zscore Caudate', y='TWAS Zscore DLPFC')
pp = pp + theme_light()
pp = pp + theme(axis_text=element_text(size=14),
                axis_title=element_text(size=16, face='bold'))
pp

In [None]:
# Write the current plot as PNG, PDF and SVG.
save_plot(p=pp, fn='twas_zscore_comparison_dlpfc_caudate')

#### Significant TWAS

In [None]:
# FDR-filtered tables, joined on the FILE values they share.
df1 = pd.merge(dlpfc, caudate, on='FILE', how='inner',
               suffixes=('_dlpfc', '_caudate'))
spearmanr(df1.loc[:, 'TWAS.Z_caudate'], df1.loc[:, 'TWAS.Z_dlpfc'])

In [None]:
# Scatter of caudate (x) against DLPFC (y) TWAS Z-scores from df1.
pp = ggplot(df1, aes(x='TWAS.Z_caudate', y='TWAS.Z_dlpfc'))
pp = pp + geom_point()
pp = pp + labs(x='TWAS Zscore Caudate', y='TWAS Zscore DLPFC')
pp = pp + theme_light()
pp = pp + theme(axis_text=element_text(size=14),
                axis_title=element_text(size=16, face='bold'))
pp

In [None]:
# Write the current plot as PNG, PDF and SVG.
save_plot(p=pp, fn='twas_zscore_comparison_dlpfc_caudate_fdr05')

### Hippocampus and Caudate

In [None]:
# Outer merge keeps FILE values found in either table; fillna(0) then
# replaces every missing value in the merged frame with 0.
df2 = pd.merge(hippo0, caudate0, on='FILE', how='outer',
               suffixes=('_hippo', '_caudate')).fillna(0)

pp = ggplot(df2, aes(x='TWAS.Z_caudate', y='TWAS.Z_hippo'))
pp = pp + geom_point()
pp = pp + labs(x='TWAS Zscore Caudate', y='TWAS Zscore Hippocampus')
pp = pp + theme_light()
pp = pp + theme(axis_text=element_text(size=14),
                axis_title=element_text(size=16, face='bold'))
pp

In [None]:
# Write the current plot as PNG, PDF and SVG.
save_plot(p=pp, fn='twas_zscore_comparison_hippo_caudate_allFeatures')

In [None]:
# Inner merge (the default): only FILE values present in both tables.
df2 = pd.merge(hippo0, caudate0, on='FILE', how='inner',
               suffixes=('_hippo', '_caudate'))
spearmanr(df2.loc[:, 'TWAS.Z_caudate'], df2.loc[:, 'TWAS.Z_hippo'])

In [None]:
# Scatter of caudate (x) against hippocampus (y) TWAS Z-scores from df2.
pp = ggplot(df2, aes(x='TWAS.Z_caudate', y='TWAS.Z_hippo'))
pp = pp + geom_point()
pp = pp + labs(x='TWAS Zscore Caudate', y='TWAS Zscore Hippocampus')
pp = pp + theme_light()
pp = pp + theme(axis_text=element_text(size=14),
                axis_title=element_text(size=16, face='bold'))
pp

In [None]:
# Write the current plot as PNG, PDF and SVG.
save_plot(p=pp, fn='twas_zscore_comparison_hippo_caudate')

#### Significant TWAS

In [None]:
# FDR-filtered tables, joined on the FILE values they share.
df2 = pd.merge(hippo, caudate, on='FILE', how='inner',
               suffixes=('_hippo', '_caudate'))
spearmanr(df2.loc[:, 'TWAS.Z_caudate'], df2.loc[:, 'TWAS.Z_hippo'])

In [None]:
# Scatter of caudate (x) against hippocampus (y) TWAS Z-scores from df2.
pp = ggplot(df2, aes(x='TWAS.Z_caudate', y='TWAS.Z_hippo'))
pp = pp + geom_point()
pp = pp + labs(x='TWAS Zscore Caudate', y='TWAS Zscore Hippocampus')
pp = pp + theme_light()
pp = pp + theme(axis_text=element_text(size=14),
                axis_title=element_text(size=16, face='bold'))
pp

In [None]:
# Write the current plot as PNG, PDF and SVG.
save_plot(p=pp, fn='twas_zscore_comparison_hippo_caudate_fdr05')

### DLPFC and Hippocampus

In [None]:
# Outer merge keeps FILE values found in either table; fillna(0) then
# replaces every missing value in the merged frame with 0.
df3 = pd.merge(hippo0, dlpfc0, on='FILE', how='outer',
               suffixes=('_hippo', '_dlpfc')).fillna(0)
pp = ggplot(df3, aes(x='TWAS.Z_dlpfc', y='TWAS.Z_hippo'))
pp = pp + geom_point()
pp = pp + labs(x='TWAS Zscore DLPFC', y='TWAS Zscore Hippocampus')
pp = pp + theme_light()
pp = pp + theme(axis_text=element_text(size=14),
                axis_title=element_text(size=16, face='bold'))
pp

In [None]:
# Write the current plot as PNG, PDF and SVG.
save_plot(p=pp, fn='twas_zscore_comparison_hippo_dlpfc_allFeatures')

In [None]:
# Inner merge (the default): only FILE values present in both tables.
df3 = pd.merge(hippo0, dlpfc0, on='FILE', how='inner',
               suffixes=('_hippo', '_dlpfc'))
spearmanr(df3.loc[:, 'TWAS.Z_dlpfc'], df3.loc[:, 'TWAS.Z_hippo'])

In [None]:
# Scatter of DLPFC (x) against hippocampus (y) TWAS Z-scores from df3.
pp = ggplot(df3, aes(x='TWAS.Z_dlpfc', y='TWAS.Z_hippo'))
pp = pp + geom_point()
pp = pp + labs(x='TWAS Zscore DLPFC', y='TWAS Zscore Hippocampus')
pp = pp + theme_light()
pp = pp + theme(axis_text=element_text(size=14),
                axis_title=element_text(size=16, face='bold'))
pp

In [None]:
# Write the current plot as PNG, PDF and SVG.
save_plot(p=pp, fn='twas_zscore_comparison_hippo_dlpfc')

#### Significant TWAS

In [None]:
# FDR-filtered tables, joined on the FILE values they share.
df3 = pd.merge(dlpfc, hippo, on='FILE', how='inner',
               suffixes=('_dlpfc', '_hippo'))
spearmanr(df3.loc[:, 'TWAS.Z_hippo'], df3.loc[:, 'TWAS.Z_dlpfc'])

In [None]:
# Scatter of DLPFC (x) against hippocampus (y) TWAS Z-scores from df3.
pp = ggplot(df3, aes(x='TWAS.Z_dlpfc', y='TWAS.Z_hippo'))
pp = pp + geom_point()
pp = pp + labs(x='TWAS Zscore DLPFC', y='TWAS Zscore Hippocampus')
pp = pp + theme_light()
pp = pp + theme(axis_text=element_text(size=14),
                axis_title=element_text(size=16, face='bold'))
pp

In [None]:
# Write the current plot as PNG, PDF and SVG.
save_plot(p=pp, fn='twas_zscore_comparison_hippo_dlpfc_fdr05')