# eQTpLot for eQTL-GWAS colocalization

In [None]:
import pandas as pd
import numpy as np
import subprocess
import functools

In [None]:
%load_ext rpy2.ipython

## Functions

In [None]:
def flip_slope_by_allele(row):
    """Return the row's NES, sign-flipped when the allele flag is falsy.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide the keys "pgc2_a1_same_as_our_counted" and "NES".

    Returns
    -------
    ``row["NES"]`` when the flag is truthy, ``-row["NES"]`` otherwise.
    """
    same_allele = bool(row["pgc2_a1_same_as_our_counted"])
    sign = 1 if same_allele else -1
    return sign * row["NES"]


def get_ld(fn, eqtl_dfx, gwas_dfx, label):
    """Compute pairwise LD (r2) with plink for SNPs present in both tables.

    Side effects: writes ``snps_<label>.txt`` and plink's
    ``shared_snps_<label>.*`` outputs into the current working directory.

    Parameters
    ----------
    fn : str
        Path handed to plink's ``--keep-fam`` option.
    eqtl_dfx : pandas.DataFrame
        Must contain a ``SNP.Id`` column.
    gwas_dfx : pandas.DataFrame
        Must contain ``SNP`` and ``P`` columns.
    label : str
        Suffix used to name the intermediate and output files.

    Returns
    -------
    pandas.DataFrame
        The first seven columns of ``shared_snps_<label>.ld``.

    Raises
    ------
    subprocess.CalledProcessError
        If plink or sed exits with a non-zero status.
    OSError
        If the plink or sed executable cannot be started.
    """
    shared_df = gwas_dfx.merge(eqtl_dfx, left_on='SNP', right_on='SNP.Id')\
                        .sort_values('P', ascending=True)
    snp_list = 'snps_%s.txt' % label
    out_prefix = 'shared_snps_%s' % label
    ld_file = out_prefix + '.ld'
    shared_df[['SNP.Id']].to_csv(snp_list, index=False, header=False)
    # Run each tool on its own with check=True: with the previous
    # "plink ...; sed ..." shell string a failed plink run went unnoticed and
    # a stale .ld file from an earlier run could be read back silently.
    subprocess.run(
        ['/ceph/opt/plink-ng/1.9/plink',
         '--bfile', '/ceph/projects/v4_phase3_paper/inputs/genotypes/_m/LIBD_Brain_TopMed',
         '--extract', snp_list,
         '--keep-fam', fn,
         '--r2', 'inter-chr',
         '--write-snplist',
         '--ld-window-r2', '0',
         '--out', out_prefix],
        check=True)
    # Strip the leading run of spaces, then turn every remaining run of
    # spaces into a single tab so the file can be parsed as TSV.
    subprocess.run(['sed', '-i', 's/ \\+//; s/ \\+/\t/g', ld_file], check=True)
    return pd.read_csv(ld_file, sep='\t', usecols=[*range(7)])

In [None]:
@functools.lru_cache()
def get_gwas():
    """Load the GWAS summary-statistics table (read once, then memoised).

    The file is tab-separated; its first column becomes the index and the
    ``chrN`` column is read as ``str``.

    NOTE: because of the cache every caller receives the same DataFrame
    object, so it should be treated as read-only.
    """
    summary_path = (
        '/ceph/projects/v4_phase3_paper/inputs/sz_gwas/'
        'pgc2_clozuk/map_phase3/zscore/_m/libd_hg38_pgc2sz_snps.tsv'
    )
    column_types = {'chrN': str}
    return pd.read_csv(summary_path, sep="\t", dtype=column_types, index_col=0)


@functools.lru_cache()
def _subset_gwas_cached(chrom, pos, window):
    """Build the GWAS subset for one (chrom, pos, window) triple, memoised."""
    full = get_gwas()
    # Open interval: pos - window < position < pos + window on chromosome `chrom`.
    in_window = ((full['chrN'] == chrom) &
                 (full['pos'] > pos - window) &
                 (full['pos'] < pos + window))
    locus = full.loc[in_window, ['chrN', 'pos', 'our_snp_id', 'P']]\
                .rename(columns={'chrN': 'CHR', 'pos': 'BP', 'our_snp_id': 'SNP'})
    # The mask selects exactly the rows of `locus`, in the same order, so the
    # values can be attached positionally and only the window is transformed.
    # NOTE: BETA is log(OR) as stored in the file; no allele-based flip is applied here.
    locus['BETA'] = np.log(full.loc[in_window, 'OR'].to_numpy())
    locus['PHE'] = 'SCZD'
    locus['CHR'] = locus['CHR'].astype(int)
    locus['pgc2_a1_same_as_our_counted'] = \
        full.loc[in_window, 'pgc2_a1_same_as_our_counted'].to_numpy()
    return locus


def subset_gwas(chrom, pos, window):
    """Return the GWAS rows within ``window`` of ``pos`` on chromosome ``chrom``.

    Parameters
    ----------
    chrom : str
        Chromosome label compared against the ``chrN`` column; it must be
        convertible to ``int`` because the ``CHR`` column is cast to int.
    pos : number
        Centre of the window.
    window : number
        Half-width of the window; both bounds are exclusive.

    Returns
    -------
    pandas.DataFrame
        Columns ``CHR``, ``BP``, ``SNP``, ``P``, ``BETA`` (natural log of
        ``OR``), ``PHE`` (always ``'SCZD'``) and
        ``pgc2_a1_same_as_our_counted``.

    The expensive subset is memoised per argument triple, but every call
    returns a fresh copy so that callers may modify the result (including
    in place) without altering what later calls receive.
    """
    return _subset_gwas_cached(chrom, pos, window).copy()


# Keep the lru_cache helpers reachable under the public name.
subset_gwas.cache_info = _subset_gwas_cached.cache_info
subset_gwas.cache_clear = _subset_gwas_cached.cache_clear


@functools.lru_cache()
def get_eqtl(fn, feature):
    """Extract the header and all rows for one feature from a gzipped table.

    Parameters
    ----------
    fn : str
        Path to a gzip-compressed, tab-separated file with a header line.
    feature : str
        Value the first awk field of a row must equal for the row to be kept.

    Returns
    -------
    pandas.DataFrame
        The matching rows (an empty frame with the header's columns when
        nothing matches). Results are memoised per ``(fn, feature)``, so the
        returned object is shared between calls and should not be mutated.

    Raises
    ------
    subprocess.CalledProcessError
        If the shell pipeline exits with a non-zero status.
    """
    # Single pass over the archive: line 1 (the header) plus matching rows.
    # The previous form decompressed the file twice (once only for the header).
    cmd = '''
    zcat %s | awk 'NR == 1 || $1 == "%s"'
    ''' % (fn, feature)
    with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) as p:
        df = pd.read_csv(p.stdout, sep='\t')
    # Leaving the with-block waits for the process, so returncode is set here.
    if p.returncode != 0:
        raise subprocess.CalledProcessError(p.returncode, cmd)
    return df


@functools.lru_cache()
def annotate_eqtls(fn, feature):
    """Reshape the eQTL rows of one feature into the plotting column layout.

    Reads the rows via ``get_eqtl(fn, feature)`` and returns a new frame,
    on the same index, with the columns ``SNP.Id`` (from ``variant_id``),
    ``Gene.Symbol`` (from ``gene_id``), ``P.Value`` (from ``pval_nominal``),
    ``NES`` (from ``slope``) and a constant ``Tissue`` of ``'caudate'``.
    """
    column_map = {
        'variant_id': 'SNP.Id',
        'gene_id': 'Gene.Symbol',
        'pval_nominal': 'P.Value',
        'slope': 'NES',
    }
    raw = get_eqtl(fn, feature)
    # Selecting with a list yields a new frame, so the cached `raw` is untouched.
    eqtl_df = raw[list(column_map)].rename(columns=column_map)
    return eqtl_df.assign(Tissue='caudate')

## Prepare data

### GWAS summary statistics data frame

In [None]:
## Locus definition: chromosome label, window centre and half-width
chrom = '11'
pos = 113412884
window = 2e5
## GWAS summary statistics restricted to that window
gwas_df = subset_gwas(chrom, pos, window)
print(gwas_df.dtypes, gwas_df.shape, sep="\n")
gwas_df.head(2)

### Gene data frame

In [None]:
## One row per junction: (CHR, Start, Stop, Gene label, Build)
junction_rows = [
    (11, 113412884, 113415420, "chr11:113412884-113415420(-)", 'hg38'),
    (11, 113412884, 113414374, "chr11:113412884-113414374(-)", 'hg38'),
    (11, 113414462, 113415420, "chr11:113414462-113415420(-)", 'hg38'),
]
genes_df = pd.DataFrame(junction_rows,
                        columns=['CHR', 'Start', 'Stop', 'Gene', 'Build'])
genes_df

### Significant p-values

In [None]:
## P value nominal threshold by feature from permutation analysis
perm_pval = pd.DataFrame({'Gene':["chr11:113412884-113415420(-)", 
                                  "chr11:113412884-113414374(-)", 
                                  "chr11:113414462-113415420(-)"], 
                          'perm_pval': [9.0384e-05, 9.06858e-05, 8.55823e-05]})
perm_pval

### eQTL data frame

In [None]:
%%time
## Replace file with q-value annotated so that significant p-value is stable
## Need to generate the annotation?? Would this work??
fn = '/ceph/projects/v4_phase3_paper/analysis/eqtl_analysis/all/junctions/'+\
     'expression_gct/prepare_expression/fastqtl_nominal/_m/Brainseq_LIBD.allpairs.txt.gz'
#feature = "chr11:113412884-113415420(-)"
eqtl_df1 = annotate_eqtls(fn, genes_df.Gene[0])
eqtl_df2 = annotate_eqtls(fn, genes_df.Gene[1])
eqtl_df3 = annotate_eqtls(fn, genes_df.Gene[2])

In [None]:
## Bring in the GWAS allele-match flag for every eQTL SNP; all other GWAS
## columns are dropped again straight away.
## SNPs without a GWAS match get the flag True, i.e. their slope is left as is.
## The fill is restricted to the flag column so that a missing P.Value or NES
## is not silently replaced by True.
## NOTE: this cell overwrites eqtl_df1 and is not idempotent -- rebuild
## eqtl_df1 before re-running it.
eqtl_df1 = pd.merge(eqtl_df1, gwas_df, left_on="SNP.Id", right_on="SNP", how="left")\
         .drop(["CHR", "SNP", "BP", "P", "BETA", "PHE"], axis=1)\
         .fillna({"pgc2_a1_same_as_our_counted": True})
## Negate NES wherever the flag is falsy
eqtl_df1.loc[:,'NES'] = eqtl_df1.apply(flip_slope_by_allele, axis=1)
eqtl_df1.drop(["pgc2_a1_same_as_our_counted"], axis=1, inplace=True)
print(eqtl_df1.shape)
eqtl_df1.sort_values("P.Value").head(5)

In [None]:
## Bring in the GWAS allele-match flag for every eQTL SNP; all other GWAS
## columns are dropped again straight away.
## SNPs without a GWAS match get the flag True, i.e. their slope is left as is.
## The fill is restricted to the flag column so that a missing P.Value or NES
## is not silently replaced by True.
## NOTE: this cell overwrites eqtl_df2 and is not idempotent -- rebuild
## eqtl_df2 before re-running it.
eqtl_df2 = pd.merge(eqtl_df2, gwas_df, left_on="SNP.Id", right_on="SNP", how="left")\
         .drop(["CHR", "SNP", "BP", "P", "BETA", "PHE"], axis=1)\
         .fillna({"pgc2_a1_same_as_our_counted": True})
## Negate NES wherever the flag is falsy
eqtl_df2.loc[:,'NES'] = eqtl_df2.apply(flip_slope_by_allele, axis=1)
eqtl_df2.drop(["pgc2_a1_same_as_our_counted"], axis=1, inplace=True)
print(eqtl_df2.shape)
eqtl_df2.sort_values("P.Value").head(5)

In [None]:
## Bring in the GWAS allele-match flag for every eQTL SNP; all other GWAS
## columns are dropped again straight away.
## SNPs without a GWAS match get the flag True, i.e. their slope is left as is.
## The fill is restricted to the flag column so that a missing P.Value or NES
## is not silently replaced by True.
## NOTE: this cell overwrites eqtl_df3 and is not idempotent -- rebuild
## eqtl_df3 before re-running it.
eqtl_df3 = pd.merge(eqtl_df3, gwas_df, left_on="SNP.Id", right_on="SNP", how="left")\
         .drop(["CHR", "SNP", "BP", "P", "BETA", "PHE"], axis=1)\
         .fillna({"pgc2_a1_same_as_our_counted": True})
## Negate NES wherever the flag is falsy
eqtl_df3.loc[:,'NES'] = eqtl_df3.apply(flip_slope_by_allele, axis=1)
eqtl_df3.drop(["pgc2_a1_same_as_our_counted"], axis=1, inplace=True)
print(eqtl_df3.shape)
eqtl_df3.sort_values("P.Value").head(5)

In [None]:
## The allele flag is no longer needed once the eQTL slopes have been flipped.
## Rebind instead of dropping in place: gwas_df may be the very object held in
## subset_gwas's lru_cache, and an in-place drop would also strip the column
## from whatever later calls with the same arguments return.
gwas_df = gwas_df.drop(columns=["pgc2_a1_same_as_our_counted"])
print(gwas_df.shape)

### LD data frame

In [None]:
## Family list passed to plink's --keep-fam by get_ld
fn_fam = ("/ceph/projects/v4_phase3_paper/analysis/eqtl_analysis/all/junctions/"
          "expression_gct/_m/keepFam.txt")
## One LD table per junction; the label names the files written by get_ld
ld_df1 = get_ld(fn=fn_fam, eqtl_dfx=eqtl_df1, gwas_dfx=gwas_df, label="jxn5_7")
print(ld_df1.shape)
ld_df2 = get_ld(fn=fn_fam, eqtl_dfx=eqtl_df2, gwas_dfx=gwas_df, label="jxn6_7")
print(ld_df2.shape)
ld_df3 = get_ld(fn=fn_fam, eqtl_dfx=eqtl_df3, gwas_dfx=gwas_df, label="jxn5_6")
print(ld_df3.shape)

## Plot

In [None]:
%%R -i gwas_df,genes_df,perm_pval
# The -i option on the magic line passes gwas_df, genes_df and perm_pval
# from the Python session into R.
library(eQTpLot)
# Display perm_pval from the R side.
perm_pval

### Junction 5-7

In [None]:
%%R -i ld_df1,eqtl_df1
# Select the LD and eQTL tables for this junction (first feature)
ld_df <- ld_df1
eqtl_df <- eqtl_df1
# Significance threshold and feature name: first row of perm_pval
pval <- perm_pval$perm_pval[1]
gene <- perm_pval$Gene[1]

# Plot with congruence switched off
p <- eQTpLot(
    GWAS.df = gwas_df,
    eQTL.df = eqtl_df,
    Genes.df = genes_df,
    LD.df = ld_df,
    LDmin = 10,
    R2min = 0.25,
    LDcolor = 'black',
    gene = gene,
    trait = 'SCZD',
    gbuild = 'hg38',
    tissue = 'caudate',
    sigpvalue_eQTL = pval,
    CollapseMethod = "min",
    congruence = FALSE
)

In [None]:
%%R

# Same inputs as already set in the R session (gwas_df, eqtl_df, genes_df,
# ld_df, gene, pval), this time with congruence switched on
p <- eQTpLot(
    GWAS.df = gwas_df,
    eQTL.df = eqtl_df,
    Genes.df = genes_df,
    LD.df = ld_df,
    LDmin = 10,
    R2min = 0.25,
    LDcolor = 'black',
    gene = gene,
    trait = 'SCZD',
    gbuild = 'hg38',
    tissue = 'caudate',
    sigpvalue_eQTL = pval,
    CollapseMethod = "min",
    congruence = TRUE
)