In [1]:
# Work from the project root: every relative path used below is resolved against it.
setwd("/frazer01/projects/CARDIPS/analysis/cardiac_eqtls")

# Project-local setup scripts. Their contents are not visible here; presumably they
# provide the packages and helpers used below (e.g. fread, fwrite, render_html) -- TODO confirm.
source("script/packages.R"  )
source("script/input_data.R")
source("script/functions.R" )
source("script/colors.R"    )


In [4]:
# Read every tab-delimited input as a plain data.frame (data.table = FALSE).

# GWAS trait manifest.
manifest <- fread(
  "pipeline/6.1.coloc_gwas/traits.manifest.txt",
  sep = "\t", header = TRUE, data.table = FALSE
)

# Gene-level eQTL tables: egenes and interactions.
eqtl_genes <- fread(
  "pipeline/3.2.eqtls/eqtls/cardiac_eqtls.gene.egenes.txt",
  sep = "\t", header = TRUE, data.table = FALSE
)
int_genes <- fread(
  "pipeline/3.2.eqtls/eqtls/cardiac_eqtls.gene.interactions.txt",
  sep = "\t", header = TRUE, data.table = FALSE
)

# Isoform-level eQTL tables: egenes and interactions.
eqtl_isoforms <- fread(
  "pipeline/3.2.eqtls/eqtls/cardiac_eqtls.isoform.egenes.txt",
  sep = "\t", header = TRUE, data.table = FALSE
)
int_isoforms <- fread(
  "pipeline/3.2.eqtls/eqtls/cardiac_eqtls.isoform.interactions.txt",
  sep = "\t", header = TRUE, data.table = FALSE
)

# eQTL-vs-GWAS colocalization results, gene and isoform level.
coloc_gene <- fread(
  "pipeline/6.1.coloc_gwas/coloc.eqtls.gene.txt",
  sep = "\t", header = TRUE, data.table = FALSE
)
coloc_isoform <- fread(
  "pipeline/6.1.coloc_gwas/coloc.eqtls.isoform.txt",
  sep = "\t", header = TRUE, data.table = FALSE
)

# Key identifying one eQTL signal: "<transcript_id> <type>" (space-separated).
# The coloc tables are joined on a column of the same name further down.
int_genes$tr2type <- with(int_genes, paste(transcript_id, type))
int_isoforms$tr2type <- with(int_isoforms, paste(transcript_id, type))


In [22]:
# Pairwise eQTL colocalization results from the eqtl_overlap step.
coloc <- fread(
  "pipeline/3.2.eqtls/eqtl_overlap/cardiac_eqtls.coloc_by_gene.txt",
  sep = "\t", header = TRUE, data.table = FALSE
)

# Keep only pairs whose second member has an id containing "ENSG" (presumably a
# gene-level eQTL paired with an isoform-level one -- confirm), and only the id,
# type and PP.H3 / PP.H4 columns.
coloc <- coloc[
  grepl("ENSG", coloc$transcript_id2),
  c("transcript_id1", "transcript_id2", "type1", "type2", "PP.H3.abf", "PP.H4.abf")
]

# Rename so the table can later be merged on transcript_id / gene_id / type_isoform / type_gene.
names(coloc) <- c(
  "transcript_id", "gene_id", "type_isoform", "type_gene",
  "pp3_iso2gene", "pp4_iso2gene"
)

In [12]:
# For every interaction eQTL, attach the strongest GWAS colocalization of its
# transcript/type key: the maximum PP.H4.abf per tr2type across the coloc table.
# all.x = TRUE keeps interaction rows that have no GWAS colocalization result.
e2c_genes <- merge(
  int_genes[, c("tr2type", "transcript_id", "gene_id", "gene_name", "id", "type", "interaction", "cell")],
  aggregate(PP.H4.abf ~ tr2type, data = coloc_gene, FUN = max),
  by = "tr2type",
  all.x = TRUE
)
e2c_isoforms <- merge(
  int_isoforms[, c("tr2type", "transcript_id", "gene_id", "gene_name", "id", "type", "interaction", "cell")],
  aggregate(PP.H4.abf ~ tr2type, data = coloc_isoform, FUN = max),
  by = "tr2type",
  all.x = TRUE
)

# The aggregated PP.H4.abf is the last column of each merge result.
names(e2c_genes)[ncol(e2c_genes)] <- "pp_gwas"
names(e2c_isoforms)[ncol(e2c_isoforms)] <- "pp_gwas"

# eQTLs without any GWAS colocalization result get a posterior of 0.
e2c_genes$pp_gwas[is.na(e2c_genes$pp_gwas)] <- 0
e2c_isoforms$pp_gwas[is.na(e2c_isoforms$pp_gwas)] <- 0

# Flag eQTLs whose best GWAS colocalization reaches PP.H4 >= 0.5.
e2c_genes$gwas <- FALSE
e2c_isoforms$gwas <- FALSE

e2c_genes$gwas[e2c_genes$pp_gwas >= 0.5] <- TRUE
e2c_isoforms$gwas[e2c_isoforms$pp_gwas >= 0.5] <- TRUE


In [24]:
# Pair every isoform-level row with every gene-level row of the same gene
# (inner join on gene_id). Columns present on both sides get the
# "_isoform" / "_gene" suffixes.
e2c <- merge(
  e2c_isoforms,
  e2c_genes[, c("gene_id", "id", "type", "interaction", "cell", "pp_gwas", "gwas")],
  by = "gene_id",
  suffixes = c("_isoform", "_gene")
)

# Keep only pairs that were tested for isoform-vs-gene colocalization and
# attach their pp3_iso2gene / pp4_iso2gene values (inner join).
e2c <- merge(
  e2c,
  coloc,
  by = c("transcript_id", "gene_id", "type_isoform", "type_gene")
)

In [28]:
# Transcript/gene pairs where the isoform and the gene rows refer to the same
# interaction and both have their `cell` flag set.
same_cell <- unique(e2c[
  e2c$interaction_isoform == e2c$interaction_gene &
    e2c$cell_isoform == TRUE &
    e2c$cell_gene == TRUE,
  c("transcript_id", "gene_id")
])

# "<transcript_id> <gene_id>" key used to exclude those pairs below.
same_cell$tr2gene <- paste(same_cell$transcript_id, same_cell$gene_id)
e2c$tr2gene <- paste(e2c$transcript_id, e2c$gene_id)

# Pairs where gene and isoform differ:
#   - the pair is not in same_cell,
#   - isoform and gene eQTL do not colocalize with each other (PP.H4 < 0.5),
#   - at least one of the two has its `cell` flag set,
#   - at least one of the two colocalizes with a GWAS trait.
e2c_diff <- e2c[
  !(e2c$tr2gene %in% same_cell$tr2gene) &
    e2c$pp4_iso2gene < 0.5 &
    (e2c$cell_gene == TRUE | e2c$cell_isoform == TRUE) &
    (e2c$gwas_gene == TRUE | e2c$gwas_isoform == TRUE),
]


In [51]:
# Collapse e2c_diff to one row per isoform-eQTL / gene-eQTL pair (the `by`
# columns). cell_gene / cell_isoform become TRUE when the comparison with TRUE
# selects at least one element in the group (NA elements are selected too).
e2c_filtered <- aggregate(
  e2c_diff[, c("cell_gene", "cell_isoform")],
  by = e2c_diff[, c(
    "transcript_id", "gene_id", "gene_name",
    "type_isoform", "type_gene",
    "id_isoform", "id_gene",
    "gwas_isoform", "gwas_gene",
    "pp_gwas_isoform", "pp_gwas_gene",
    "pp3_iso2gene", "pp4_iso2gene"
  )],
  FUN = function(flags) length(flags[flags == TRUE]) > 0
)

# Keys matching the tr2type column of the interaction tables.
e2c_filtered$tr2type_isoform <- paste(e2c_filtered$transcript_id, e2c_filtered$type_isoform)
e2c_filtered$tr2type_gene <- paste(e2c_filtered$gene_id, e2c_filtered$type_gene)

# Add one pair of logical columns per interaction term, "<name>.gene" and
# "<name>.isoform": TRUE when the gene / isoform eQTL has its `cell` flag set
# for that interaction. Only interaction names present in int_genes are iterated.
for (cell in sort(unique(int_genes$interaction))) {
  # Strip the "cibersort.regular." prefix from the interaction name.
  # NOTE(review): the dot between "cibersort" and "regular" is unescaped and so
  # matches any character -- confirm that is acceptable for the names in the data.
  mycol <- sub("cibersort.regular\\.", "", cell)

  e2c_filtered[[paste(mycol, "gene", sep = ".")]] <- FALSE
  e2c_filtered[[paste(mycol, "isoform", sep = ".")]] <- FALSE

  e2c_filtered[
    e2c_filtered$tr2type_gene %in%
      int_genes$tr2type[int_genes$cell == TRUE & int_genes$interaction == cell],
    paste(mycol, "gene", sep = ".")
  ] <- TRUE
  e2c_filtered[
    e2c_filtered$tr2type_isoform %in%
      int_isoforms$tr2type[int_isoforms$cell == TRUE & int_isoforms$interaction == cell],
    paste(mycol, "isoform", sep = ".")
  ] <- TRUE
}

# Build one human-readable label from a set of GWAS trait ids.
#
# Looks the ids up in `manifest` and returns the sorted, de-duplicated labels
# joined by "; ". The label is the manifest `description`; when that is empty
# or NA (in the manifest this is the case for the "prescriptions" traits) the
# `phenocode` is used instead, so those traits no longer turn into an empty
# label or a stray "; " separator.
#
# trait_ids: character vector of trait ids (values of manifest$id).
# Returns a single string; "" when no id matches the manifest.
describe_traits <- function(trait_ids) {
  hits <- manifest[manifest$id %in% trait_ids, c("description", "phenocode")]
  no_description <- is.na(hits$description) | hits$description == ""
  labels <- ifelse(
    no_description,
    as.character(hits$phenocode),
    as.character(hits$description)
  )
  paste(sort(unique(labels)), collapse = "; ")
}

# Traits colocalizing (PP.H4 >= 0.5) with each eQTL, one label string per tr2type key.
gwas2gene <- aggregate(
  trait ~ tr2type,
  data = coloc_gene[coloc_gene$PP.H4.abf >= 0.5, ],
  FUN = describe_traits
)
gwas2isoform <- aggregate(
  trait ~ tr2type,
  data = coloc_isoform[coloc_isoform$PP.H4.abf >= 0.5, ],
  FUN = describe_traits
)

colnames(gwas2gene)[[2]] <- "gwas_trait.gene"
colnames(gwas2isoform)[[2]] <- "gwas_trait.isoform"

# Left joins: pairs whose gene or isoform eQTL has no colocalizing trait are kept.
e2c_filtered <- merge(e2c_filtered, gwas2gene, all.x = TRUE, by.x = "tr2type_gene", by.y = "tr2type")
e2c_filtered <- merge(e2c_filtered, gwas2isoform, all.x = TRUE, by.x = "tr2type_isoform", by.y = "tr2type")

# No colocalizing trait -> empty string rather than NA in the written table.
e2c_filtered$gwas_trait.gene[is.na(e2c_filtered$gwas_trait.gene)] <- ""
e2c_filtered$gwas_trait.isoform[is.na(e2c_filtered$gwas_trait.isoform)] <- ""

# Output folder; showWarnings = FALSE keeps reruns quiet when it already exists.
dir.create("pipeline/6.1.coloc_gwas/differences_gene_isoform", showWarnings = FALSE)

# Filtered gene/isoform differences (one row per eQTL pair).
fwrite(
  e2c_filtered,
  "pipeline/6.1.coloc_gwas/differences_gene_isoform/e2c_filtered.txt",
  sep = "\t", col.names = TRUE, row.names = FALSE
)

# Full, unfiltered isoform-gene pair table.
fwrite(
  e2c,
  "pipeline/6.1.coloc_gwas/differences_gene_isoform/e2c.txt",
  sep = "\t", col.names = TRUE, row.names = FALSE
)


# Examples
## iPSC-CVPC-specific eIsoforms that colocalize with GWAS but do not colocalize with eGene:
- MLST8: BMI, cholesterol; involved in mTOR pathway; A LOT of associations between mTOR and weight or cholesterol
- **LPP**: blood pressure, gene involved in cell-cell adhesion and cell motility. Mechanosensitive protein (PMC3642136). Involved in smooth muscle differentiation (16397143)
- **KANK2**: pulse rate, gene involved in apoptosis by sequestering the proapoptotic factor AIFM1 in mitochondria. May play a role in MI by targeting NFKB (32570033)
- ADAM15: blood pressure, migration and cell adhesion of smooth muscle cells
- **NOD1**: LDL. NOD1 activation induces cardiac dysfunction and modulates cardiac fibrosis and cardiomyocyte apoptosis. Genetic inactivation of the innate immune receptor NOD1 prevents vascular inflammation linked to atherosclerosis (30496704)
- ATXN2L: BMI, blood pressure, cholesterol; KO induces developmental delay and embryonic lethality in mice. No known function
- NOMO3: heart rate, obesity; regulates NODAL (involved in embryonic development)
- DKK3: heart rate; protects against cardiac dysfunction and ventricular remodelling following myocardial infarction

## iPSC-CVPC-specific eGenes with adult-specific eIsoforms, only eIsoforms colocalize with GWAS (not great GWAS):
- NOTCH4
- ATG10

## iPSC-CVPC-specific eIsoforms with adult-specific eGenes, only eIsoforms colocalize with GWAS (not great GWAS):
- KANSL1: BMI, triglycerides; Infants with Koolen-De Vries syndrome also tend to have poor weight gain. 17q21.31 microdeletion syndrome (del17q21.31), also known as the Koolen-De Vries Syndrome, has a critical region that overlaps KANSL1

## other interesting genes:
- PTPA: blood pressure. phosphatase that regulates myocyte function and may be associated with cardiac disease (PMC5939568)
- **TRIOBP**: blood pressure and heart rate. Actin cytoskeleton organization. Different signals (primary and conditional) are associated with different traits.
- **STAU1**: atherosclerosis, hypertension, cholesterol, blood pressure. Disease modifier in the neuromuscular disorder Myotonic Dystrophy Type I (DM1). Involved in splicing of insulin receptor
- IP6K2: BMI, obesity, atherosclerosis, hypertension, blood pressure (only isoform: smooth muscle). involved in apoptosis and cell migration (PMC6383672). Treatment with inositols in obese PCOS patients is also effective in reducing BMI (PMC5766352)
- SNHG1: blood pressure. The Long Non-Coding RNA SNHG1 Attenuates Cell Apoptosis by Regulating miR-195 and BCL2-Like Protein 2 in Human Cardiomyocytes
- **DPH1**: pulse rate. biosynthesis of diphthamide. DPH1 syndrome is an autosomal recessive disorder associated with developmental delay, abnormal head circumference (microcephaly or macrocephaly), short stature, and congenital heart disease
- **EIF4E2**: pulse rate (gene: adult), blood pressure (isoform). Response to hypoxia (https://www.hindawi.com/journals/omcl/2017/6098107/)


In [52]:
# Genes picked as examples in the notes above.
mygenes <- c(
  "IP6K2", "EIF4E2", "TRIOBP", "MLST8", "KANSL1", "LPP", "ATXN2L",
  "STAU1", "DKK3", "KANK2", "ADAM15", "NOD1", "DPH1", "NOMO3"
)

# Print one render_html("<gene>") call per gene, one per line; the calls below
# match that printed list.
message(paste0('render_html("', mygenes, '")', collapse = "\n"))

# render_html is not defined in this file (presumably in script/functions.R -- confirm).
# The calls are kept as separate top-level statements rather than a loop.
render_html("IP6K2")
render_html("EIF4E2")
render_html("TRIOBP")
render_html("MLST8")
render_html("KANSL1")
render_html("LPP")
render_html("ATXN2L")
render_html("STAU1")
render_html("DKK3")
render_html("KANK2")
render_html("ADAM15")
render_html("NOD1")
render_html("DPH1")
render_html("NOMO3")



In [50]:
# Display the GWAS trait manifest for inspection (notebook auto-print).
manifest

id,trait_type,phenocode,description,description_more,coding_description,category,n_cases_full_cohort_both_sexes,n_controls_total,saige_heritability_EUR,filename
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<int>,<dbl>,<chr>
prescriptions-lisinopril-both_sexes,prescriptions,lisinopril,,,,"ACE inhibitor,anti-hypertensive",11552,425821,0.066174,/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/input/gwas/pan_ukbb/summary_statistics/prescriptions-lisinopril-both_sexes.txt.gz
prescriptions-candesartan-both_sexes,prescriptions,candesartan,,,,"angiotensin receptor blocker,ARB,anti-hypertensive",6065,430637,0.086125,/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/input/gwas/pan_ukbb/summary_statistics/prescriptions-candesartan-both_sexes.txt.gz
phecode-401.1-both_sexes,phecode,401.1,Essential hypertension,,,circulatory system,108554,340580,0.064991,/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/input/gwas/pan_ukbb/summary_statistics/phecode-401.1-both_sexes.txt.gz
phecode-278-both_sexes,phecode,278,"Overweight, obesity and other hyperalimentation",,,endocrine/metabolic,18558,420686,0.097427,/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/input/gwas/pan_ukbb/summary_statistics/phecode-278-both_sexes.txt.gz
continuous-MAP-both_sexes-manual_irnt,continuous,MAP,"Mean arterial pressure, manual reading",(SBP [93] + 2 * DBP [94]) / 3,,,40497,0,0.054508,/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/input/gwas/pan_ukbb/summary_statistics/continuous-MAP-both_sexes-manual_irnt.txt.gz
continuous-MAP-both_sexes-manual_raw,continuous,MAP,"Mean arterial pressure, manual reading",(SBP [93] + 2 * DBP [94]) / 3,,,40497,0,0.054623,/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/input/gwas/pan_ukbb/summary_statistics/continuous-MAP-both_sexes-manual_raw.txt.gz
continuous-SBP-both_sexes-manual_medadj_irnt,continuous,SBP,"Systolic blood pressure, manual reading, adjusted by medication",SBP (manual reading; 93) adjusted by the use of blood pressure lowering medication (6153_2 and 6177_2),,,39889,0,0.090545,/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/input/gwas/pan_ukbb/summary_statistics/continuous-SBP-both_sexes-manual_medadj_irnt.txt.gz
continuous-SBP-both_sexes-manual_medadj_raw,continuous,SBP,"Systolic blood pressure, manual reading, adjusted by medication",SBP (manual reading; 93) adjusted by the use of blood pressure lowering medication (6153_2 and 6177_2),,,39889,0,0.090849,/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/input/gwas/pan_ukbb/summary_statistics/continuous-SBP-both_sexes-manual_medadj_raw.txt.gz
prescriptions-atenolol-both_sexes,prescriptions,atenolol,,,,"selective beta1 antagonist,beta blocker",19326,420463,0.072783,/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/input/gwas/pan_ukbb/summary_statistics/prescriptions-atenolol-both_sexes.txt.gz
continuous-12336-both_sexes-irnt,continuous,12336,Ventricular rate,Ventricular rate during ECG measurement.,,"UK Biobank Assessment Centre > Physical measures > ECG at rest, 12-lead",36357,0,0.086593,/frazer01/projects/CARDIPS/analysis/cardiac_eqtls/input/gwas/pan_ukbb/summary_statistics/continuous-12336-both_sexes-irnt.txt.gz


In [40]:
# Inspect the column names and types of the gene-level GWAS colocalization table.
str(coloc_gene)

'data.frame':	2011740 obs. of  17 variables:
 $ nsnps        : int  2817 2817 2817 2817 2815 2816 2563 2814 2817 2813 ...
 $ PP.H0.abf    : num  0.321 0.302 0.247 0.329 0.301 ...
 $ PP.H1.abf    : num  0.622 0.585 0.478 0.638 0.584 ...
 $ PP.H2.abf    : num  0.0167 0.0346 0.0834 0.0092 0.0336 ...
 $ PP.H3.abf    : num  0.0324 0.067 0.1616 0.0178 0.0651 ...
 $ PP.H4.abf    : num  0.00717 0.01221 0.03008 0.00592 0.0161 ...
 $ transcript_id: chr  "ENSG00000000457.14_7" "ENSG00000000457.14_7" "ENSG00000000457.14_7" "ENSG00000000457.14_7" ...
 $ type         : int  0 0 0 0 0 0 0 0 0 0 ...
 $ id           : chr  "VAR_1_169860528_C_A" "VAR_1_169860528_C_A" "VAR_1_169690313_ACT_A" "VAR_1_169852613_A_G" ...
 $ chrom        : int  1 1 1 1 1 1 1 1 1 1 ...
 $ pos          : int  169860528 169860528 169690313 169852613 169860528 169860528 169860528 169860528 169747630 169860528 ...
 $ ref          : chr  "C" "C" "ACT" "A" ...
 $ alt          : chr  "A" "A" "A" "G" ...
 $ af           : num  0.13 0.