In [None]:
####
#
# Adapted from:
# https://github.com/LieberInstitute/brainseq_phase2.git
#
#
#

### libraries
library(SummarizedExperiment)
library(jaffelab)
library(MatrixEQTL)
library(sva)

In [None]:
######################
### load data ####
######################

## Load the four merged caudate count files (transcript, junction, exon and
## gene level, n464). The later cells use the objects rse_tx, rse_jxn,
## rse_exon and rse_gene, presumably one per file -- TODO confirm.
for (rda_file in c(
    "/ceph/projects/v3_phase3_paper/inputs/phase3/_m/count_data/caudate_brainseq_phase3_hg38_rseTx_merged_n464.rda",
    "/ceph/projects/v3_phase3_paper/inputs/phase3/_m/count_data/caudate_brainseq_phase3_hg38_rseJxn_merged_n464.rda",
    "/ceph/projects/v3_phase3_paper/inputs/phase3/_m/count_data/caudate_brainseq_phase3_hg38_rseExon_merged_n464.rda",
    "/ceph/projects/v3_phase3_paper/inputs/phase3/_m/count_data/caudate_brainseq_phase3_hg38_rseGene_merged_n464.rda"
)) {
    load(rda_file)
}

In [None]:
#caudate382rnum = read.delim('../_h/382_rnum.txt', header = FALSE, stringsAsFactors = FALSE)$V1

In [None]:
## Feature filters (cutoffs taken from BrainSeq phase 2)

# genes and exons: keep rows whose stored meanExprs exceeds 0.2
rse_gene <- rse_gene[rowData(rse_gene)$meanExprs > 0.2, ]
rse_exon <- rse_exon[rowData(rse_exon)$meanExprs > 0.2, ]

# junctions: give every junction the same Length (100) so getRPKM() can be
# used, then keep rows whose mean across samples exceeds 0.4
rowRanges(rse_jxn)$Length <- 100
jRp10m <- recount::getRPKM(rse_jxn, 'Length')
rse_jxn <- rse_jxn[rowMeans(jRp10m) > 0.4, ]

# transcripts: keep rows whose mean TPM exceeds 0.4
rse_tx <- rse_tx[rowMeans(assays(rse_tx)$tpm) > 0.4, ]

## Sample filter: keep samples with Age > 13.
## The column indices are computed on rse_gene and applied to all four
## objects, so this assumes they share the same sample order -- TODO confirm.
## (No brain-region filter is applied here.)
keepInd <- which(colData(rse_gene)$Age > 13)
rse_gene <- rse_gene[, keepInd]
rse_exon <- rse_exon[, keepInd]
rse_jxn <- rse_jxn[, keepInd]
rse_tx <- rse_tx[, keepInd]

## Phenotype table and normalised expression of the retained samples
pd <- colData(rse_gene)
geneRpkm <- recount::getRPKM(rse_gene, "Length")
exonRpkm <- recount::getRPKM(rse_exon, "Length")
jxnRp10m <- recount::getRPKM(rse_jxn, 'Length')
txTpm <- assays(rse_tx)$tpm


## log2(x + 1) expression matrices, written to expression.rda
geneExpression <- log2(geneRpkm + 1)
exonExpression <- log2(exonRpkm + 1)
jxnExpression <- log2(jxnRp10m + 1)
txExpression <- log2(txTpm + 1)

save(geneExpression, exonExpression, jxnExpression, txExpression,
     file = "expression.rda")

######################
### snp data ####
######################

In [None]:
## Genotype data; the code below uses the objects snp, snpMap and mds,
## presumably all provided by this file -- TODO confirm
load("/ceph/projects/v3_phase3_paper/inputs/phase3/_m/genotype_data/BrainSeq_Phase3_Caudate_RiboZero_Genotypes_n464.rda")

### Index mds rows and snp columns by pd$RNum so both have one entry per
### retained expression sample, in the same order as pd.
### NOTE(review): assumes mds rownames and snp colnames are RNum
### identifiers -- confirm against the genotype file.
mds <- mds[pd$RNum, ]
snp <- snp[, pd$RNum]

## Remove SNPs that have no hg38 chromosome in snpMap
keepIndex <- which(!is.na(snpMap$chr_hg38))
snpMap <- snpMap[keepIndex, ]
snp <- snp[keepIndex, ]

In [None]:
######################
# statistical model ##
######################

## Diagnosis as a factor with Control as the reference level
pd$Dx <- factor(pd$Dx, levels = c("Control", "Schizo", "Bipolar"))

## Model matrix: diagnosis + sex + first five columns of mds
mod <- model.matrix(~Dx + Sex + as.matrix(mds[,1:5]), data = pd)

## Columns 5 to 9 are taken to be the five mds terms and are renamed to the
## mds column names. NOTE(review): this position assumes intercept + 2 Dx
## columns + 1 Sex column come first -- confirm Sex has exactly two levels.
colnames(mod)[5:9] <- colnames(mds)[1:5]

######################
# create SNP objects #
######################

## Genotypes as a Matrix eQTL SlicedData object, in slices of 50000 rows
theSnps <- SlicedData$new(as.matrix(snp))
theSnps$ResliceCombined(sliceSize = 50000)

## SNP position table (id, hg38 chromosome, hg38 position) with the
## column names name / chr / pos
snpspos <- snpMap[, c("SNP", "chr_hg38", "pos_hg38")]
colnames(snpspos) <- c("name", "chr", "pos")

In [None]:
#######################
####### do PCA ########
#######################

## For each feature type: run PCA across samples on the log2(x + 1)
## expression matrix, ask num.sv() how many components (k) to keep given the
## model matrix `mod`, and keep the first k principal components.
##
## The log2(x + 1) matrices already exist as geneExpression, exonExpression,
## jxnExpression and txExpression (built from the same Rpkm/Rp10m/Tpm
## objects when expression.rda was saved), so they are reused here instead
## of being recomputed twice per feature type.
##
## PCs are selected with seq_len(k) and drop = FALSE. With `1:k`, k = 0
## expands to c(1, 0) and silently returns PC1, and k = 1 collapses the
## result to a plain vector that loses its "PC1" column name.

pcaGene = prcomp(t(geneExpression))
kGene = num.sv(geneExpression, mod)
genePCs = pcaGene$x[, seq_len(kGene), drop = FALSE]

pcaExon = prcomp(t(exonExpression))
kExon = num.sv(exonExpression, mod, vfilter=50000)
exonPCs = pcaExon$x[, seq_len(kExon), drop = FALSE]

pcaJxn = prcomp(t(jxnExpression))
kJxn = num.sv(jxnExpression, mod, vfilter=50000)
jxnPCs = pcaJxn$x[, seq_len(kJxn), drop = FALSE]

pcaTx = prcomp(t(txExpression))
kTx = num.sv(txExpression, mod, vfilter=50000)
txPCs = pcaTx$x[, seq_len(kTx), drop = FALSE]

##save(genePCs, file = "pca_rdas/pcs_caudate_4features_filtered_over13.rda")

## showWarnings = FALSE: re-running the cell must not warn when the
## directory already exists
dir.create('pca_rdas', showWarnings = FALSE)
save(genePCs, exonPCs, jxnPCs, txPCs, 
     file="pca_rdas/pcs_caudate_4features_filtered_over13.rda")

In [None]:
##################
### Covariates ###
##################

## Reload the saved principal components (genePCs, exonPCs, jxnPCs, txPCs)
load("pca_rdas/pcs_caudate_4features_filtered_over13.rda")

## Per feature type: model matrix without its first (intercept) column,
## bound to the PCs, then transposed so covariates are rows and samples
## are columns
covsGene0 <- t(cbind(mod[, -1], genePCs))
covsExon0 <- t(cbind(mod[, -1], exonPCs))
covsJxn0 <- t(cbind(mod[, -1], jxnPCs))
covsTx0 <- t(cbind(mod[, -1], txPCs))

save(covsGene0, covsExon0, covsJxn0, covsTx0, file = "covariates.rda")

## Matrix eQTL covariate objects
covsGene <- SlicedData$new(covsGene0)
covsExon <- SlicedData$new(covsExon0)
covsJxn <- SlicedData$new(covsJxn0)
covsTx <- SlicedData$new(covsTx0)


In [None]:
##########################
### feature annotation ###
##########################

## Position table for one RangedSummarizedExperiment: the feature name
## (row name) followed by the first three columns of its rowRanges as a
## data frame.
featurePositions <- function(rse) {
    pos <- as.data.frame(rowRanges(rse))[, 1:3]
    pos$name <- rownames(pos)
    pos[, c(4, 1:3)]
}

###### gene level
posGene <- featurePositions(rse_gene)

##### exon level
posExon <- featurePositions(rse_exon)

##### junction level (coordinate columns renamed)
posJxn <- featurePositions(rse_jxn)
names(posJxn)[2:4] <- c("Chr", "Start", "End")

##### transcript level (coordinate columns renamed)
posTx <- featurePositions(rse_tx)
names(posTx)[2:4] <- c("Chr", "Start", "End")


#############################
### sliced expression data ##
## One Matrix eQTL SlicedData object per feature type, each resliced into
## blocks of 5000 rows
geneSlice <- SlicedData$new(geneExpression)
geneSlice$ResliceCombined(sliceSize = 5000)

exonSlice <- SlicedData$new(exonExpression)
exonSlice$ResliceCombined(sliceSize = 5000)

jxnSlice <- SlicedData$new(jxnExpression)
jxnSlice$ResliceCombined(sliceSize = 5000)

txSlice <- SlicedData$new(txExpression)
txSlice$ResliceCombined(sliceSize = 5000)

In [None]:
##########################
### Run EQTLs ############
##########################
print("Begin eQTL analysis")

## One Matrix eQTL run with the settings shared by all four feature types:
## linear model, cis distance 5e5, cis p-value output threshold 0.01,
## pvOutputThreshold 0, 100-bin p-value histogram and minimum p-value
## tracking per gene/SNP. Only the expression slices, covariates, feature
## positions and cis output file differ between runs.
runCisEqtl <- function(exprSlice, covs, featurePos, outFile) {
    Matrix_eQTL_main(snps = theSnps, gene = exprSlice,
        cvrt = covs, output_file_name.cis = outFile,
        pvOutputThreshold.cis = 0.01, pvOutputThreshold = 0,
        snpspos = snpspos, genepos = featurePos,
        useModel = modelLINEAR, cisDist = 5e5,
        pvalue.hist = 100, min.pv.by.genesnp = TRUE)
}

meGene <- runCisEqtl(geneSlice, covsGene, posGene, "cis_eqtls_genes.ctxt")

meExon <- runCisEqtl(exonSlice, covsExon, posExon, "cis_eqtl_exons.ctxt")

meJxn <- runCisEqtl(jxnSlice, covsJxn, posJxn, "cis_eqtl_junctions.ctxt")

# NOTE(review): "trasncripts" is the existing output file name; kept as is
# in case downstream steps read it under that spelling.
meTx <- runCisEqtl(txSlice, covsTx, posTx, "cis_eqtl_trasncripts.ctxt")

## Store all four result objects together
dir.create('eqtl_tables')
save(meGene, meExon, meJxn, meTx,
     file = "eqtl_tables/matrixEqtl_output_caudate_4features_p01.rda")

#save(meGene,
#    file="eqtl_tables/matrixEqtl_output_caudate_4features_p01.rda")

In [None]:
######################
###### annotate ######

load("eqtl_tables/matrixEqtl_output_caudate_4features_p01.rda")

## Cis eQTL table of one Matrix eQTL result, with the feature (gene) and
## SNP identifiers converted to character
cisTable <- function(me) {
    tab <- me$cis$eqtls
    tab$gene <- as.character(tab$gene)
    tab$snps <- as.character(tab$snps)
    tab
}

# extract
geneEqtl <- cisTable(meGene)
exonEqtl <- cisTable(meExon)
jxnEqtl <- cisTable(meJxn)
txEqtl <- cisTable(meTx)

################################
# add gene annotation info #####
################################

## For each feature type, look up every eQTL feature once in the matching
## rse object and pull the annotation columns through that row index.

geneHit <- match(geneEqtl$gene, rownames(rse_gene))
geneEqtl$Symbol <- rowRanges(rse_gene)$Symbol[geneHit]
geneEqtl$EnsemblGeneID <- rowRanges(rse_gene)$ensemblID[geneHit]
geneEqtl$Type <- "Gene"
geneEqtl$Class <- "InGen"
geneEqtl <- DataFrame(geneEqtl)
# geneEqtl$gene_type = rowRanges(rse_gene)$gene_type[match(geneEqtl$gene, rownames(rse_gene))]

exonHit <- match(exonEqtl$gene, rownames(rse_exon))
exonEqtl$Symbol <- rowRanges(rse_exon)$Symbol[exonHit]
exonEqtl$EnsemblGeneID <- rowRanges(rse_exon)$ensemblID[exonHit]
exonEqtl$Type <- "Exon"
exonEqtl$Class <- "InGen"
exonEqtl <- DataFrame(exonEqtl)
# exonEqtl$gene_type = rowRanges(rse_exon)$gene_type[match(exonEqtl$gene, rownames(rse_exon))]

## junctions take symbol / gene id from the newGene* columns and carry
## their own Class value
jxnHit <- match(jxnEqtl$gene, rownames(rse_jxn))
jxnEqtl$Symbol <- rowRanges(rse_jxn)$newGeneSymbol[jxnHit]
jxnEqtl$EnsemblGeneID <- rowRanges(rse_jxn)$newGeneID[jxnHit]
jxnEqtl$Type <- "Jxn"
jxnEqtl$Class <- rowRanges(rse_jxn)$Class[jxnHit]
jxnEqtl <- DataFrame(jxnEqtl)
# jxnEqtl$gene_type = rowRanges(rse_jxn)$gene_type[match(jxnEqtl$gene, rownames(rse_jxn))]

## transcripts: gene id is the part of gene_id before the first "."
txHit <- match(txEqtl$gene, rownames(rse_tx))
txEqtl$Symbol <- rowRanges(rse_tx)$gene_name[txHit]
txEqtl$EnsemblGeneID <- ss(rowRanges(rse_tx)$gene_id[txHit], "\\.", 1)
txEqtl$Type <- "Tx"
txEqtl$Class <- "InGen"
txEqtl <- DataFrame(txEqtl)
# txEqtl$gene_type = rowRanges(rse_tx)$gene_type[match(txEqtl$gene, rownames(rse_tx))]


# merge
## Row order is gene, exon, junction, transcript; the gencodeTx list below
## is concatenated in the same order so it lines up with the rows. For
## transcript rows the transcript id itself is used.
allEqtl <- rbind(geneEqtl, exonEqtl, jxnEqtl, txEqtl)
allEqtl$gencodeTx <- CharacterList(c(
    as.list(rowRanges(rse_gene)$gencodeTx[geneHit]),
    as.list(rowRanges(rse_exon)$gencodeTx[exonHit]),
    as.list(rowRanges(rse_jxn)$gencodeTx[jxnHit]),
    as.list(txEqtl$gene)))

## add snp rs number
allEqtl$snpRsNum <- snpMap$name[match(allEqtl$snps, snpMap$SNP)]

##geneEqtl$snpRsNum = snpMap$name[match(geneEqtl$snps, snpMap$SNP)]


## save the full merged table
save(allEqtl, file = "eqtl_tables/mergedEqtl_output_caudate_4features.rda",
     compress = TRUE)

## significance filter: FDR < 0.05
allEqtl_signif <- allEqtl[allEqtl$FDR < 0.05, ]
save(allEqtl_signif,
     file = "eqtl_tables/mergedEqtl_output_caudate_4features_FDR05.rda",
     compress = TRUE)

## significance filter: FDR < 0.01 (stored under the same object name,
## allEqtl_signif, in its own file)
allEqtl_signif <- allEqtl[allEqtl$FDR < 0.01, ]
save(allEqtl_signif,
     file = "eqtl_tables/mergedEqtl_output_caudate_4features_FDR01.rda",
     compress = TRUE)

##save(geneEqtl, file="eqtl_tables/gene_eqtl.rda")
##geneEqtl_signif = geneEqtl[geneEqtl$FDR < 0.05,]
##save(geneEqtl_signif, file="eqtl_tables/gene_eqtl_fdr05.rda")
