In [1]:
# Work from the project root: the relative paths used below
# ("script/...", "input/...", "pipeline/...") are resolved against it.
setwd("/frazer01/projects/CARDIPS/analysis/cardiac_eqtls")

# Load the shared project scripts, in this order. source() evaluates them in
# the global environment, so whatever they define is available to later cells.
invisible(lapply(c("script/packages.R",
                   "script/input_data.R",
                   "script/functions.R"),
                 source))


In [2]:
# Annotation tables, all read as plain data.frames (data.table = FALSE).
# The promoter BED has no header line, so its columns are addressed by position.
promoters = fread("/frazer01/reference/private/Gencode.v34lift37/promoters.bed",
                  header = FALSE, sep = "\t", data.table = FALSE)
isof_info = fread("input/phenotypes/isoform_info.txt",
                  header = TRUE, sep = "\t", data.table = FALSE)
gene_info = fread("input/phenotypes/gene_info.txt",
                  header = TRUE, sep = "\t", data.table = FALSE)

# Mirror gene_id into a transcript_id column (presumably so gene-level and
# isoform-level tables can be keyed by the same column name -- TODO confirm).
gene_info[["transcript_id"]] = gene_info[["gene_id"]]


In [3]:
# Differential expression table. The code below reads its cell_type, type,
# qval, beta and transcript_id columns.
diffexp = fread(
    "pipeline/4.1.differential_expression/diffexp_cell.txt",
    header     = TRUE,
    sep        = "\t",
    data.table = FALSE
)


In [4]:
# Output directory for the BED files and HOMER results produced below.
tmpwd = "pipeline/5.1.expression_enrichments/cell_type_enrichment_only_cell"

# recursive = TRUE creates missing parent directories as well. Without it, and
# with warnings suppressed, a missing parent made dir.create() fail silently and
# the first file write into tmpwd failed much later with a less obvious error.
dir.create(tmpwd, showWarnings = FALSE, recursive = TRUE)
if (!dir.exists(tmpwd))
{
    stop("Could not create output directory: ", tmpwd, call. = FALSE)
}

In [5]:
# Distinct cell type labels found in the differential expression table, sorted
# (sort() also drops NA labels).
cell_types = sort(unique(diffexp[["cell_type"]]))

In [9]:
# Prepare the promoter BED input for one cell type / analysis type and build the
# HOMER known-motif command line for it.
#
# Side effects: writes "<type>.<cell>.unmerged.bed" into `out_dir` and runs
# `bedtools merge` on it to produce "<type>.<cell>.bed". HOMER itself is NOT run
# here; only the command string is returned.
#
# Arguments:
#   cell      - cell type label; used to name the output files.
#   type      - analysis type label; used to name the output files.
#   genes     - vector of IDs; an isoform is selected when either its gene_id or
#               its transcript_id is in this vector.
#   isof_info - data.frame with columns gene_id, transcript_id and strand.
#   promoters - data.frame whose 4th column holds transcript IDs; its columns
#               are renamed to chrom/from/to/transcript_id (assumes exactly
#               four columns -- TODO confirm against the promoter BED).
#   out_dir   - directory receiving the BED files and named in the HOMER
#               command. Defaults to the global `tmpwd`, as before.
#
# Returns: a single character string, the findMotifsGenome.pl command.
run_homer_tissue = function(cell, type, genes, isof_info, promoters, out_dir = tmpwd)
{
    # `genes` may hold gene IDs or transcript IDs: resolve both to transcript IDs.
    is_selected        = isof_info$gene_id %in% genes | isof_info$transcript_id %in% genes
    isoforms           = promoters[ promoters[,4] %in% isof_info[is_selected, "transcript_id"],]
    colnames(isoforms) = c("chrom", "from", "to", "transcript_id")
    isoforms           = merge(isoforms, isof_info[,c("transcript_id", "strand")], by = "transcript_id")
    isoforms$score     = 0
    # bedtools merge needs its input grouped by chromosome and sorted by start.
    isoforms           = isoforms[order(isoforms[,"chrom"], isoforms[,"from"], isoforms[,"to"]),]
    analysis_name      = paste(type, cell, sep = ".")
    bed_file           = file.path(out_dir, paste(analysis_name,             "bed", sep = "."))
    bed_file_unmerged  = file.path(out_dir, paste(analysis_name, "unmerged", "bed", sep = "."))
    motif_file         = "/frazer01/reference/public/hocomoco_v11/HOCOMOCOv11_core_HUMAN_mono_homer_format_0.0001.motif"

    if (nrow(isoforms) == 0)
    {
        # Keep going (the command is still returned, as before) but make the
        # empty input visible instead of leaving it to surface inside HOMER.
        warning("No promoters selected for ", analysis_name, ": HOMER input will be empty", call. = FALSE)
    }

    fwrite(isoforms[,c("chrom", "from", "to", "transcript_id", "score", "strand")], bed_file_unmerged, sep = "\t", col.names = FALSE, row.names = FALSE)

    # Paths contain data-derived labels (cell, type), so quote them for the shell.
    merge_command = paste("bedtools merge -c 4 -o distinct", "-i", shQuote(bed_file_unmerged), ">", shQuote(bed_file))
    merge_status  = system(merge_command)
    if (merge_status != 0)
    {
        warning("bedtools merge exited with status ", merge_status, " for ", analysis_name, call. = FALSE)
    }

    command = paste("findMotifsGenome.pl", shQuote(bed_file), "hg19", shQuote(file.path(out_dir, analysis_name)),
                    "-mknown" , shQuote(motif_file),
                    "-mcheck" , shQuote(motif_file),
                    "-nomotif",
                    "-size"   , 200
                   )

    return(command)
}

# Select the significantly up-regulated features for one cell type / analysis
# type and hand their IDs to run_homer_tissue().
#
# Arguments:
#   diffexp        - data.frame with columns cell_type, type, qval, beta and
#                    transcript_id.
#   cell           - cell type label to keep (diffexp$cell_type == cell).
#   type           - analysis type label to keep (diffexp$type == type).
#   isof_info      - passed through to run_homer_tissue().
#   promoters      - passed through to run_homer_tissue().
#   qval_threshold - keep rows with qval <= this value (default 0.05, the
#                    previously hard-coded cutoff).
#
# Returns: whatever run_homer_tissue() returns for the selected IDs.
enrichment_epigenome = function(diffexp, cell, type, isof_info, promoters, qval_threshold = 0.05)
{
    message(paste(cell, type))

    # which() drops comparisons that evaluate to NA (e.g. a missing qval or
    # beta). Indexing with the raw logical vector would instead insert all-NA
    # rows and pass an NA ID downstream.
    keep = which(diffexp$cell_type == cell & diffexp$type == type & diffexp$qval <= qval_threshold & diffexp$beta > 0)
    x    = diffexp[keep, ]
    out  = run_homer_tissue(cell, type, unique(x$transcript_id), isof_info, promoters)

    return(out)
}

# Build one HOMER command per (cell type, analysis type) pair, cell type
# varying slowest. The nested lists are flattened by a single recursive
# unlist(), which keeps the commands in iteration order.
to_run = unlist(lapply(cell_types, function(cell_label)
{
    lapply(c("gene_tpm", "isoform_use"), function(analysis_type)
    {
        enrichment_epigenome(diffexp, cell_label, analysis_type, isof_info, promoters)
    })
}))


cibersort.regular.cardiac_muscle gene_tpm

cibersort.regular.cardiac_muscle isoform_use

cibersort.regular.cardiac_neuron gene_tpm

cibersort.regular.cardiac_neuron isoform_use

cibersort.regular.endocardial gene_tpm

cibersort.regular.endocardial isoform_use

cibersort.regular.endothelial gene_tpm

cibersort.regular.endothelial isoform_use

cibersort.regular.fibroblast gene_tpm

cibersort.regular.fibroblast isoform_use

cibersort.regular.immune gene_tpm

cibersort.regular.immune isoform_use

cibersort.regular.myofibroblast gene_tpm

cibersort.regular.myofibroblast isoform_use

cibersort.regular.smooth_muscle gene_tpm

cibersort.regular.smooth_muscle isoform_use



In [15]:
# Save the HOMER commands, one per line, to a shell script named after the
# output directory ("<tmpwd>.run_homer.sh", i.e. next to it, not inside it).
shfile = paste0(tmpwd, ".run_homer.sh")
writeLines(to_run, shfile)

# Print the two ways of launching the script (foreground / background).
message("bash ", shfile)
message("nohup bash ", shfile, " &")

bash pipeline/5.1.expression_enrichments/cell_type_enrichment_only_cell.run_homer.sh

nohup bash pipeline/5.1.expression_enrichments/cell_type_enrichment_only_cell.run_homer.sh &



In [10]:
# HOCOMOCO v11 motif annotation table, read as a plain data.frame.
motifs = fread("/frazer01/reference/public/hocomoco_v11/HOCOMOCOv11_core_annotation_HUMAN_mono.tsv",
               header = TRUE, sep = "\t", data.table = FALSE)

# Normalise the header to lower case with underscores, then expose the
# transcription factor column under the additional name gene_name.
names(motifs)         = gsub(" ", "_", tolower(names(motifs)), fixed = TRUE)
motifs[["gene_name"]] = motifs[, "transcription_factor"]


In [14]:
# Inspect the annotation row of a single motif model.
motifs[motifs[["model"]] == "SP1_HUMAN.H11MO.0.A", ]

Unnamed: 0_level_0,model,transcription_factor,model_length,quality,model_rank,consensus,model_release,data_source,best_auroc_(human),best_auroc_(mouse),peak_sets_in_benchmark_(human),peak_sets_in_benchmark_(mouse),aligned_words,tf_family,tf_subfamily,hgnc,entrezgene,uniprot_id,uniprot_ac,gene_name
Unnamed: 0_level_1,<chr>,<chr>,<int>,<chr>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<int>,<int>,<int>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>,<chr>
288,SP1_HUMAN.H11MO.0.A,SP1,22,A,0,nRGGGGCGGGGCSdSSvSSSvS,HOCOMOCOv11,ChIP-Seq,0.999738,0.9568786,53,11,499,Three-zinc finger Krüppel-related factors{2.3.1},Sp1-like factors{2.3.1.1},11205,6667,SP1_HUMAN,P08047,SP1
