In [1]:
library(chromVAR)
library(SummarizedExperiment)
library(Matrix)
library(motifmatchr)
library(BiocParallel)
register(SerialParam())
library(BSgenome.NV.200.Nvec200)
library(BSgenome.hydra.AEP.v02)
library(dplyr)



Loading required package: MatrixGenerics

Loading required package: matrixStats


Attaching package: ‘MatrixGenerics’


The following objects are masked from ‘package:matrixStats’:

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
    rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
    rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
    rowSdDiffs, rowSds, rowSums2, 

## N. vectensis

In [2]:
# Load the merged N. vectensis peak set as genomic ranges.
# sort_peaks = FALSE keeps the peaks in file order.
# NOTE(review): presumably the precomputed peak x motif table read later is
# row-aligned to this order -- confirm before sorting or resizing peaks.
peaks <- getPeaks(
  "./Peaks/Nv.merged_peaks.narrowPeak",
  sort_peaks = FALSE
)

“Peaks are not equal width!Use resize(peaks, width = x, fix = "center") to make peaks equal in size, where x is the desired size of the peaks)”
“Peaks not sorted”


In [3]:
# Display the loaded peak ranges as a quick sanity check.
peaks

GRanges object with 44185 ranges and 0 metadata columns:
          seqnames      ranges strand
             <Rle>   <IRanges>  <Rle>
      [1]     chr1 30757-30973      *
      [2]     chr1 36089-36254      *
      [3]     chr1 54526-54996      *
      [4]     chr1 55222-55654      *
      [5]     chr1 82708-82980      *
      ...      ...         ...    ...
  [44181]   chrUn9 43781-44097      *
  [44182]   chrUn9 48110-48279      *
  [44183]   chrUn9 48357-48584      *
  [44184]   chrUn9 48840-49257      *
  [44185]   chrUn9 50007-50210      *
  -------
  seqinfo: 25 sequences from an unspecified genome; no seqlengths

In [4]:
# Shifted ATAC-seq alignments for the N. vectensis samples: two ELAV-positive
# and two ELAV-negative replicates, in that order.
bamfiles <- c(
  "./Cnidarian_bam/NV/ELAV_positive_1_final_q3_shifted.bam",
  "./Cnidarian_bam//NV/ELAV_positive_2_final_q3_shifted.bam",
  "./Cnidarian_bam/NV/ELAV_negative_1_final_q3_shifted.bam",
  "./Cnidarian_bam/NV/ELAV_negative_2_final_q3_shifted.bam"
)

# Count fragments from every BAM file over the peak set. The labels in
# `celltype` are listed in the same order as `bamfiles`.
# NOTE(review): paired = FALSE here, whereas the H. vulgaris section uses
# paired = TRUE -- confirm these libraries should be counted as single-end.
fragment_counts <- getCounts(
  bamfiles,
  peaks,
  colData = DataFrame(
    celltype = c(
      "ELAV_positive_1",
      "ELAV_positive_2",
      "ELAV_negative_1",
      "ELAV_negative_2"
    )
  ),
  format = "bam",
  by_rg = FALSE,
  paired = FALSE
)

Reading in file: /media/gary/Chew/NV_hydra/raw_fastq/NV/ELAV_positive_1_final_q3_shifted.bam

Reading in file: /media/gary/Chew/NV_hydra/raw_fastq/NV/ELAV_positive_2_final_q3_shifted.bam

Reading in file: /media/gary/Chew/NV_hydra/raw_fastq/NV/ELAV_negative_1_final_q3_shifted.bam

Reading in file: /media/gary/Chew/NV_hydra/raw_fastq/NV/ELAV_negative_2_final_q3_shifted.bam



In [5]:
# Annotate every peak with its GC bias, computed against the N. vectensis
# BSgenome package loaded at the top of the notebook, then show the result.
counts_GC <- fragment_counts %>%
  addGCBias(genome = BSgenome.NV.200.Nvec200)
counts_GC

class: RangedSummarizedExperiment 
dim: 44185 4 
metadata(0):
assays(1): counts
rownames: NULL
rowData names(1): bias
colnames(4): ELAV_positive_1_final_q3_shifted.bam
  ELAV_positive_2_final_q3_shifted.bam
  ELAV_negative_1_final_q3_shifted.bam
  ELAV_negative_2_final_q3_shifted.bam
colData names(2): celltype depth

In [6]:
# Apply chromVAR's peak filter, asking it to keep a non-overlapping peak set.
counts_filtered <- counts_GC %>%
  filterPeaks(non_overlapping = TRUE)


In [7]:
# Read the precomputed peak x motif hit table for N. vectensis.
# check.names = FALSE keeps the motif column names exactly as written in the
# file header.
annotation_df <- read.table(
  file = "./FilteredPeakMotifHits/Nv.filt_peak_motif_hits.tsv",
  header = TRUE,
  sep = "\t",
  check.names = FALSE
)
# Drop the first column so that only the per-motif columns remain.
annotation_df <- annotation_df[, -1, drop = FALSE]
# Turn numeric hit columns into logical values (0 -> FALSE, non-zero -> TRUE).
annotation_df <- mutate_if(annotation_df, is.numeric, as.logical)


In [8]:
# Wrap the logical peak x motif table as a chromVAR annotation object that
# shares its row ranges with the filtered counts.
# The table is paired with the peaks by row position only, so fail early with
# a readable message if the two no longer have the same number of rows (for
# example because filterPeaks() removed peaks).
n_annotation_rows <- nrow(annotation_df)
n_filtered_peaks <- length(rowRanges(counts_filtered))
if (n_annotation_rows != n_filtered_peaks) {
  stop(
    "annotation_df has ", n_annotation_rows, " rows but counts_filtered has ",
    n_filtered_peaks, " peaks; subset the motif-hit table to the retained ",
    "peaks before calling getAnnotations().",
    call. = FALSE
  )
}
anno_ix <- getAnnotations(annotation_df, rowRanges = rowRanges(counts_filtered))

In [9]:
# Display the annotation object to check its dimensions (peaks x motifs).
anno_ix

class: RangedSummarizedExperiment 
dim: 44185 1262 
metadata(0):
assays(1): annotationMatches
rownames: NULL
rowData names(1): bias
colnames(1262): JC_0000|BIM2(0.105) JC_0001|Pax2(0.321) ...
  WC_2050|ZSCAN4(5.930) WC_2051|SCRT2(4.452)
colData names(0):

In [10]:
# Compute per-sample motif accessibility deviations for N. vectensis.
# computeDeviations() samples background peak sets at random when none are
# supplied, so fix the RNG seed first to make the z-scores reproducible
# between runs.
set.seed(2017)
dev <- computeDeviations(
  object = counts_filtered,
  annotations = anno_ix
)

In [14]:
# Show the deviation z-score matrix (motifs in rows, samples in columns).
assays(dev)[["z"]]

Unnamed: 0,ELAV_positive_1_final_q3_shifted.bam,ELAV_positive_2_final_q3_shifted.bam,ELAV_negative_1_final_q3_shifted.bam,ELAV_negative_2_final_q3_shifted.bam
JC_0000|BIM2(0.105),1.0591807,0.03768602,-1.82173742,1.1237257
JC_0001|Pax2(0.321),-12.3852948,-4.32313985,14.02765923,5.2672810
JC_0002|br(0.006),-2.7077133,-0.65209605,1.77733689,2.3270889
JC_0003|br(var.2)(0.000),-0.8231889,0.43503492,-0.57671666,1.5177029
JC_0004|HCM1(0.290),-7.2846960,-1.74670730,2.86865552,8.0003284
JC_0005|Foxq1(0.530),-3.0956893,-3.98193776,3.72706239,2.8345472
JC_0006|MOT2(0.189),-2.4258008,-1.48438022,-0.54035078,6.5371013
JC_0007|usp(0.031),-0.3841510,-0.94881464,0.81316248,0.2927752
JC_0008|Ddit3--Cebpa(0.667),1.7479453,0.21899377,-0.76491766,-1.9176472
JC_0009|PBF(0.050),-12.9625200,0.16234443,-1.59437414,20.8533100


In [12]:
# Export the N. vectensis deviation z-scores to CSV, with field quoting off.
assays(dev)[["z"]] %>%
  as.data.frame() %>%
  write.csv(file = "./ChromVarDeviations/NV_chromvarzscore.csv", quote = FALSE)

## H. vulgaris

In [None]:
# Repeat the workflow for Hydra: load the merged H. vulgaris peak set.
# sort_peaks = FALSE keeps the peaks in file order.
# NOTE(review): presumably the precomputed peak x motif table read later is
# row-aligned to this order -- confirm before sorting or resizing peaks.
peaks <- getPeaks(
  "./Peaks/Hv.merged_peaks.narrowPeak",
  sort_peaks = FALSE
)

In [None]:
# Shifted ATAC-seq alignments for the H. vulgaris samples: four nGreen, two PN
# and three AEP replicates, in that order.
bamfiles <- c(
  "./Cnidarian_bam/HV/nGreen1_final_shift.bam",
  "./Cnidarian_bam/HV/nGreen2_final_shift.bam",
  "./Cnidarian_bam/HV/nGreen3_final_shift.bam",
  "./Cnidarian_bam/HV/nGreen4_final_shift.bam",
  "./Cnidarian_bam/HV/PN1_final_shift.bam",
  "./Cnidarian_bam/HV/PN2_final_shift.bam",
  "./Cnidarian_bam/HV/AEP1_final_shift.bam",
  "./Cnidarian_bam/HV/AEP2_final_shift.bam",
  "./Cnidarian_bam/HV/AEP3_final_shift.bam"
)

# Count fragments from every BAM file over the peak set. The labels in
# `celltype` are listed in the same order as `bamfiles`.
# NOTE(review): paired = TRUE here, whereas the N. vectensis section uses
# paired = FALSE -- confirm this matches how each library was sequenced.
fragment_counts <- getCounts(
  bamfiles,
  peaks,
  colData = DataFrame(
    celltype = c(
      "nGreen1", "nGreen2", "nGreen3", "nGreen4",
      "PN1", "PN2",
      "AEP1", "AEP2", "AEP3"
    )
  ),
  format = "bam",
  by_rg = FALSE,
  paired = TRUE
)

In [None]:
# Annotate every peak with its GC bias, computed against the Hydra AEP
# BSgenome package loaded at the top of the notebook, then show the result.
counts_GC <- fragment_counts %>%
  addGCBias(genome = BSgenome.hydra.AEP.v02)
counts_GC

In [None]:
# Apply chromVAR's peak filter, asking it to keep a non-overlapping peak set.
counts_filtered <- counts_GC %>%
  filterPeaks(non_overlapping = TRUE)


In [None]:
# Read the precomputed peak x motif hit table for H. vulgaris.
# check.names = FALSE keeps the motif column names exactly as written in the
# file header.
annotation_df <- read.table(
  file = "./FilteredPeakMotifHits/Hv.filt_peak_motif_hits.tsv",
  header = TRUE,
  sep = "\t",
  check.names = FALSE
)
# Drop the first column so that only the per-motif columns remain.
annotation_df <- annotation_df[, -1, drop = FALSE]
# Turn numeric hit columns into logical values (0 -> FALSE, non-zero -> TRUE).
annotation_df <- mutate_if(annotation_df, is.numeric, as.logical)


In [None]:
# Wrap the logical peak x motif table as a chromVAR annotation object that
# shares its row ranges with the filtered counts.
# The table is paired with the peaks by row position only, so fail early with
# a readable message if the two no longer have the same number of rows (for
# example because filterPeaks() removed peaks).
n_annotation_rows <- nrow(annotation_df)
n_filtered_peaks <- length(rowRanges(counts_filtered))
if (n_annotation_rows != n_filtered_peaks) {
  stop(
    "annotation_df has ", n_annotation_rows, " rows but counts_filtered has ",
    n_filtered_peaks, " peaks; subset the motif-hit table to the retained ",
    "peaks before calling getAnnotations().",
    call. = FALSE
  )
}
anno_ix <- getAnnotations(annotation_df, rowRanges = rowRanges(counts_filtered))

In [None]:
# Compute per-sample motif accessibility deviations for H. vulgaris.
# computeDeviations() samples background peak sets at random when none are
# supplied, so fix the RNG seed first to make the z-scores reproducible
# between runs.
set.seed(2017)
dev <- computeDeviations(
  object = counts_filtered,
  annotations = anno_ix
)

In [None]:
# Export the H. vulgaris deviation z-scores to CSV, with field quoting off.
# The directory name is spelled exactly as in the N. vectensis export
# ("ChromVarDeviations"); the previous "ChromVARDeviations" spelling points to
# a different directory on case-sensitive filesystems. Create the directory if
# it is missing so the write cannot fail on a fresh checkout.
out_dir <- "./ChromVarDeviations"
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
write.csv(
  as.data.frame(assays(dev)$z),
  file.path(out_dir, "HV_chromvarzscore.csv"),
  quote = FALSE
)