In [None]:
# Install BiocManager if not already installed
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Install CRAN packages
install.packages(c("knitr", "RColorBrewer", "stringr"))

# Install Bioconductor packages
BiocManager::install(c(
  "limma",
  "minfi",
  "IlluminaHumanMethylation450kanno.ilmn12.hg19",
  "IlluminaHumanMethylation450kmanifest",
  "missMethyl",
  "minfiData",
  "Gviz",
  "DMRcate",
  "methylationArrayAnalysis"
))


# Differential methylation analysis

## Environemnt setup

In [29]:
library(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)


In [2]:
# Load the libraries
library(knitr)
library(limma)
library(minfi)
library(IlluminaHumanMethylation450kanno.ilmn12.hg19)
library(IlluminaHumanMethylation450kmanifest)
library(RColorBrewer)
library(missMethyl)
library(minfiData)
library(Gviz)
library(DMRcate)
library(stringr)

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following object is masked from ‘package:limma’:

    plotMA


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
    as.data.frame, basename, cbind, colnames, dirname, do.call,
    duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
    lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
    pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
    tapply, union, unique, unsplit, which.max, which.min


Loading required package: GenomicRanges

Loading required package: stats4

Loading required package: S4Vectors


Attaching package: ‘S4Vectors’


The following objects are masked from ‘package:base’:

    I, expand.grid, unname


Loading required package: IRanges

Loading required package: GenomeInfoDb

Loading 

## Loading the data

In [73]:
# get the 450k annotation data
ann450k <- getAnnotation(IlluminaHumanMethylation450kanno.ilmn12.hg19)
dim(ann450k)

In [74]:
# write.csv(ann450k, file = paste(dataDirectory, "48639-non-specific-probes-Illumina450k.csv", sep='/'))

In [72]:
# set up a path to the data directory
# batch <- '207881760037'
# dataDirectory <- paste('../Dataset/Methylomics/FIN13296/iDAT', batch, sep='/')
dataDirectory <- '../Dataset/Methylomics/FIN13296/iDAT'

# read in the sample sheet for the experiment
targets <- read.metharray.sheet(dataDirectory, pattern="Sample_Sheet.csv")
head(targets)

[read.metharray.sheet] Found the following CSV files:



 [1] "../Dataset/Methylomics/FIN13296/iDAT/207881760004/Sample_Sheet.csv"
 [2] "../Dataset/Methylomics/FIN13296/iDAT/207881760037/Sample_Sheet.csv"
 [3] "../Dataset/Methylomics/FIN13296/iDAT/207881760097/Sample_Sheet.csv"
 [4] "../Dataset/Methylomics/FIN13296/iDAT/207881760106/Sample_Sheet.csv"
 [5] "../Dataset/Methylomics/FIN13296/iDAT/207881760108/Sample_Sheet.csv"
 [6] "../Dataset/Methylomics/FIN13296/iDAT/207881760117/Sample_Sheet.csv"
 [7] "../Dataset/Methylomics/FIN13296/iDAT/207881760119/Sample_Sheet.csv"
 [8] "../Dataset/Methylomics/FIN13296/iDAT/207881760120/Sample_Sheet.csv"
 [9] "../Dataset/Methylomics/FIN13296/iDAT/207881760121/Sample_Sheet.csv"
[10] "../Dataset/Methylomics/FIN13296/iDAT/207881760123/Sample_Sheet.csv"
[11] "../Dataset/Methylomics/FIN13296/iDAT/207881760128/Sample_Sheet.csv"
[12] "../Dataset/Methylomics/FIN13296/iDAT/207881760129/Sample_Sheet.csv"


Unnamed: 0_level_0,Sample_ID,Sample_Well,Sample_Plate,Sample_Name,Pool_ID,Sample_Group,Array,Slide,Basename
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,FIN13295A89,A12,207881760004_R01C01_FIN13295A89_A12,I_D_102_DNA,,207881760004_R01C01,R01C01,207881760004,../Dataset/Methylomics/FIN13296/iDAT/207881760004/207881760004_R01C01/207881760004_R01C01
2,FIN13295A90,B12,207881760004_R02C01_FIN13295A90_B12,I_D_103_DNA,,207881760004_R02C01,R02C01,207881760004,../Dataset/Methylomics/FIN13296/iDAT/207881760004/207881760004_R02C01/207881760004_R02C01
3,FIN13295A91,C12,207881760004_R03C01_FIN13295A91_C12,I_D_104_DNA,,207881760004_R03C01,R03C01,207881760004,../Dataset/Methylomics/FIN13296/iDAT/207881760004/207881760004_R03C01/207881760004_R03C01
4,FIN13295A92,D12,207881760004_R04C01_FIN13295A92_D12,I_D_105_DNA,,207881760004_R04C01,R04C01,207881760004,../Dataset/Methylomics/FIN13296/iDAT/207881760004/207881760004_R04C01/207881760004_R04C01
5,FIN13295A93,E12,207881760004_R05C01_FIN13295A93_E12,I_D_106_DNA,,207881760004_R05C01,R05C01,207881760004,../Dataset/Methylomics/FIN13296/iDAT/207881760004/207881760004_R05C01/207881760004_R05C01
6,FIN13295A94,F12,207881760004_R06C01_FIN13295A94_F12,I_D_107_DNA,,207881760004_R06C01,R06C01,207881760004,../Dataset/Methylomics/FIN13296/iDAT/207881760004/207881760004_R06C01/207881760004_R06C01


In [68]:
# set up a path to the data directory
dataDirectory <- system.file("extdata", package = "methylationArrayAnalysis")
targets <- read.metharray.sheet(dataDirectory, pattern="SampleSheet.csv")
targets

[read.metharray.sheet] Found the following CSV files:



[1] "/opt/anaconda3/envs/Idimension/lib/R/library/methylationArrayAnalysis/extdata/SampleSheet.csv"


Sample_Name,Sample_Well,Sample_Source,Sample_Group,Sample_Label,Pool_ID,Array,Slide,Basename
<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,A1,M28,naive,naive,,R01C01,6264509100,/opt/anaconda3/envs/Idimension/lib/R/library/methylationArrayAnalysis/extdata/6264509100/6264509100_R01C01
2,B1,M28,rTreg,rTreg,,R02C01,6264509100,/opt/anaconda3/envs/Idimension/lib/R/library/methylationArrayAnalysis/extdata/6264509100/6264509100_R02C01
3,C1,M28,act_naive,act_naive,,R03C01,6264509100,/opt/anaconda3/envs/Idimension/lib/R/library/methylationArrayAnalysis/extdata/6264509100/6264509100_R03C01
4,D1,M29,naive,naive,,R04C01,6264509100,/opt/anaconda3/envs/Idimension/lib/R/library/methylationArrayAnalysis/extdata/6264509100/6264509100_R04C01
5,E1,M29,act_naive,act_naive,,R05C01,6264509100,/opt/anaconda3/envs/Idimension/lib/R/library/methylationArrayAnalysis/extdata/6264509100/6264509100_R05C01
6,F1,M29,act_rTreg,act_rTreg,,R06C01,6264509100,/opt/anaconda3/envs/Idimension/lib/R/library/methylationArrayAnalysis/extdata/6264509100/6264509100_R06C01
7,G1,M30,naive,naive,,R01C02,6264509100,/opt/anaconda3/envs/Idimension/lib/R/library/methylationArrayAnalysis/extdata/6264509100/6264509100_R01C02
8,H1,M30,rTreg,rTreg,,R02C02,6264509100,/opt/anaconda3/envs/Idimension/lib/R/library/methylationArrayAnalysis/extdata/6264509100/6264509100_R02C02
9,A2,M30,act_naive,act_naive,,R03C02,6264509100,/opt/anaconda3/envs/Idimension/lib/R/library/methylationArrayAnalysis/extdata/6264509100/6264509100_R03C02
10,B2,M30,act_rTreg,act_rTreg,,R04C02,6264509100,/opt/anaconda3/envs/Idimension/lib/R/library/methylationArrayAnalysis/extdata/6264509100/6264509100_R04C02


In [69]:
# read in the raw data from the IDAT files
rgSet <- read.metharray.exp(targets=targets)
rgSet


class: RGChannelSet 
dim: 622399 11 
metadata(0):
assays(2): Green Red
rownames(622399): 10600313 10600322 ... 74810490 74810492
rowData names(0):
colnames(11): 6264509100_R01C01 6264509100_R02C01 ... 6264509100_R04C02
  5975827018_R06C02
colData names(10): Sample_Name Sample_Well ... Basename filenames
Annotation
  array: IlluminaHumanMethylation450k
  annotation: ilmn12.hg19

In [70]:
annotation(rgSet) 

In [75]:
# give the samples descriptive names
targets$ID <- paste(targets$Sample_Group, targets$Sample_Name, sep=".")
sampleNames(rgSet) <- targets$ID
rgSet

ERROR: Error in `rownames<-`(`*tmp*`, value = value[[2L]]): invalid rownames length


## Quality control

In [65]:
# normalize the data; this results in a GenomicRatioSet object
mSetSq <- preprocessQuantile(rgSet) 

[preprocessQuantile] Mapping to genome.



ERROR: Error: [matrixStats (>= 1.2.0)] useNames = NA is defunct. Instead, specify either useNames = TRUE or useNames = FALSE. See also ?matrixStats::matrixStats.options
