/
getG_symDMatrix.Rd
89 lines (89 loc) · 3.33 KB
/
getG_symDMatrix.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
\name{getG_symDMatrix}
\alias{getG_symDMatrix}
\title{Computes a Very Large Genomic Relationship Matrix}
\description{
Computes a positive semi-definite symmetric genomic relationship matrix G=XX',
offering options for centering and scaling the columns of \code{X}
beforehand.
}
\usage{
getG_symDMatrix(X, center = TRUE, scale = TRUE, impute = TRUE, scaleG = TRUE,
minVar = 1e-05, blockSize = 5000L,
folderOut = paste0("symDMatrix_", randomString()), vmode = "double",
i = seq_len(nrow(X)), j = seq_len(ncol(X)), chunkSize = 5000L,
nCores = getOption("mc.cores", 2L), verbose = FALSE)
}
\arguments{
\item{X}{
A matrix-like object, typically the genotypes of a \code{BGData}
object.
}
\item{center}{
Either a logical value or a numeric vector of length equal to the
number of columns of \code{X}. If \code{FALSE}, no centering is done.
Defaults to \code{TRUE}.
}
\item{scale}{
Either a logical value or a numeric vector of length equal to the
number of columns of \code{X}. If \code{FALSE}, no scaling is done.
Defaults to \code{TRUE}.
}
\item{impute}{
Indicates whether missing values should be imputed. Defaults to
\code{TRUE}.
}
\item{scaleG}{
Whether XX' should be scaled. Defaults to \code{TRUE}.
}
\item{minVar}{
Columns with variance lower than this value will not be used in the
computation (only if \code{scale} is not \code{FALSE}).
}
\item{blockSize}{
The number of rows and columns of each block. If \code{NULL}, a single
block of the same length as \code{i} will be created. Defaults to 5000.
}
\item{folderOut}{
The path to the folder where to save the \code{symDMatrix} object.
Defaults to a random string prefixed with "symDMatrix_".
}
\item{vmode}{
The \code{vmode} used for the \code{ff} objects. Defaults to
\code{"double"}.
}
\item{i}{
Indicates which rows of \code{X} should be used. Can be integer,
logical, or character. By default, all rows are used.
}
\item{j}{
Indicates which columns of \code{X} should be used. Can be integer,
logical, or character. By default, all columns are used.
}
\item{chunkSize}{
The number of columns of \code{X} that are brought into physical memory
for processing per core. If \code{NULL}, all columns of \code{X} are
used. Defaults to 5000.
}
\item{nCores}{
The number of cores (passed to \code{mclapply}). Defaults to the value
of the \code{mc.cores} option, or 2 if that option is not set.
}
\item{verbose}{
Whether progress updates will be posted. Defaults to \code{FALSE}.
}
}
\details{
Even very large genomic relationship matrices are supported by partitioning
\code{X} into blocks and calling \code{getG} on these blocks. This function
performs the block computations sequentially, which may be slow. In an HPC
environment, performance can be improved by manually distributing these
operations to different nodes.
}
\value{
A \code{symDMatrix} object.
}
\seealso{
\code{\link{multi-level-parallelism}} for more information on multi-level
parallelism. \code{\link[symDMatrix]{symDMatrix-class}} and
\code{\link{BGData-class}} for more information on the \code{symDMatrix} and
\code{BGData} classes.
\code{\link{getG}} to learn more about the underlying method.
}