-
Notifications
You must be signed in to change notification settings - Fork 1
/
plot_mds.R
167 lines (141 loc) · 5.55 KB
/
plot_mds.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#' @title Multidimensional Scaling Plot
#' @description Plot samples on a two-dimensional scatterplot so that
#' distances on the plot approximate the typical log2 fold
#' changes between the samples.
#' @param screenR_Object The Object of the package
#' \code{\link{create_screenr_object}}
#' @param groups The vector that has to be used to fill the plot if NULL the
#' function will use the default groups slot in the object passed
#' as input.
#' @param alpha The opacity of the labels.
#' Possible value are in a range from 0 to 1.
#' @param size The dimension of the labels. The default value is 2.5
#' @param color The color of the labels. The default value is black
#' @importFrom ggplot2 geom_label labs
#' @importFrom ggplot2 scale_y_continuous
#' @importFrom limma plotMDS
#' @return The MDS Plot
#' @export
#' @examples
#' object <- get0("object", envir = asNamespace("ScreenR"))
#'
#' plot_mds(object)
plot_mds <- function(screenR_Object, groups = NULL, alpha = 0.8, size = 2.5,
color = "black") {
# We have to convert the screenR obj into an edgeR obj
DGEList <- create_edger_obj(screenR_Object)
# The Standard plotMDS
plotMDS <- limma::plotMDS(DGEList, plot = FALSE, ndim = 2)
# Create the Updated plot MDS
PLTdata <- data.frame(
Sample = rownames(plotMDS$distance.matrix.squared),
x = plotMDS$x, y = plotMDS$y
)
if (is.null(groups)) {
PLTdata$group <- DGEList$samples$group
} else {
PLTdata$group <- groups
}
plot <- ggplot2::ggplot(PLTdata, aes(
x = .data$x, y = .data$y,
fill = .data$group
)) +
ggplot2::geom_label(aes(label = .data$Sample),
color = color, size = size, alpha = alpha
) +
ggplot2::labs(x = "First Dimension", y = "Second Dimension")
return(plot)
}
#' @title Plot the explained variance by the PC
#' @description This function plot the explained variance by the
#' Principal Component analysis.
#' @param screenR_Object The ScreenR object obtained using the
#' \code{\link{create_screenr_object}}
#' @param cumulative A boolean value which indicates whether or not to plot
#' the cumulative variance. The default value is FALSE.
#' @param color The color to fill the barplot the default value is steelblue
#' @importFrom scales percent
#' @return The explained variance plot
#' @export
#' @examples
#' object <- get0("object", envir = asNamespace("ScreenR"))
#'
#' plot_explained_variance(object)
#'
#' # For the cumulative plot
#' plot_explained_variance(object, cumulative = TRUE)
plot_explained_variance <- function(screenR_Object, cumulative = FALSE,
color = "steelblue") {
PC <- compute_explained_variance(screenR_Object)
# Remove the Standard deviation row
PC <- filter(PC, .data$Name != "Standard deviation")
# Get only the numeric columns which corresponds to the PCs
numeric_col <- colnames(PC[, unlist(lapply(PC, is.numeric))])
# Transform the data in a longer format
PC <- tidyr::pivot_longer(
data = PC, cols = all_of(numeric_col),
names_to = "name"
)
PC <- dplyr::mutate(PC, name = factor(
x = .data$name,
levels = unique(.data$name)
))
plot <- NULL
if (cumulative) {
# Select only the Cumulative Proportion
PC <- dplyr::filter(PC, .data$Name == "Cumulative Proportion")
plot <- ggplot2::ggplot(PC, aes(x = .data$name, y = .data$value)) +
geom_bar(stat = "identity", fill = color, col = "black") +
geom_point() +
geom_line(aes(group = .data$Name)) +
scale_y_continuous(labels = scales::percent) +
labs(
x = NULL,
y = "Cumulative Expressed Variance (%)"
)
} else {
# Select only the Proportion of Variance
PC <- dplyr::filter(PC, .data$Name == "Proportion of Variance")
plot <- ggplot2::ggplot(PC, aes(x = .data$name, y = .data$value)) +
geom_bar(stat = "identity", fill = color, col = "black") +
geom_point() +
geom_line(aes(group = .data$Name)) +
scale_y_continuous(labels = percent) +
labs(x = NULL, y = "Expressed Variance (%)")
}
return(plot)
}
#' @title Compute explained variance
#' @description This is an internal function used to compute
#' the explained variance by each of the Principal Components.
#' @importFrom stats prcomp
#' @param screenR_Object The Object of the package
#' @return A data.frame containing all the information of the variance
#' expressed by the components
#' @keywords internal
#' @export
#' @examples
#' object <- get0("object", envir = asNamespace("ScreenR"))
#'
#' compute_explained_variance(object)
compute_explained_variance <- function(screenR_Object) {
data <- screenR_Object@count_table
# Get only the numeric columns
numeric_col <- unlist(lapply(data, is.numeric))
# The data for the PC corresponds only on the numeric column
data_PC <- data[, numeric_col]
# To compute the PC the features has to be columns
data_PC <- t(data_PC)
# Rename the columns
colnames(data_PC) <- data$Barcode
# Computing the PCS
PC <- prcomp(data_PC)
# Extract the importance ad create a data.frame
PC <- data.frame(summary(PC)$importance)
PC <- tibble::rownames_to_column(PC, var = "Name")
PC <- dplyr::mutate(PC, Name = factor(
x = .data$Name,
levels = unique(.data$Name)
))
return(PC)
}