/
pat_internalFit.R
175 lines (146 loc) · 5.76 KB
/
pat_internalFit.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#' @export
#' @importFrom rlang .data
#' @importFrom stats lm
#' @import dplyr
#' @import graphics
#'
#' @title Linear model fitting of channel A and B time series data
#'
#' @param pat PurpleAir Timeseries \emph{pat} object.
#' @param showPlot Logical specifying whether to generate a model fit plot.
#' @param size Size of points.
#' @param a_color Color of time series channel A points.
#' @param b_color Color of time series channel B points.
#' @param alpha Opacity of points.
#' @param lr_shape Symbol to use for linear regression points.
#' @param lr_color Color of linear regression points.
#' @param lr_lwd Width of linear regression line.
#' @param lr_lcolor Color of linear regression line.
#' @param lr_lalpha Opacity of linear regression line.
#' @param ts_shape Symbol to use for time series points.
#' @param xylim Vector of (lo,hi) limits used as limits on the correlation plot
#' axes -- useful for zooming in.
#'
#' @description Uses a linear model to fit data from channel B to data from
#' channel A.
#'
#' A diagnostic plot is produced if \code{showPlot = TRUE}.
#'
#' @return A linear model, fitting the \code{pat} B channel readings to A channel
#' readings.
#'
#' @examples
#' library(AirSensor)
#'
#' example_pat %>%
#' pat_internalFit()
pat_internalFit <- function(
pat = NULL,
showPlot = TRUE,
size = 1,
a_color = "red",
b_color = "blue",
alpha = 0.25,
lr_shape = 15,
lr_color = "black",
lr_lwd = 1.5,
lr_lcolor = "tomato",
lr_lalpha = 0.45,
ts_shape = 1,
xylim = NULL
) {
# ----- Validate parameters --------------------------------------------------
MazamaCoreUtils::stopIfNull(pat)
if ( !pat_isPat(pat) )
stop("Parameter 'pat' is not a valid 'pa_timeseries' object.")
if ( pat_isEmpty(pat) )
stop("Parameter 'pat' has no data.")
# Remove any duplicate data records
pat <- pat_distinct(pat)
# For easier access
meta <- pat$meta
data <- pat$data
if ( is.null(xylim) ) {
dataMin <- min(c(0, data$pm25_A, data$pm25_B), na.rm = TRUE)
dataMax <- max(c(data$pm25_A, data$pm25_B), na.rm = TRUE)
xylim <- c(dataMin, dataMax)
}
a_pm25 <-
data %>%
dplyr::select(.data$datetime, .data$pm25_A)
b_pm25 <-
data %>%
dplyr::select(.data$datetime, .data$pm25_B)
# Create a tidy dataframe appropriate for ggplot
tidy_data <-
dplyr::full_join(a_pm25, b_pm25, by = "datetime") %>%
tidyr::gather("channel", "pm25", -.data$datetime)
# ----- Linear model ---------------------------------------------------------
# Model A as a function of B (data should lie on a line)
model <- lm(data$pm25_A ~ data$pm25_B, subset = NULL, weights = NULL)
slope <- as.numeric(model$coefficients[2]) # as.numeric() to remove name
intercept <- as.numeric(model$coefficients[1])
r_squared <- summary(model)$r.squared
# Label for linear fit
equationLabel <-
ggplot2::annotate(
geom = "text",
x = 0.75 * xylim[2],
y = c(0.4, 0.3, 0.2) * xylim[2],
label = c(paste0("Slope = ", round(slope, digits = 2)),
paste0("Intercept = ", round(intercept, digits = 1)),
paste0("R\U00B2 = ", round(r_squared, digits = 3))) )
# ----- Plot -----------------------------------------------------------------
if ( showPlot ) {
timezone <- pat$meta$timezone[1]
year <- strftime(pat$data$datetime[1], "%Y", tz=timezone)
# LH Linear regression plot
lr_plot <-
pat$data %>%
ggplot2::ggplot(ggplot2::aes(x = .data$pm25_B, y = .data$pm25_A)) +
ggplot2::geom_point(size = size,
shape = lr_shape,
color = lr_color,
alpha = alpha) +
ggplot2::geom_smooth(formula = y ~ x, method = "lm",
size = 0, alpha = 0.45) +
ggplot2::stat_smooth(formula = y ~ x, method = "lm", geom = "line",
color = lr_lcolor, alpha = lr_lalpha, size = lr_lwd) +
ggplot2::labs(title = "Channel Linear Regression",
x = "Channel B (\u03bcg / m\u00b3)",
y = "Channel A (\u03bcg / m\u00b3)") +
ggplot2::theme_bw() +
ggplot2::xlim(xylim) +
ggplot2::ylim(xylim) +
ggplot2::coord_fixed() + # square aspect ratio
equationLabel
# Set time axis to sensor local time
tidy_data$datetime <- lubridate::with_tz(tidy_data$datetime,
tzone = timezone)
ts_plot <-
tidy_data %>%
ggplot2::ggplot() +
ggplot2::geom_point(ggplot2::aes(x = .data$datetime,
y = .data$pm25,
color = .data$channel),
size = size,
shape = ts_shape,
alpha = alpha) +
ggplot2::scale_color_manual(values = c(a_color, b_color),
name = "Channel",
labels = c("A", "B")) +
ggplot2::ylim(xylim) +
ggplot2::ggtitle(expression("PM"[2.5])) +
ggplot2::xlab(year) + ggplot2::ylab("\u03bcg / m\u00b3")
# Gather and arrange the linear regression and time series plots with a banner title
bannerText <- paste0("A / B Channel Comparison -- ", pat$meta$label)
bannerGrob <- grid::textGrob(bannerText,
just = "left",
x = 0.025,
gp = grid::gpar(fontsize = 18, col="grey50"))
plot <- gridExtra::grid.arrange(bannerGrob, lr_plot, ts_plot,
ncol = 1, heights = c(1, 6, 3))
}
# ----- Return ---------------------------------------------------------------
return(invisible(model))
}