-
Notifications
You must be signed in to change notification settings - Fork 19
/
ci_single_mean_theo.R
78 lines (61 loc) 路 2.31 KB
/
ci_single_mean_theo.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#' Confidence interval for two proportions, using CLT based T-interval
#'
#' Helper for the `inference()` function
#'
#' @param y Response variable, can be numerical or categorical
#' @param conf_level confidence level, value between 0 and 1
#' @param y_name Name of response variable as a character string (passed
#' from inference function)
#' @param show_var_types print variable types, set to verbose by default
#' @param show_summ_stats print summary stats, set to verbose by default
#' @param show_eda_plot print EDA plot, set to verbose by default
#' @param show_inf_plot print inference plot, set to verbose by default
#' @param show_res print results, set to verbose by default
ci_single_mean_theo <- function(y, conf_level, y_name,
show_var_types, show_summ_stats, show_res,
show_eda_plot, show_inf_plot){
# calculate sample size
n <- length(y)
# calculate x-bar
y_bar <- mean(y)
# define degrees of freedom
df <- n - 1
# find percentile associated with critical value
perc_crit_value <- conf_level + ((1 - conf_level) / 2)
# find critical value
t_star <- qt(perc_crit_value, df)
# calculate s
s <- sd(y)
# calculate SE
se <- s / sqrt(n)
# calculate ME
me <- t_star * se
# calculate CI
ci <- y_bar + c(-1, 1)* me
# print variable types
if(show_var_types == TRUE){
cat("Single numerical variable\n")
}
# print summary statistics
if(show_summ_stats == TRUE){
cat(paste0("n = ", n, ", y-bar = ", round(y_bar, 4), ", s = ", round(s, 4), "\n"))
}
# print results
if(show_res == TRUE){
conf_level_perc = conf_level * 100
cat(paste0(conf_level_perc, "% CI: (", round(ci[1], 4), " , ", round(ci[2], 4), ")\n"))
}
# eda_plot
d_eda <- data.frame(y = y)
eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment()) +
ggplot2::geom_histogram(fill = "#8FDEE1", binwidth = diff(range(y)) / 20) +
ggplot2::xlab(y_name) +
ggplot2::ylab("") +
ggplot2::ggtitle("Sample Distribution") +
ggplot2::geom_vline(xintercept = y_bar, col = "#1FBEC3", lwd = 1.5)
# print plots
if(show_eda_plot){ print(eda_plot) }
if(show_inf_plot){ warning("No inference plot available.", call. = FALSE) }
# return
return(list(df = df, SE = se, ME = me, CI = ci))
}