-
Notifications
You must be signed in to change notification settings - Fork 0
/
5.1-CI_mean.R
62 lines (50 loc) · 3.2 KB
/
5.1-CI_mean.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#This scripts will simulate a single sample, and calculate the mean
#The gold background illustrates the 95% prediction interval (PI), The orange background illustrates the 95% confidence interval
#The black dotted line illustrates the true mean. 95% of the CI should contain the true mean
#Then, a simulation is run. The simulations generates a large number of additional samples
#The simulation returns the number of CI that contain the mean, and returns the % of means from future studies that fall within the 95% of the original study
#This is known as the capture percentage. It differs from (and is lower than) the confidence interval
if(!require(ggplot2)){install.packages('ggplot2')}
library(ggplot2)
if(!require(Rcpp)){install.packages('Rcpp')}
library(Rcpp)
n=20 #set sample size
nSims<-100000 #set number of simulations
x<-rnorm(n = n, mean = 100, sd = 15) #create sample from normal distribution
#95% Confidence Interval
CIU<-mean(x)+qt(0.975, df = n-1)*sd(x)*sqrt(1/n)
CIL<-mean(x)-qt(0.975, df = n-1)*sd(x)*sqrt(1/n)
#95% Prediction Interval
PIU<-mean(x)+qt(0.975, df = n-1)*sd(x)*sqrt(1+1/n)
PIL<-mean(x)-qt(0.975, df = n-1)*sd(x)*sqrt(1+1/n)
#plot data
#png(file="CI_mean.png",width=2000,height=2000, res = 300)
ggplot(as.data.frame(x), aes(x)) +
geom_rect(aes(xmin=PIL, xmax=PIU, ymin=0, ymax=Inf), fill="gold") + #draw orange CI area
geom_rect(aes(xmin=CIL, xmax=CIU, ymin=0, ymax=Inf), fill="#E69F00") + #draw orange CI area
geom_histogram(colour="black", fill="grey", aes(y=..density..), binwidth=2) +
xlab("IQ") + ylab("number of people") + ggtitle("Data") + theme_bw(base_size=20) +
theme(panel.grid.major.x = element_blank(), axis.text.y = element_blank(), panel.grid.minor.x = element_blank()) +
geom_vline(xintercept=mean(x), colour="black", linetype="dashed", size=1) +
coord_cartesian(xlim=c(50,150)) + scale_x_continuous(breaks=c(50,60,70,80,90,100,110,120,130,140,150)) +
annotate("text", x = mean(x), y = 0.02, label = paste("Mean = ",round(mean(x)),"\n","SD = ",round(sd(x)),sep=""), size=6.5)
#dev.off()
#Simulate Confidence Intervals
CIU_sim<-numeric(nSims)
CIL_sim<-numeric(nSims)
mean_sim<-numeric(nSims)
for(i in 1:nSims){ #for each simulated experiment
x<-rnorm(n = n, mean = 100, sd = 15) #create sample from normal distribution
CIU_sim[i]<-mean(x)+qt(0.975, df = n-1)*sd(x)*sqrt(1/n)
CIL_sim[i]<-mean(x)-qt(0.975, df = n-1)*sd(x)*sqrt(1/n)
mean_sim[i]<-mean(x) #store means of each sample
}
#Save only those simulations where the true value was inside the 95% CI
CIU_sim<-CIU_sim[CIU_sim<100]
CIL_sim<-CIL_sim[CIL_sim>100]
cat((100*(1-(length(CIU_sim)/nSims+length(CIL_sim)/nSims))),"% of the 95% confidence intervals contained the true mean")
#Calculate how many times the observed mean fell within the 95% CI of the original study
mean_sim<-mean_sim[mean_sim>CIL&mean_sim<CIU]
cat("The capture percentage for the plotted study, or the % of values within the observed confidence interval from",CIL,"to",CIU,"is:",100*length(mean_sim)/nSims,"%")
#© Daniel Lakens, 2016.
# This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. https://creativecommons.org/licenses/by-nc-sa/4.0/