# **R scripts for Lecture 4**

In [None]:
# This R environment comes with many helpful analytics packages installed
# It is defined by the kaggle/rstats Docker image: https://github.com/kaggle/docker-rstats
# For example, here's a helpful package to load

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#This code loads a compiled version of JAGS and rjags from a zip file on Onedrive, and loads rjags. It should only take a few seconds.
#IMPORTANT: Go to the Kaggle Settings (right hand side click on K icon) and enable the Internet option in Settings before running this.
#Step 1: download the archive to /kaggle/working/kaggle_JAGS.zip
system("wget --no-check-certificate -r 'https://uoe-my.sharepoint.com/:u:/g/personal/dpaulin_ed_ac_uk/EX_-yUc-bIZJhLXHcZxpOj8Ba6dwC15X_MjYoox-xM2KlQ?download=1' -O /kaggle/working/kaggle_JAGS.zip")
#Step 2: extract it (no target directory is given, so files go to R's current working directory), then delete the archive
system("unzip /kaggle/working/kaggle_JAGS.zip")
system("rm /kaggle/working/kaggle_JAGS.zip")
#NOTE(review): each system() call starts its own shell, so the directory change below does not carry over
#to the following "make install", which runs in R's current working directory instead.
#Presumably the install only works because of where the archive is unpacked - confirm before changing these two lines.
system("cd /kaggle/working/JAGS-4.3.0")
system("make install")
#Step 3: attach rjags from the library directory /kaggle/working
library(rjags,lib.loc="/kaggle/working")
#If it ran correctly, you should see 
#Loading required package: coda
#Linked to JAGS 4.3.0
#Loaded modules: basemod,bugs

#In case you are still experiencing difficulties with this, please use the following code (this compiles and installs JAGS from the source, it takes 6-7 minutes):
#(the same caveat applies here: the "cd" line has no effect on the commands that follow it)
#system("wget https://sourceforge.net/projects/mcmc-jags/files/JAGS/4.x/Source/JAGS-4.3.0.tar.gz -P /kaggle/working")
#system("tar xvfz /kaggle/working/JAGS-4.3.0.tar.gz")
#system("cd /kaggle/working/JAGS-4.3.0")
#system("/kaggle/working/JAGS-4.3.0/configure")
#system("make")
#system("make install")
#install.packages("rjags", lib="/kaggle/working")
#library(rjags,lib.loc="/kaggle/working")

In [None]:
# Fetch a pre-built R-INLA installation (zip archive) from an online source, unpack it, and attach INLA.
# IMPORTANT: Go to the Kaggle Settings (right hand side) and enable the Internet option before running this.
# The shell commands are run one after another: download the archive, extract it, delete the archive.
invisible(lapply(
  c(
    "wget --no-check-certificate -r 'https://uoe-my.sharepoint.com/:u:/g/personal/dpaulin_ed_ac_uk/EUNBvDg_EJVFqSZJA3Xz7LsB5cVgqYk0HWWnOp74_Dr28A?download=1' -O /kaggle/working/kaggle_INLA.zip",
    "unzip /kaggle/working/kaggle_INLA.zip",
    "rm /kaggle/working/kaggle_INLA.zip"
  ),
  system
))
library(INLA, lib.loc = "/kaggle/working")
# If INLA has been successfully loaded, you should see the following:
# This is INLA_20.03.17 built 2021-01-02 20:27:47 UTC.
# See www.r-inla.org/contact-us for how to get help.
# To enable PARDISO sparse library; see inla.pardiso()

# The following code does the full installation. You can try it if the previous code fails, but this takes longer.
#install.packages("INLA",repos=c(getOption("repos"),INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE,lib="/kaggle/working")
#library(INLA,lib.loc="/kaggle/working")


# Crabs Data

**Loading the dataset**

In [None]:
#### Read and condition crabs data ####

# The Internet option must be enabled in the Kaggle Settings (right hand side menu, K icon in the top right corner)
# Download the raw data file into /kaggle/working/crab.txt
system("wget --no-check-certificate -r 'https://drive.google.com/uc?export=download&id=1F6ff2LIRIDC_foL7EtS9dmoKrZGCiCUV' -O /kaggle/working/crab.txt")

# Read the table and attach the column names in one step
crabs <- setNames(
  read.table("/kaggle/working/crab.txt"),
  c("Obs", "Colour", "Spine", "Weight", "Width", "Satellites")
)

# The integer codes in Colour and Spine index into these label vectors;
# the recoded columns are stored as factors
colour.map <- c("lt.med", "med", "dk.med", "dk")
spine.map <- c("both.gd", "one.bad", "both.bad")
crabs[["Colour"]] <- factor(colour.map[crabs[["Colour"]]])
crabs[["Spine"]] <- factor(spine.map[crabs[["Spine"]]])

**Frequentist Analysis**

In [None]:
#### Frequentist analysis of the crab data ####

# Poisson regression of Satellites on Width under three link functions.
# The identity-link fit is given explicit starting values (intercept 0, slope 1).
m1 <- glm(formula = Satellites ~ Width, data = crabs,
          family = poisson(link = "log"))
m1.identity <- glm(formula = Satellites ~ Width, data = crabs,
                   family = poisson(link = "identity"), start = c(0, 1))
m1.sqrt <- glm(formula = Satellites ~ Width, data = crabs,
               family = poisson(link = "sqrt"))

# Estimated coefficients side by side (rows: log, identity, sqrt), rounded to 2 decimals
round(rbind(coef(m1), coef(m1.identity), coef(m1.sqrt)), digits = 2)

# Predicted means on a regular grid of widths from 1 to 6
newdata <- data.frame(Width = seq(from = 1, to = 6, by = 0.1))

pred1          <- predict(m1,          newdata = newdata, type = "response")
pred1.identity <- predict(m1.identity, newdata = newdata, type = "response")
pred1.sqrt     <- predict(m1.sqrt,     newdata = newdata, type = "response")

# Scatter plot of the data with the three fitted mean curves overlaid
plot(crabs$Width, crabs$Satellites, xlab = "Width", ylab = "Satellites")
lines(newdata$Width, pred1,          col = "firebrick2",  lwd = 3, lty = 1)
lines(newdata$Width, pred1.identity, col = "dodgerblue2", lwd = 3, lty = 5)
lines(newdata$Width, pred1.sqrt,     col = "forestgreen", lwd = 3, lty = 6)
legend("bottomright",
       legend = c("Log link", "identity link", "sqrt link"),
       col = c("firebrick2", "dodgerblue2", "forestgreen"),
       lty = c(1, 5, 6), lwd = 3)

**Bayesian Analysis - JAGS**

In [None]:
#### Bayesian analysis for the crabs data ####

require(rjags)

# Data passed to JAGS: number of observations, response and covariate
n <- nrow(crabs)
crabs.data <- with(crabs, list(n = n, Satellites = Satellites, Width = Width))

# One list of starting values per chain (three chains)
crabs.inits <- list(
  list(beta0 = -1, beta1 = -1),
  list(beta0 = 0,  beta1 = 1),
  list(beta0 = 3,  beta1 = 2)
)

# Model: Poisson regression with log link on the centred width,
# independent Gaussian priors (mean 0, precision 0.01) on both coefficients
crabs.model <- "model {
  # Hyperparameters
  beta.mu.0   <- 0         
  beta.tau.0  <- 0.01

  # prior
  beta0 ~ dnorm(beta.mu.0,beta.tau.0)
  beta1 ~ dnorm(beta.mu.0,beta.tau.0)

 #Likelihood
  for(i in 1:n) {
      # Note: link function on LHS of fn assignment
      log(mu[i])  <- beta0+beta1*(Width[i]-mean(Width[]))
      Satellites[i] ~ dpois(mu[i])
    } 
  }"


# Compile the model and run JAGS to the completion of the adaptation stage
results.crabs.A <- jags.model(
  file = textConnection(crabs.model),
  data = crabs.data,
  inits = crabs.inits,
  n.chains = 3
)

# Burn-in of 5000 iterations
update(results.crabs.A, n.iter = 5000)

# Longer run (10000 iterations) used for inference, assuming the chains have converged
results.crabs.B <- coda.samples(
  results.crabs.A,
  variable.names = c("beta0", "beta1"),
  n.iter = 10000
)

# Posterior summary of beta0 and beta1
summary(results.crabs.B)

In [None]:
# Check the chains
  # Trace plots and density plots of the monitored parameters (beta0, beta1) in results.crabs.B
plot(results.crabs.B)  


In [None]:
  # Brooks-Gelman-Rubin statistic (want a value near 1), computed from the chains in results.crabs.B
gelman.plot(results.crabs.B)
 

In [None]:
 # Effective sample size
 # Note: [[1]] selects the first chain only, so these values describe one chain, not all three combined
effectiveSize(results.crabs.B[[1]][,"beta0"])   
effectiveSize(results.crabs.B[[1]][,"beta1"]) 

In [None]:
  # Autocorrelation plots for the intercept and the slope (first chain only, selected by [[1]])
autocorr.plot(results.crabs.B[[1]][,"beta0"],main="Intercept")
autocorr.plot(results.crabs.B[[1]][,"beta1"],main="Slope")

In [None]:
#### Bayesian analysis of the crab data with categorical covariates ####

# Data: response, width, and one logical indicator per non-baseline colour
# (no indicator is created for "dk", which therefore acts as the baseline)
n <- nrow(crabs)
crabs.mult.data <- with(crabs, list(
  n          = n,
  Satellites = Satellites,
  Width      = Width,
  lt.med.Ind = Colour == "lt.med",
  med.Ind    = Colour == "med",
  dk.med.Ind = Colour == "dk.med"
))

# Model: Poisson regression with log link on the centred width plus colour effects,
# independent Gaussian priors (mean 0, precision 0.001) on all coefficients
crabs.mult.model <- "model {
  #Hyperparameters 
  beta.mu.0   <- 0
  beta.tau.0  <- 0.001
  # prior
  beta0       ~ dnorm(beta.mu.0,beta.tau.0)
  beta.width  ~ dnorm(beta.mu.0,beta.tau.0)
  beta.lt.med ~ dnorm(beta.mu.0,beta.tau.0)
  beta.med    ~ dnorm(beta.mu.0,beta.tau.0)
  beta.dk.med ~ dnorm(beta.mu.0,beta.tau.0)
  #Likelihood
  for(i in 1:n) {
    log(mu[i])  <- beta0+beta.width*(Width[i]-mean(Width[])) +
                beta.lt.med*lt.med.Ind[i] + beta.med*med.Ind[i] +
                beta.dk.med*dk.med.Ind[i]
    Satellites[i] ~ dpois(mu[i])
    }
  }"


# Compile the model and run JAGS to the completion of the adaptation stage
# (no initial values are supplied here)
results.crabs.mult.A <- jags.model(
  file = textConnection(crabs.mult.model),
  data = crabs.mult.data,
  n.chains = 3
)

# Burn-in of 5000 iterations
update(results.crabs.mult.A, n.iter = 5000)

# Longer run (10000 iterations) used for inference, assuming the chains have converged
results.crabs.mult.B <- coda.samples(
  results.crabs.mult.A,
  variable.names = c("beta0", "beta.width", "beta.lt.med",
                     "beta.med", "beta.dk.med"),
  n.iter = 10000
)

# Posterior summary of all regression coefficients
summary(results.crabs.mult.B)

In [None]:
# Plot of the approximate expected value according to colour.
# Each curve evaluates the model mean
#   exp(beta0 + beta.width*(x - mean(Width)) + colour effect)
# at the posterior means of the coefficients. Dark is the baseline colour, so it has no colour effect.
x <- seq(1,6,0.1)
m.crabs <- summary(results.crabs.mult.B)$statistics[,"Mean"]

plot(crabs$Width, crabs$Satellites, col="black", xlab="Width", ylab="Satellites")
legend("topleft", c("Dark", "Dark med", "Med", "Light med"), col = c("black", "orangered4", "orangered", "orange"), lty = c(1,5,6,3), lwd = 3)
lines(x, exp(m.crabs["beta0"] + m.crabs["beta.width"]*(x-mean(crabs$Width))), lwd=3)
# The colour coefficient is added to the linear predictor, outside the width term
# (it must not be multiplied by beta.width)
lines(x, exp(m.crabs["beta0"] + m.crabs["beta.width"]*(x-mean(crabs$Width)) + m.crabs["beta.dk.med"]), col="orangered4", lwd=3, lty=5)
lines(x, exp(m.crabs["beta0"] + m.crabs["beta.width"]*(x-mean(crabs$Width)) + m.crabs["beta.med"]), col="orangered", lwd=3, lty=6)
lines(x, exp(m.crabs["beta0"] + m.crabs["beta.width"]*(x-mean(crabs$Width)) + m.crabs["beta.lt.med"]), col="orange", lwd=3, lty=3)


In [None]:
#### Calculations of posterior and predictive probabilities ####

# Stack the draws of all chains into one data frame (one column per coefficient).
# Alternative using the runjags package:
#library(runjags)    # Library to combine the MCMC chains using combine.mcmc function
#fit.crabs <- as.data.frame(combine.mcmc(results.crabs.mult.B))
fit.crabs <- do.call(rbind.data.frame, results.crabs.mult.B)

# Pr(beta_med > 0 | y): share of posterior draws with a positive coefficient
mean(fit.crabs$beta.med > 0)

# Prediction for particular values of the covariates
  # mu* | y, Width=3, Colour=Med: one value of the mean per posterior draw
mustar <- with(
  fit.crabs,
  exp(beta0 + beta.width * (3 - mean(crabs$Width)) + beta.med)
)
  # y* | y, Width=3, Colour=Med: one Poisson draw per value of mu*
ystar <- rpois(length(mustar), mustar)

barplot(table(ystar))
mean(ystar >= 3)   # Pr(y* >= 3 | y, Width=3, Colour=Med)

**Bayesian Analysis - INLA**

In [None]:
# In INLA, the log link function is the only available option for Poisson likelihood
# This is the default link, so it does not have to be specified explicitly

# Centred Width covariate (Width minus its sample mean), stored as a new column
crabs$Width.ctr <- crabs$Width - mean(crabs$Width)

# Means and precisions of the Gaussian priors for the regression coefficients beta
# (the intercept has its own pair of entries)
prior.beta <- list(
  mean.intercept = 0, prec.intercept = 0.01,
  mean = 0, prec = 0.01
)

# Fit the model in INLA with the centred width covariate
# control.fixed = prior.beta sets the prior for the regression coefficients
# control.predictor = list(compute = TRUE) makes inla compute the fitted values (i.e. means mu_i of the observations)
m1.I <- inla(
  formula = Satellites ~ Width.ctr,
  family = "poisson",
  data = crabs,
  control.fixed = prior.beta,
  control.predictor = list(compute = TRUE)
)
summary(m1.I)



The summary statistics are essentially identical to those obtained with JAGS.

In [None]:
# Posterior marginal density of the intercept from the fitted model m1.I
plot(m1.I$marginals.fixed[["(Intercept)"]],
     type = "l", xlab = "x", ylab = "Density",
     main = "Posterior density of beta0 (intercept)",
     xlim = c(0.8, 1.2))
# The same marginal can be selected by position instead of by name:
#plot(m1.I$marginals.fixed[[1]],type='l',xlab="x",ylab="Density",main="Posterior density of beta0 (intercept)",xlim=c(0.8,1.2))

In [None]:
# Posterior marginal density of the coefficient of the centred width from the fitted model m1.I
plot(m1.I$marginals.fixed[["Width.ctr"]],
     type = "l", xlab = "x", ylab = "Density",
     main = "Posterior density of beta1",
     xlim = c(0.3, 0.9))

The marginals also look identical to those obtained with JAGS.

Now we are going to plot the fitted values (the posterior means of $\mu_i=E(y_i|\beta)$), and the data.

In [None]:
# Posterior means of the fitted values, one per row of crabs
fittedvaluesm <- m1.I$summary.fitted.values$mean
plot(crabs$Width, crabs$Satellites,
     xlab = "Width", ylab = "Satellites",
     main = "Posterior mean of fitted val. of Satellites vs Width-Bayesian Pois. reg.")
# The widths are not in increasing order in the dataset, so both vectors are
# reordered by increasing width before drawing the curve
local({
  by.width <- order(crabs$Width)
  lines(crabs$Width[by.width], fittedvaluesm[by.width],
        col = "forestgreen", lwd = 3, lty = 6)
})

In [None]:
#### Bayesian analysis of the crab data with categorical covariates - INLA ####

# In JAGS, we needed to manually create $K-1$ indicator covariates in the dataset for a categorical variable with $K$ possible values
# In INLA, this is automatically done by the system when we use categorical variables in the regression formula

# Same prior for the regression coefficients as in the previous INLA fit; it also applies to the new colour coefficients
prior.beta <- list(
  mean.intercept = 0, prec.intercept = 0.01,
  mean = 0, prec = 0.01
)

# Fit the model in INLA with the centred width covariate and the categorical covariate Colour
# control.fixed = prior.beta sets the prior for the regression coefficients
# control.predictor = list(compute = TRUE) makes inla compute the fitted values (i.e. means mu_i of the observations)
m2.I <- inla(
  formula = Satellites ~ Width.ctr + Colour,
  family = "poisson",
  data = crabs,
  control.fixed = prior.beta,
  control.predictor = list(compute = TRUE)
)
summary(m2.I)

In [None]:
The summary statistics are essentially identical to what we got from JAGS.

Now we are going to plot the posterior mean of $\mu$, $E[\mu]$, as a function of the width, for a set of equally spaced widths, separately for each of the 4 colours.

This can be done by including new rows in the dataset with the response variable (Satellites) set to NA.



In [None]:
# Show the first rows of crabs to recall its columns before adding prediction rows
head(crabs)

In [None]:
# Sequence of widths at which the posterior means of the fitted values will be computed
x <- seq(1, 6, 0.1)
lx <- length(x)

# New rows for these widths, one block of lx rows per colour, with the response Satellites set to NA.
# The blocks are stacked in the order dk, dk.med, med, lt.med.
newdata <- do.call(rbind, lapply(
  c("dk", "dk.med", "med", "lt.med"),
  function(colour) {
    data.frame(Width = x, Colour = as.factor(colour), Satellites = NA)
  }
))

# Append the relevant columns of the original data below the new rows, then add the centred width covariate
# (centred with the mean of the observed widths only)
newdata <- rbind(
  newdata,
  with(crabs, data.frame(Satellites = Satellites, Width = Width, Colour = Colour))
)
newdata$Width.ctr <- newdata$Width - mean(crabs$Width)

# We fit the model in INLA. Note that we need to set control.predictor = list(compute = TRUE, link=1),
# where the link=1 tells INLA to compute the marginals and means of the fitted values mu_i for the rows where the response is set to NA
# normally, when the response is set to NA, the statistics in inla.model$summary.fitted.values will refer to the linear predictor eta_i, and not mu_i.
# So whenever we use a model with link function that is not the identity, and we want to compute the posterior marginal and mean of mu_i and not eta_i,
# we need to set link=1 in control.predictor

m3.I <- inla(
  formula = Satellites ~ Width.ctr + Colour,
  family = "poisson",
  data = newdata,
  control.fixed = prior.beta,
  control.predictor = list(compute = TRUE, link = 1),
  control.compute = list(config = TRUE)
)
summary(m3.I)

In [None]:

# Posterior means of the fitted values from m3.I, plotted as one curve per colour.
# The prediction rows come first in the data passed to inla, in blocks of lx rows:
# rows 1:lx are colour Dark, rows (lx+1):(2*lx) Dark med, rows (2*lx+1):(3*lx) Med, rows (3*lx+1):(4*lx) Light med.

fittedvaluesm <- m3.I$summary.fitted.values$mean

plot(crabs$Width, crabs$Satellites, col = "black", xlab = "Width", ylab = "Satellites")
legend("topleft",
       legend = c("Dark", "Dark med", "Med", "Light med"),
       col = c("black", "orangered4", "orangered", "orange"),
       lty = c(1, 5, 6, 3), lwd = 3, bty = "n")
lines(x, fittedvaluesm[seq_len(lx)],          col = "black",      lwd = 3)
lines(x, fittedvaluesm[lx + seq_len(lx)],     col = "orangered4", lwd = 3, lty = 5)
lines(x, fittedvaluesm[2 * lx + seq_len(lx)], col = "orangered",  lwd = 3, lty = 6)
lines(x, fittedvaluesm[3 * lx + seq_len(lx)], col = "orange",     lwd = 3, lty = 3)

This is the same as what we got by JAGS.

In [None]:
# Number of joint posterior samples to draw
nbsamp <- 100000
# The case Width=3, Colour=Med is already part of the newdata dataframe, in row 51*2+21=123
# Only this single linear predictor and the regression coefficient for colour med are kept
# in the output of inla.posterior.sample, via selection=list(Predictor=123,Colourmed=1)

crab.samples <- inla.posterior.sample(
  n = nbsamp,
  result = m3.I,
  selection = list(Predictor = 123, Colourmed = 1)
)


In [None]:
# Extract the sampled values of the colour-med coefficient and estimate Pr(beta_med > 0 | y)
beta.med.samples <- inla.posterior.sample.eval(
  function(...) {
    Colourmed
  },
  crab.samples
)
mean(beta.med.samples > 0)

In [None]:
# Extract the sampled values of the selected linear predictor, then draw one Poisson
# observation per sample with mean exp(linear predictor)
predictor.samples <- inla.posterior.sample.eval(
  function(...) {
    Predictor
  },
  crab.samples
)
post.pred.samples <- rpois(n = nbsamp, lambda = exp(predictor.samples))

In [None]:
# Bar plot of the frequencies of the posterior predictive draws
library(repr)# It changes image size in Kaggle
# Plot size used by the notebook from here on: width 10, height 4
options(repr.plot.width=10, repr.plot.height = 4)
barplot(table(post.pred.samples))

In [None]:
# Share of posterior predictive draws that are at least 3, i.e. an estimate of Pr(y* >= 3 | y) for the selected case
mean(post.pred.samples>=3)

We got the same results as with JAGS.

# Beetles Data

**Loading the dataset**

In [None]:
#### Introduction of the beetles data ####
# One row per CS2 level: number of beetles exposed, CS2 level, number that died
beetles <- data.frame(
  n.exposed = c(59, 60, 62, 56, 63, 59, 62, 60),
  CS2.level = c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113,
                1.8369, 1.8610, 1.8839),
  y.dead    = c(6, 13, 18, 28, 52, 53, 61, 60)
)
# The three columns are also kept as stand-alone vectors in the workspace
n.exposed <- beetles$n.exposed
CS2.level <- beetles$CS2.level
y.dead    <- beetles$y.dead

**Frequentist Analysis**

In [None]:
#### Frequentist analysis for the beetles data ####

# Binomial regression of (dead, survived) counts on the CS2 level under three link functions
m.logit <- glm(
  cbind(y.dead, n.exposed - y.dead) ~ CS2.level,
  family = binomial(link = "logit"), data = beetles
)
m.cloglog <- glm(
  cbind(y.dead, n.exposed - y.dead) ~ CS2.level,
  family = binomial(link = "cloglog"), data = beetles
)
m.probit <- glm(
  cbind(y.dead, n.exposed - y.dead) ~ CS2.level,
  family = binomial(link = "probit"), data = beetles
)

# Estimated coefficients side by side (rows: logit, cloglog, probit), rounded to 2 decimals
print(round(rbind(coef(m.logit), coef(m.cloglog), coef(m.probit)), digits = 2))

# Predicted death probabilities on a regular grid of CS2 levels
newdata <- data.frame(CS2.level = seq(from = 1.69, to = 1.89, by = 0.01))

pred.logit   <- predict(m.logit,   newdata = newdata, type = "response")
pred.cloglog <- predict(m.cloglog, newdata = newdata, type = "response")
pred.probit  <- predict(m.probit,  newdata = newdata, type = "response")

# Observed fractions dead with the three fitted curves overlaid
plot(beetles$CS2.level, beetles$y.dead / beetles$n.exposed,
     xlab = "CS_2 level", ylab = "Fraction dead")
lines(newdata$CS2.level, pred.logit,   col = "firebrick2",  lwd = 3)
lines(newdata$CS2.level, pred.cloglog, col = "dodgerblue2", lwd = 3, lty = 5)
lines(newdata$CS2.level, pred.probit,  col = "forestgreen", lwd = 3, lty = 6)
legend("bottomright",
       legend = c("logit link", "c-log-log link", "probit link"),
       col = c("firebrick2", "dodgerblue2", "forestgreen"),
       lty = c(1, 5, 6), lwd = 3)

**Bayesian Analysis - JAGS**

In [None]:
#### Bayesian analysis of the beetles data with JAGS####

require(rjags)

# Data passed to JAGS: number of groups, group sizes, deaths and CS2 levels
n <- nrow(beetles)
beetles.data <- with(beetles, list(
  n = n,
  n.exposed = n.exposed,
  y.dead = y.dead,
  CS2.level = CS2.level
))


# Inits: each call draws fresh standard normal starting values for beta0 and then beta1
beetles.inits <- function() {
  list(beta0 = rnorm(1, 0, 1), beta1 = rnorm(1, 0, 1))
}

# Model: binomial regression with logit link on the centred CS2 level,
# independent Gaussian priors (mean 0, precision 0.0001) on both coefficients
beetles.model <- "model {
#Hyperparameters 
beta.mu.0   <- 0
beta.tau.0  <- 0.0001
# prior
beta0 ~ dnorm(beta.mu.0,beta.tau.0)
beta1 ~ dnorm(beta.mu.0,beta.tau.0)
#Likelihood
for(i in 1:n) {
   logit(mu[i])  <- beta0+beta1*(CS2.level[i]-mean(CS2.level[]))
  y.dead[i] ~ dbin(mu[i],n.exposed[i])
    }
}"

# Compile the model and run JAGS to the completion of the adaptation stage
results.beetles.A <- jags.model(
  file = textConnection(beetles.model),
  data = beetles.data,
  inits = beetles.inits,
  n.chains = 3
)

# Burn-in of 5000 iterations
update(results.beetles.A, n.iter = 5000)

# Longer run (10000 iterations) used for inference, assuming the chains have converged
results.beetles.B <- coda.samples(
  results.beetles.A,
  variable.names = c("beta0", "beta1"),
  n.iter = 10000
)

# Check the chains
  # Trace plots and density
plot(results.beetles.B)
  # Brooks-Gelman-Rubin statistic (want a value near 1)
gelman.plot(results.beetles.B)
  # Effective sample size (first chain only, selected by [[1]])
effectiveSize(results.beetles.B[[1]][, "beta0"])
effectiveSize(results.beetles.B[[1]][, "beta1"])
  # Autocorrelation plots (first chain only)
autocorr.plot(results.beetles.B[[1]][, "beta0"], main = "Intercept")
autocorr.plot(results.beetles.B[[1]][, "beta1"], main = "Slope")


# Posterior summary of beta0 and beta1
summary(results.beetles.B)

**Bayesian Analysis - INLA**

In [None]:
#### Bayesian analysis for the beetles data with INLA####

# Centred covariate for CS2.level (level minus its sample mean), stored as a new column
beetles$CS2.level.ctr <- beetles$CS2.level - mean(beetles$CS2.level)


# Means and precisions of the Gaussian priors for the regression coefficients beta
# This is the same prior that we used for JAGS
prior.beta <- list(
  mean.intercept = 0, prec.intercept = 0.0001,
  mean = 0, prec = 0.0001
)


# Inference with three different link functions; Ntrials gives the number of beetles exposed per row
m.I.logit <- inla(
  y.dead ~ CS2.level.ctr,
  Ntrials = n.exposed,
  family = "binomial",
  control.family = list(link = "logit"),
  control.fixed = prior.beta,
  data = beetles
)
summary(m.I.logit)



m.I.cloglog <- inla(
  y.dead ~ CS2.level.ctr,
  Ntrials = n.exposed,
  family = "binomial",
  control.family = list(link = "cloglog"),
  control.fixed = prior.beta,
  data = beetles
)

summary(m.I.cloglog)


m.I.probit <- inla(
  y.dead ~ CS2.level.ctr,
  Ntrials = n.exposed,
  family = "binomial",
  control.family = list(link = "probit"),
  control.fixed = prior.beta,
  data = beetles
)

summary(m.I.probit)

In [None]:
# Plots of the prediction
# Grid of CS2 levels at which the posterior mean of mu_i is computed
x <- seq(1.69, 1.89, 0.01)
lx <- length(x)

# New dataframe that also includes the new positions where we want to compute the posterior mean of mu_i
# The response variable is set to NA in these rows
# The number of exposed is set to 1 in the new rows, as we are interested in the probabilities of death at a given exposure level,
# which can then be read from the posterior means of mu_i

newdata <- data.frame(CS2.level = x, n.exposed = 1, y.dead = NA)
# Centre with the mean of the observed CS2 levels, as in the original data
newdata$CS2.level.ctr <- newdata$CS2.level - mean(beetles$CS2.level)
# Prediction rows first (rows 1:lx), observed data after them
beetles.pred <- rbind(newdata, beetles)

# link=1 in control.predictor tells inla to apply the link function when computing the fitted values,
# i.e. to compute the posterior mean of mu_i

m.I.logit.pred <- inla(
  y.dead ~ CS2.level.ctr,
  Ntrials = n.exposed,
  family = "binomial",
  control.family = list(link = "logit"),
  data = beetles.pred,
  control.fixed = prior.beta,
  control.predictor = list(compute = TRUE, link = 1)
)

m.I.cloglog.pred <- inla(
  y.dead ~ CS2.level.ctr,
  Ntrials = n.exposed,
  family = "binomial",
  control.family = list(link = "cloglog"),
  data = beetles.pred,
  control.fixed = prior.beta,
  control.predictor = list(compute = TRUE, link = 1)
)

m.I.probit.pred <- inla(
  y.dead ~ CS2.level.ctr,
  Ntrials = n.exposed,
  family = "binomial",
  control.family = list(link = "probit"),
  data = beetles.pred,
  control.fixed = prior.beta,
  control.predictor = list(compute = TRUE, link = 1)
)

In [None]:
# Posterior means of the fitted values for the 3 models, drawn over the observed fractions dead.
# Only the first lx fitted values are used: these are the prediction rows placed at the top of beetles.pred.

plot(beetles$CS2.level, beetles$y.dead / beetles$n.exposed,
     xlab = "CS_2 level", ylab = "Fraction dead",
     main = "Posterior means of fitted values in INLA")
lines(x, m.I.logit.pred$summary.fitted.values$mean[seq_len(lx)],
      col = "firebrick2", lwd = 3)
lines(x, m.I.cloglog.pred$summary.fitted.values$mean[seq_len(lx)],
      col = "dodgerblue2", lwd = 3, lty = 5)
lines(x, m.I.probit.pred$summary.fitted.values$mean[seq_len(lx)],
      col = "forestgreen", lwd = 3, lty = 6)
legend("topleft",
       legend = c("logit link", "c-log-log link", "probit link"),
       col = c("firebrick2", "dodgerblue2", "forestgreen"),
       lty = c(1, 5, 6), lwd = 3)

Cloglog seems to be doing the best job here. The log marginal likelihood is also the largest for that model,
indicating that it fits the data the best among the models corresponding to the 3 different link functions.