v1.1

AgrDataSci · Jun 27, 2023 · cd2f862 · cd2f862
1 parent a5f7e64
commit cd2f862
Show file tree

Hide file tree

Showing 11 changed files with 528 additions and 96 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: gosset
 Type: Package
 Title: Tools for Data Analysis in Experimental Agriculture
-Version: 1.0
+Version: 1.1
 Authors@R: c(person("Kauê", "de Sousa", 
         email = "desousa.kaue@gmail.com", role = c("aut", "cre"),
         comment = c(ORCID = "0000-0002-7571-7845")),
@@ -10,7 +10,9 @@ Authors@R: c(person("Kauê", "de Sousa",
         person("David", "Brown", role = c("aut"),
         comment = c(ORCID = "0000-0003-2859-1618")), 
         person("Jonathan", "Steinke", role = c("aut"),
-        comment = c(ORCID = "0000-0001-5742-702X")))
+        comment = c(ORCID = "0000-0001-5742-702X")),
+        person("Joost", "van Heerwaarden", role = c("aut"),
+        comment = c(ORCID = "0000-0002-4959-3914")))
 URL: https://agrdatasci.github.io/gosset/
 BugReports: https://github.com/agrdatasci/gosset/issues
 Description: Toolkit to analyse experimental agriculture data, 

diff --git a/NAMESPACE b/NAMESPACE
@@ -31,6 +31,7 @@ S3method(rowbind,default)
 S3method(rowbind,list)
 S3method(worth_map,default)
 S3method(worth_map,list)
+export(LR_test)
 export(akaike_weights)
 export(btpermute)
 export(compare)
@@ -117,6 +118,7 @@ importFrom(stats,model.frame)
 importFrom(stats,model.response)
 importFrom(stats,model.weights)
 importFrom(stats,na.omit)
+importFrom(stats,pchisq)
 importFrom(stats,pnorm)
 importFrom(stats,predict)
 importFrom(stats,reorder)

diff --git a/R/LR_test.R b/R/LR_test.R
@@ -0,0 +1,55 @@
+#' Likelihood-ratio test
+#' 
+#' Assesses the goodness of fit of two competing
+#'  statistical models
+#' 
+#' @param x a object of class rankings or grouped_rankings
+#' @param split a vector indicating the splitting rule for the test
+#' @param ... additional arguments passed to methods
+#' @author Joost van Heerwaarden and Kauê de Sousa
+#' @examples 
+#' library("PlackettLuce")
+#' example("beans", package = "PlackettLuce")
+#' G = group(R, rep(seq_len(nrow(beans)), 4))
+#' d = cbind(G, beans)
+#' 
+#' split = ifelse(d$maxTN < 18.7175, T, F)
+#' 
+#' LR_test(G, split)
+#' @importFrom stats pchisq
+#' @importFrom PlackettLuce PlackettLuce
+#' @export
+LR_test = function(x, split, ...) {
+
+  # fit model with all data
+  PL_all = PlackettLuce::PlackettLuce(x)
+
+  # iterate over splits, estimate worth, and store models in list
+  PL_model_list = c()
+  DF_resid_sum = 0 
+  LL_sum = 0 
+
+  # split the data 
+  x_split = split(x, split)
+
+  for(i in 1:length(x_split)){
+    #fit the model
+    PL_model_lev  =  PlackettLuce::PlackettLuce(x_split[[i]])
+    LL_sum = LL_sum + PL_model_lev$loglik
+    DF_resid_sum = DF_resid_sum + PL_model_lev$df.residual
+  }
+
+  DF_delta = PL_all$df.residual - DF_resid_sum
+  Deviance = -2 * (PL_all$loglik - LL_sum)
+
+  # Chisq P value
+  p_val_chisq  =  1 - stats::pchisq(Deviance, DF_delta)
+
+  out  =  data.frame(deviance = Deviance, DF_delta = DF_delta, p_chisq = p_val_chisq)
+
+  class(out) = union("gosset_df", class(out))
+
+  return(out)
+
+}
+
diff --git a/dev/GxE_LRT_demo/.here b/dev/GxE_LRT_demo/.here
diff --git a/dev/GxE_LRT_demo/scripts/0.function.PL_LR_test.R b/dev/GxE_LRT_demo/scripts/0.function.PL_LR_test.R
@@ -0,0 +1,54 @@
+library(PlackettLuce)
+library(ClimMobTools)
+library(gosset)
+
+PL_LR_test<-function(tricot.data,test.factor){
+
+###first create PL ranks for whole trial
+PL.data.all<-rank_numeric(data = tricot.data,
+                        items = "geno",
+                        input = "rank", 
+                        id = "farm", 
+                        ascending = FALSE)
+
+##fit PL model
+PL.model.all<-PlackettLuce(PL.data.all)
+
+###now iterate over environments, estimate worths, and store models in list
+PL.model.list<-c()
+DF.resid.sum=0 ##DF
+LL.sum=0 ##LL
+
+level.list<-unique(tricot.data[, test.factor])
+
+
+for(lev in 1:length(level.list)){
+tricot.data.lev <- tricot.data[tricot.data[, test.factor] == level.list[lev], ]	
+
+PL.data.lev <- rank_numeric(data = tricot.data.lev,
+                            items = "geno",
+                            input = "rank", 
+                            id = "farm", 
+                            ascending = FALSE)
+##fit PL model
+PL.model.lev <- PlackettLuce(PL.data.lev)
+
+LL.sum = LL.sum + PL.model.lev$loglik
+DF.resid.sum = DF.resid.sum + PL.model.lev$df.residual
+
+##write to list
+PL.model.list[[lev]]<-PL.model.lev
+
+}
+
+DF.delta= PL.model.all$df.residual-DF.resid.sum
+Deviance = -2 * (PL.model.all$loglik - LL.sum)
+
+##Chisq P value
+p.val.chisq <- 1 - pchisq(Deviance, DF.delta)
+
+out <- c(Deviance = Deviance ,DF.delta = DF.delta, p.val.chisq = p.val.chisq)
+
+return(out)	
+
+}
diff --git a/dev/GxE_LRT_demo/scripts/0.function.ge.sim.simple.R b/dev/GxE_LRT_demo/scripts/0.function.ge.sim.simple.R
@@ -0,0 +1,71 @@
+library(mvtnorm)
+library(PlackettLuce)
+library(ClimMobTools)
+
+ge.sim.simple<-function(n.env,
+                        n.geno, 
+                        n.farms.env, 
+                        sigma.env, cov.env, 
+                        sigma.plot){
+
+##make mockup worth data with genetic gain, multiple environments, partial overlap for different environments, with SEs and GxY
+
+###for now genrate multivariate normal with some covarance matrix to get multy environment data
+##set up random environental gradients covering each farm
+# n.env=5
+# n.geno=10
+# n.farms.env<-50
+
+# #all sigma are variances
+
+# ##environmental vcov
+# sigma.env=250^2 ##variances
+# cov.env=250^2 ##variances
+# sigma.plot=700^2
+
+npackages = n.farms.env* n.env
+
+vcov.env<-matrix(cov.env, n.env, n.env)
+diag(vcov.env)=sigma.env #covariance matrix   ####
+
+
+mu.env<-rep(0, n.env)
+ge.mat<-rmvnorm(n.geno, mean = mu.env, sigma = vcov.env)
+colnames(ge.mat)<-paste("environment",1:n.env,sep="_")
+rownames(ge.mat)<-1:n.geno
+ge.mat<-data.frame(geno=rownames(ge.mat), ge.mat)
+
+
+ge.mat.long<-reshape(ge.mat,direction="long",idvar="geno", v.names="environment", varying=2:ncol(ge.mat))
+
+colnames(ge.mat.long)[colnames(ge.mat.long)=="environment"]<-"trait"
+colnames(ge.mat.long)[colnames(ge.mat.long)=="time"]<-"environment"
+
+ge.mat.long$geno<-as.factor(ge.mat.long$geno)
+ge.mat.long$geno.environment<-paste(ge.mat.long$geno,ge.mat.long$environment)
+
+####make tricot design and divide farms over environments
+tricot.data<-randomise(npackages = npackages, itemnames =  ge.mat$geno)
+tricot.data<-data.frame(farm=1:nrow(tricot.data),environment=rep(1:n.env,each=ceiling(npackages/n.env)), tricot.data)
+
+##convert to long format and order
+tricot.data<-reshape(tricot.data,direction="long",idvar="farm", v.names="item", varying=3:ncol(tricot.data))
+tricot.data<-tricot.data[order(tricot.data$farm, tricot.data$environment),]
+##change column names
+colnames(tricot.data)[match(c("time","item"),colnames(tricot.data))]<-c("rank","geno")
+##add geno.environment and assign trait.true
+tricot.data$geno.environment<-with(tricot.data,paste(geno, environment))
+tricot.data$trait.true<-ge.mat.long$trait[match(tricot.data$geno.environment, ge.mat.long $geno.environment)]
+#add plot error
+tricot.data$trait.obs<-tricot.data$trait.true+rnorm(nrow(tricot.data),0, sqrt(sigma.plot))
+
+##now change rank for actual observed rank
+tricot.data$rank<-unlist(with(tricot.data,tapply(trait.obs, farm,function(x) rank(-1*x))))
+
+
+return(tricot.data)
+
+
+}
+
+
diff --git a/dev/GxE_LRT_demo/scripts/1.ge.detec.power.analysis.R b/dev/GxE_LRT_demo/scripts/1.ge.detec.power.analysis.R
@@ -0,0 +1,78 @@
+library(here)
+library(car)
+
+#set working directory
+setwd(here())
+
+##read functions
+
+function.list<-list.files("scripts") 
+function.list<-function.list[grep("0.", function.list)]
+function.list<-paste("scripts/",function.list,sep="")
+
+funs<-lapply(function.list,source)
+rm(funs)
+
+
+#####now run simulations with and without GxE and store p values
+
+sim.results.null <- c()
+sim.results.gxe <- c()
+n.sim=10#00  ###. number of simulations, 1000 takes a bit without parallelisation
+
+for(sim in 1:n.sim){
+
+##without gxe
+
+##simulate 
+sim.data.null<-ge.sim.simple(n.env=2,
+                             n.geno=10, 
+                             n.farms.env=100, 
+                             sigma.env=250^2, 
+                             cov.env=250^2, 
+                             sigma.plot=500^2)
+
+##test for gxe
+test.null<-PL_LR_test(sim.data.null, test.factor="environment")
+
+sim.results.null<-rbind(sim.results.null, test.null)
+
+##simulate 
+sim.data.gxe<-ge.sim.simple(n.env=2, 
+                            n.geno=10, 
+                            n.farms.env=100, 
+                            sigma.env=250^2, 
+                            cov.env=150^2, 
+                            sigma.plot=500^2)
+
+##test for gxe
+test.gxe<-PL_LR_test(sim.data.gxe, test.factor="environment")
+
+sim.results.gxe <-rbind(sim.results.gxe, test.gxe)
+
+print(sim)	
+}
+
+sim.results.null<-as.data.frame(sim.results.null)
+sim.results.gxe <-as.data.frame(sim.results.gxe)
+
+h.null <- hist(sim.results.null$p.val.chisq, breaks= 20, plot=FALSE)
+h.null$counts= h.null$counts/sum(h.null$counts)
+
+h.gxe <- hist(sim.results.gxe$p.val.chisq, breaks= 20, plot=FALSE)
+h.gxe$counts= h.gxe$counts/sum(h.gxe$counts)
+
+
+pdf("results/ims/power_hist.pdf", width=20,height=10)
+par(mfrow=c(1,2))
+plot(h.null,xlab="p-value",main="null",col="gray",cex.axis=1.5,cex.lab=1.5)
+plot(h.gxe,xlab="p-value",main="GxE",col="gray",cex.axis=1.5,cex.lab=1.5)
+dev.off()
+
+
+##plot QQ
+pdf("results/ims/power_qq.pdf", width=20,height=10)
+par(mfrow=c(1,2))
+qqPlot(sim.results.null$p.val.chisq, distribution="unif",ylab="observed",main="null", envelope=F)
+qqPlot(sim.results.gxe$p.val.chisq, distribution="unif",ylab="observed",main="GxE", envelope=F)
+dev.off()