Skip to content

Commit

Permalink
v1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
kauedesousa committed Jun 27, 2023
1 parent a5f7e64 commit cd2f862
Show file tree
Hide file tree
Showing 11 changed files with 528 additions and 96 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: gosset
Type: Package
Title: Tools for Data Analysis in Experimental Agriculture
Version: 1.0
Version: 1.1
Authors@R: c(person("Kauê", "de Sousa",
email = "desousa.kaue@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-7571-7845")),
Expand All @@ -10,7 +10,9 @@ Authors@R: c(person("Kauê", "de Sousa",
person("David", "Brown", role = c("aut"),
comment = c(ORCID = "0000-0003-2859-1618")),
person("Jonathan", "Steinke", role = c("aut"),
comment = c(ORCID = "0000-0001-5742-702X")))
comment = c(ORCID = "0000-0001-5742-702X")),
person("Joost", "van Heerwaarden", role = c("aut"),
comment = c(ORCID = "0000-0002-4959-3914")))
URL: https://agrdatasci.github.io/gosset/
BugReports: https://github.com/agrdatasci/gosset/issues
Description: Toolkit to analyse experimental agriculture data,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ S3method(rowbind,default)
S3method(rowbind,list)
S3method(worth_map,default)
S3method(worth_map,list)
export(LR_test)
export(akaike_weights)
export(btpermute)
export(compare)
Expand Down Expand Up @@ -117,6 +118,7 @@ importFrom(stats,model.frame)
importFrom(stats,model.response)
importFrom(stats,model.weights)
importFrom(stats,na.omit)
importFrom(stats,pchisq)
importFrom(stats,pnorm)
importFrom(stats,predict)
importFrom(stats,reorder)
Expand Down
55 changes: 55 additions & 0 deletions R/LR_test.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#' Likelihood-ratio test
#'
#' Assesses the goodness of fit of two competing
#' statistical models
#'
#' @param x a object of class rankings or grouped_rankings
#' @param split a vector indicating the splitting rule for the test
#' @param ... additional arguments passed to methods
#' @author Joost van Heerwaarden and Kauê de Sousa
#' @examples
#' library("PlackettLuce")
#' example("beans", package = "PlackettLuce")
#' G = group(R, rep(seq_len(nrow(beans)), 4))
#' d = cbind(G, beans)
#'
#' split = ifelse(d$maxTN < 18.7175, T, F)
#'
#' LR_test(G, split)
#' @importFrom stats pchisq
#' @importFrom PlackettLuce PlackettLuce
#' @export
LR_test = function(x, split, ...) {

# fit model with all data
PL_all = PlackettLuce::PlackettLuce(x)

# iterate over splits, estimate worth, and store models in list
PL_model_list = c()
DF_resid_sum = 0
LL_sum = 0

# split the data
x_split = split(x, split)

for(i in 1:length(x_split)){
#fit the model
PL_model_lev = PlackettLuce::PlackettLuce(x_split[[i]])
LL_sum = LL_sum + PL_model_lev$loglik
DF_resid_sum = DF_resid_sum + PL_model_lev$df.residual
}

DF_delta = PL_all$df.residual - DF_resid_sum
Deviance = -2 * (PL_all$loglik - LL_sum)

# Chisq P value
p_val_chisq = 1 - stats::pchisq(Deviance, DF_delta)

out = data.frame(deviance = Deviance, DF_delta = DF_delta, p_chisq = p_val_chisq)

class(out) = union("gosset_df", class(out))

return(out)

}

Empty file added dev/GxE_LRT_demo/.here
Empty file.
54 changes: 54 additions & 0 deletions dev/GxE_LRT_demo/scripts/0.function.PL_LR_test.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
library(PlackettLuce)
library(ClimMobTools)
library(gosset)

PL_LR_test<-function(tricot.data,test.factor){

###first create PL ranks for whole trial
PL.data.all<-rank_numeric(data = tricot.data,
items = "geno",
input = "rank",
id = "farm",
ascending = FALSE)

##fit PL model
PL.model.all<-PlackettLuce(PL.data.all)

###now iterate over environments, estimate worths, and store models in list
PL.model.list<-c()
DF.resid.sum=0 ##DF
LL.sum=0 ##LL

level.list<-unique(tricot.data[, test.factor])


for(lev in 1:length(level.list)){
tricot.data.lev <- tricot.data[tricot.data[, test.factor] == level.list[lev], ]

PL.data.lev <- rank_numeric(data = tricot.data.lev,
items = "geno",
input = "rank",
id = "farm",
ascending = FALSE)
##fit PL model
PL.model.lev <- PlackettLuce(PL.data.lev)

LL.sum = LL.sum + PL.model.lev$loglik
DF.resid.sum = DF.resid.sum + PL.model.lev$df.residual

##write to list
PL.model.list[[lev]]<-PL.model.lev

}

DF.delta= PL.model.all$df.residual-DF.resid.sum
Deviance = -2 * (PL.model.all$loglik - LL.sum)

##Chisq P value
p.val.chisq <- 1 - pchisq(Deviance, DF.delta)

out <- c(Deviance = Deviance ,DF.delta = DF.delta, p.val.chisq = p.val.chisq)

return(out)

}
71 changes: 71 additions & 0 deletions dev/GxE_LRT_demo/scripts/0.function.ge.sim.simple.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
library(mvtnorm)
library(PlackettLuce)
library(ClimMobTools)

ge.sim.simple<-function(n.env,
n.geno,
n.farms.env,
sigma.env, cov.env,
sigma.plot){

##make mockup worth data with genetic gain, multiple environments, partial overlap for different environments, with SEs and GxY

###for now genrate multivariate normal with some covarance matrix to get multy environment data
##set up random environental gradients covering each farm
# n.env=5
# n.geno=10
# n.farms.env<-50

# #all sigma are variances

# ##environmental vcov
# sigma.env=250^2 ##variances
# cov.env=250^2 ##variances
# sigma.plot=700^2

npackages = n.farms.env* n.env

vcov.env<-matrix(cov.env, n.env, n.env)
diag(vcov.env)=sigma.env #covariance matrix ####


mu.env<-rep(0, n.env)
ge.mat<-rmvnorm(n.geno, mean = mu.env, sigma = vcov.env)
colnames(ge.mat)<-paste("environment",1:n.env,sep="_")
rownames(ge.mat)<-1:n.geno
ge.mat<-data.frame(geno=rownames(ge.mat), ge.mat)


ge.mat.long<-reshape(ge.mat,direction="long",idvar="geno", v.names="environment", varying=2:ncol(ge.mat))

colnames(ge.mat.long)[colnames(ge.mat.long)=="environment"]<-"trait"
colnames(ge.mat.long)[colnames(ge.mat.long)=="time"]<-"environment"

ge.mat.long$geno<-as.factor(ge.mat.long$geno)
ge.mat.long$geno.environment<-paste(ge.mat.long$geno,ge.mat.long$environment)

####make tricot design and divide farms over environments
tricot.data<-randomise(npackages = npackages, itemnames = ge.mat$geno)
tricot.data<-data.frame(farm=1:nrow(tricot.data),environment=rep(1:n.env,each=ceiling(npackages/n.env)), tricot.data)

##convert to long format and order
tricot.data<-reshape(tricot.data,direction="long",idvar="farm", v.names="item", varying=3:ncol(tricot.data))
tricot.data<-tricot.data[order(tricot.data$farm, tricot.data$environment),]
##change column names
colnames(tricot.data)[match(c("time","item"),colnames(tricot.data))]<-c("rank","geno")
##add geno.environment and assign trait.true
tricot.data$geno.environment<-with(tricot.data,paste(geno, environment))
tricot.data$trait.true<-ge.mat.long$trait[match(tricot.data$geno.environment, ge.mat.long $geno.environment)]
#add plot error
tricot.data$trait.obs<-tricot.data$trait.true+rnorm(nrow(tricot.data),0, sqrt(sigma.plot))

##now change rank for actual observed rank
tricot.data$rank<-unlist(with(tricot.data,tapply(trait.obs, farm,function(x) rank(-1*x))))


return(tricot.data)


}


78 changes: 78 additions & 0 deletions dev/GxE_LRT_demo/scripts/1.ge.detec.power.analysis.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
library(here)
library(car)

#set working directory
setwd(here())

##read functions

function.list<-list.files("scripts")
function.list<-function.list[grep("0.", function.list)]
function.list<-paste("scripts/",function.list,sep="")

funs<-lapply(function.list,source)
rm(funs)


#####now run simulations with and without GxE and store p values

sim.results.null <- c()
sim.results.gxe <- c()
n.sim=10#00 ###. number of simulations, 1000 takes a bit without parallelisation

for(sim in 1:n.sim){

##without gxe

##simulate
sim.data.null<-ge.sim.simple(n.env=2,
n.geno=10,
n.farms.env=100,
sigma.env=250^2,
cov.env=250^2,
sigma.plot=500^2)

##test for gxe
test.null<-PL_LR_test(sim.data.null, test.factor="environment")

sim.results.null<-rbind(sim.results.null, test.null)

##simulate
sim.data.gxe<-ge.sim.simple(n.env=2,
n.geno=10,
n.farms.env=100,
sigma.env=250^2,
cov.env=150^2,
sigma.plot=500^2)

##test for gxe
test.gxe<-PL_LR_test(sim.data.gxe, test.factor="environment")

sim.results.gxe <-rbind(sim.results.gxe, test.gxe)

print(sim)
}

sim.results.null<-as.data.frame(sim.results.null)
sim.results.gxe <-as.data.frame(sim.results.gxe)

h.null <- hist(sim.results.null$p.val.chisq, breaks= 20, plot=FALSE)
h.null$counts= h.null$counts/sum(h.null$counts)

h.gxe <- hist(sim.results.gxe$p.val.chisq, breaks= 20, plot=FALSE)
h.gxe$counts= h.gxe$counts/sum(h.gxe$counts)


pdf("results/ims/power_hist.pdf", width=20,height=10)
par(mfrow=c(1,2))
plot(h.null,xlab="p-value",main="null",col="gray",cex.axis=1.5,cex.lab=1.5)
plot(h.gxe,xlab="p-value",main="GxE",col="gray",cex.axis=1.5,cex.lab=1.5)
dev.off()


##plot QQ
pdf("results/ims/power_qq.pdf", width=20,height=10)
par(mfrow=c(1,2))
qqPlot(sim.results.null$p.val.chisq, distribution="unif",ylab="observed",main="null", envelope=F)
qqPlot(sim.results.gxe$p.val.chisq, distribution="unif",ylab="observed",main="GxE", envelope=F)
dev.off()
Loading

0 comments on commit cd2f862

Please sign in to comment.