Permalink
Browse files

Merge pull request #38 from brookslogan/calculate-weights

Add calculate-weights.R and outputted weight csv's
  • Loading branch information...
nickreich committed Oct 13, 2017
2 parents c06c36f + 2fe846f commit 1d1050d75bdd11eea06247c6d23934588cf92c61
View
@@ -0,0 +1,210 @@
library("pipeR")
library("epiforecast")
## Set up parallel:
options(mc.cores=parallel::detectCores()-1L)
## Specify target types:
target.types.list = list(
"seasonal"=c("Season onset", "Season peak week", "Season peak percentage"),
"weekly"=paste0(1:4," wk ahead")
)
## Specify portions of cv_apply indexer lists corresponding to Location, Target:
weighting.scheme.partial.indexer.lists = list(
"constant" = list(all=NULL, all=NULL),
"target-type-based" = list(all=NULL, subsets=target.types.list),
"target-based" = list(all=NULL, each=NULL),
"target-and-region-based" = list(each=NULL, each=NULL)
)
## Read in & tweak scores.csv
component.score.df = read.csv("../scores/scores.csv", check.names=FALSE, stringsAsFactors=FALSE) %>>%
tibble::as_tibble() %>>%
dplyr::mutate(Score = dplyr::if_else(is.nan(Score), -Inf, Score)) %>>%
dplyr::mutate(Metric = "some log score") %>>%
{.}
## Perform some checks:
if (any(is.na(component.score.df[["Score"]]))) {
stop ("No NA's are allowed for the component")
}
multiple.entry.df =
component.score.df %>>%
dplyr::group_by(Season, `Model Week`, Location, Target, Metric, Model) %>>%
dplyr::filter(n()!=1L) %>>%
dplyr::mutate(`Entry Count`=n()) %>>%
dplyr::ungroup() %>>%
{.}
if (nrow(multiple.entry.df) != 0L) {
stop ("There should not be multiple Score's for the same Season, `Model Week`, Location, Target, Metric, and Model.")
}
## Cast to array, introducing NA's for missing entries in Cartesian product:
component.score.array =
component.score.df %>>%
reshape2::acast(Season ~ `Model Week` ~ Location ~ Target ~ Metric ~ Model, value.var="Score") %>>%
{names(dimnames(.)) <- c("Season", "Model Week", "Location", "Target", "Metric", "Model"); .}
## Replace NA's corresponding to incomplete sets of forecasts with -Inf's:
n.models = length(dimnames(component.score.array)[["Model"]])
na.score.counts = apply(is.na(component.score.array), 1:5, sum)
incomplete.forecast.set.flags = ! na.score.counts %in% c(0L, n.models)
if (any(incomplete.forecast.set.flags)) {
warning("Some models have incomplete sets of forecasts; assigning them scores of -Inf for those weeks.")
incomplete.forecast.score.flags =
rep(incomplete.forecast.set.flags, n.models) &
is.na(component.score.array)
dim(incomplete.forecast.score.flags) <- dim(component.score.array)
dimnames(incomplete.forecast.score.flags) <- dimnames(component.score.array)
print(apply(incomplete.forecast.score.flags, 6L, sum))
component.score.array[incomplete.forecast.score.flags] <- -Inf
}
## All NA's should now correspond to missing model week 73 scores for seasons
## the don't include a model week 73.
neginf.score.counts = apply(!is.na(component.score.array) &
component.score.array == -Inf, 1:5, sum)
neginf.forecast.set.flags = neginf.score.counts == n.models
if (any(neginf.forecast.set.flags)) {
apply(neginf.forecast.set.flags, 2L, sum)
stop ("Either there is some instance where all models received a log score of -Inf, or there was a bug in preparing the component score array.")
}
## na.score.countsp = apply(is.na(component.score.array), 1:5, sum)
## na.forecast.set.flagsp = na.score.countsp != 0L
## apply(na.forecast.set.flagsp, 2L, sum)
## Indexer lists for prospective forecasts:
weighting.scheme.prospective.indexer.lists =
weighting.scheme.partial.indexer.lists %>>%
lapply(function(partial.indexer.list) {
c(list(all=NULL, all=NULL), # Season, Model Week
partial.indexer.list, # Location, Target
list(all=NULL), # Metric
list(all=NULL) # Model should always be all=NULL
)
})
## Indexer lists for CV forecasts:
weighting.scheme.cv.indexer.lists =
weighting.scheme.partial.indexer.lists %>>%
lapply(function(partial.indexer.list) {
c(list(loo=NULL, all=NULL), # Season, Model Week
partial.indexer.list, # Location, Target
list(all=NULL), # Metric
list(all=NULL) # Model should always be all=NULL
)
})
generate_indexer_list_weights = function(component.score.array, indexer.list) {
component.score.array %>>%
cv_apply(
indexer.list,
function(train, test) {
if (!identical(dimnames(train)[[5L]], "some log score")) {
stop ('Weighting routine only supports optimizing for "some log score".')
}
instance.method.score.mat =
R.utils::wrap(train, list(1:5,6L))
na.counts = rowSums(is.na(instance.method.score.mat))
if (any(! na.counts %in% c(0L, ncol(instance.method.score.mat)))) {
stop ("NA appeared in probability matrix, but not from nonexistent EW53.")
}
instance.method.score.mat <- instance.method.score.mat[na.counts==0L,,drop=FALSE]
neginf.counts = rowSums(instance.method.score.mat==-Inf)
if (any(neginf.counts == ncol(instance.method.score.mat))) {
print(names(neginf.counts)[neginf.counts==ncol(instance.method.score.mat)])
stop ("All methods assigned a log score of -Inf for some instance.")
}
degenerate.em.weights = epiforecast:::degenerate_em_weights(exp(instance.method.score.mat))
return (degenerate.em.weights)
},
parallel_dim_i=1L # use parallelism across seasons (only helps for LOSOCV)
) %>>%
## --- Fix up dim, dimnames: ---
{
d = dim(.)
dn = dimnames(.)
## Remove Model="all" dimension:
stopifnot(identical(tail(dn, 1L), list(Model="all")))
new.d = head(d, -1L)
new.dn = head(dn, -1L)
## Call the new, unnamed dim 1 "Model":
stopifnot(identical(
head(dn, 1L),
stats::setNames(dimnames(component.score.array)["Model"], "")))
names(new.dn)[[1L]] <- "Model"
dim(.) <- new.d
dimnames(.) <- new.dn
.
} %>>%
## --- Convert to data.frame with desired format: ---
{
## Rename dimensions:
stopifnot(identical(
names(dimnames(.)),
c("Model", "Season", "Model Week", "Location", "Target", "Metric")
))
names(dimnames(.)) <-
c("component_model_id","season","Model Week","location","target","Metric")
## Drop ="all" dimensions:
d = dim(.)
dn = dimnames(.)
keep.dim.flags = !sapply(dn, identical, "all")
dim(.) <- d[keep.dim.flags]
dimnames(.) <- dn[keep.dim.flags]
.
} %>>%
## Melt into tibble:
reshape2::melt(value.name="weight") %>>% tibble::as_tibble() %>>%
dplyr::mutate_if(is.factor, as.character) %>>%
{.}
}
## Target-type df:
target.types.df =
lapply(target.types.list, tibble::as_tibble) %>>%
dplyr::bind_rows(.id="target.type") %>>%
dplyr::rename(target=value)
## Generate the weight files:
for (weighting.scheme.i in seq_along(weighting.scheme.partial.indexer.lists)) {
## extract info from lists:
weighting.scheme.name = names(weighting.scheme.partial.indexer.lists)[[weighting.scheme.i]]
weighting.scheme.cv.indexer.list = weighting.scheme.cv.indexer.lists[[weighting.scheme.i]]
weighting.scheme.prospective.indexer.list = weighting.scheme.prospective.indexer.lists[[weighting.scheme.i]]
print(weighting.scheme.name)
## determine season label for next season:
cv.season.ints = as.integer(gsub("/.*","",dimnames(component.score.array)[["Season"]]))
prospective.season.int = max(cv.season.ints) + 1L
prospective.season.label = prospective.season.int %>>% paste0("/",.+1L)
## generate weight df's and bind together:
cv.weight.df = generate_indexer_list_weights(
component.score.array, weighting.scheme.cv.indexer.list
)
prospective.weight.df = generate_indexer_list_weights(
component.score.array, weighting.scheme.prospective.indexer.list
) %>>%
dplyr::mutate(season=prospective.season.label)
combined.weight.df =
dplyr::bind_rows(cv.weight.df, prospective.weight.df)
## expand out target types if applicable:
if ("target" %in% names(combined.weight.df)) {
combined.weight.df <-
combined.weight.df %>>%
dplyr::rename(target.type=target) %>>%
dplyr::left_join(
dimnames(component.score.array)[["Target"]] %>>%
{tibble::tibble(target.type=., target=.)} %>>%
dplyr::bind_rows(target.types.df),
by = "target.type"
) %>>%
dplyr::select(-target.type) %>>%
## restore original column order:
magrittr::extract(names(combined.weight.df))
}
## print weight df and write to csv file:
print(combined.weight.df)
write.csv(combined.weight.df, file.path("..","weights",paste0(weighting.scheme.name,"-weights.csv")))
}
@@ -0,0 +1,121 @@
"","component_model_id","season","weight"
"1","CU-BMA","2010/2011",0.00662845039139193
"2","CU-EAKFC","2010/2011",0.0161935839456562
"3","Delphi-BasisRegression","2010/2011",8.627748040449e-59
"4","Delphi-DeltaDensity1","2010/2011",0.160170394468013
"5","Delphi-DeltaDensity2","2010/2011",0.0391351436595064
"6","Delphi-EmpiricalBayes1","2010/2011",0.00719254894898104
"7","Delphi-EmpiricalBayes2","2010/2011",3.54244915645993e-11
"8","Delphi-EmpiricalFuture","2010/2011",0.0454024731222317
"9","Delphi-EmpiricalTraj","2010/2011",4.77371592782356e-13
"10","Delphi-Stat","2010/2011",0.0805978816068611
"11","Delphi-Uniform","2010/2011",2.76918995188174e-66
"12","ReichLab-KCDE","2010/2011",0.360650329492706
"13","ReichLab-KDE","2010/2011",0.034905328904761
"14","ReichLab-SARIMA1","2010/2011",2.6865410890408e-05
"15","ReichLab-SARIMA2","2010/2011",0.2490970000131
"16","CU-BMA","2011/2012",0.0251035585742347
"17","CU-EAKFC","2011/2012",0.0111478509883234
"18","Delphi-BasisRegression","2011/2012",8.58698661632853e-59
"19","Delphi-DeltaDensity1","2011/2012",0.189792488984331
"20","Delphi-DeltaDensity2","2011/2012",0.012461181431268
"21","Delphi-EmpiricalBayes1","2011/2012",0.0141815403350113
"22","Delphi-EmpiricalBayes2","2011/2012",3.66638890907366e-07
"23","Delphi-EmpiricalFuture","2011/2012",0.0525582581635572
"24","Delphi-EmpiricalTraj","2011/2012",4.72952836432822e-25
"25","Delphi-Stat","2011/2012",0.0736732352007229
"26","Delphi-Uniform","2011/2012",3.83642082883923e-71
"27","ReichLab-KCDE","2011/2012",0.375361332481448
"28","ReichLab-KDE","2011/2012",1.47831458479968e-06
"29","ReichLab-SARIMA1","2011/2012",2.12477683224406e-08
"30","ReichLab-SARIMA2","2011/2012",0.245718687639859
"31","CU-BMA","2012/2013",0.0175642799398342
"32","CU-EAKFC","2012/2013",0.0111924004910501
"33","Delphi-BasisRegression","2012/2013",2.04744113928887e-61
"34","Delphi-DeltaDensity1","2012/2013",0.147763951289798
"35","Delphi-DeltaDensity2","2012/2013",0.0778842437781156
"36","Delphi-EmpiricalBayes1","2012/2013",0.0213184120202454
"37","Delphi-EmpiricalBayes2","2012/2013",3.14100622838621e-07
"38","Delphi-EmpiricalFuture","2012/2013",0.0337890403470896
"39","Delphi-EmpiricalTraj","2012/2013",5.98178764954363e-16
"40","Delphi-Stat","2012/2013",0.0728566085894828
"41","Delphi-Uniform","2012/2013",1.04263319727975e-69
"42","ReichLab-KCDE","2012/2013",0.321302134513801
"43","ReichLab-KDE","2012/2013",0.046112059861517
"44","ReichLab-SARIMA1","2012/2013",1.37111850412621e-06
"45","ReichLab-SARIMA2","2012/2013",0.250215183949939
"46","CU-BMA","2013/2014",0.0250145901417345
"47","CU-EAKFC","2013/2014",0.0141513073476872
"48","Delphi-BasisRegression","2013/2014",2.76509811224188e-62
"49","Delphi-DeltaDensity1","2013/2014",0.137028366462807
"50","Delphi-DeltaDensity2","2013/2014",0.0718830540435093
"51","Delphi-EmpiricalBayes1","2013/2014",0.0210047074404135
"52","Delphi-EmpiricalBayes2","2013/2014",1.51058375900043e-08
"53","Delphi-EmpiricalFuture","2013/2014",0.0361837079837647
"54","Delphi-EmpiricalTraj","2013/2014",6.71764758801285e-16
"55","Delphi-Stat","2013/2014",0.0659084982650972
"56","Delphi-Uniform","2013/2014",1.04645967479643e-70
"57","ReichLab-KCDE","2013/2014",0.367991488420518
"58","ReichLab-KDE","2013/2014",0.0450821820569222
"59","ReichLab-SARIMA1","2013/2014",3.14164633591294e-07
"60","ReichLab-SARIMA2","2013/2014",0.215751768567074
"61","CU-BMA","2014/2015",0.0273441816619702
"62","CU-EAKFC","2014/2015",0.0205954372857028
"63","Delphi-BasisRegression","2014/2015",3.62914498981867e-166
"64","Delphi-DeltaDensity1","2014/2015",0.217601630121189
"65","Delphi-DeltaDensity2","2014/2015",0.0517355875862947
"66","Delphi-EmpiricalBayes1","2014/2015",0.0114801276279397
"67","Delphi-EmpiricalBayes2","2014/2015",5.11580404024813e-24
"68","Delphi-EmpiricalFuture","2014/2015",0.0398168441860499
"69","Delphi-EmpiricalTraj","2014/2015",3.16688380186502e-40
"70","Delphi-Stat","2014/2015",0.00158153686455326
"71","Delphi-Uniform","2014/2015",6.56213576990202e-190
"72","ReichLab-KCDE","2014/2015",0.361200646826586
"73","ReichLab-KDE","2014/2015",0.0476516898594135
"74","ReichLab-SARIMA1","2014/2015",2.5403517656845e-09
"75","ReichLab-SARIMA2","2014/2015",0.220992315439949
"76","CU-BMA","2015/2016",0.0150073297224518
"77","CU-EAKFC","2015/2016",0.0184021725541569
"78","Delphi-BasisRegression","2015/2016",1.54428291541616e-40
"79","Delphi-DeltaDensity1","2015/2016",0.105188159833456
"80","Delphi-DeltaDensity2","2015/2016",0.0578786078878456
"81","Delphi-EmpiricalBayes1","2015/2016",0.0259021982687448
"82","Delphi-EmpiricalBayes2","2015/2016",3.41788074272904e-06
"83","Delphi-EmpiricalFuture","2015/2016",0.0396680501876802
"84","Delphi-EmpiricalTraj","2015/2016",1.53239385790132e-10
"85","Delphi-Stat","2015/2016",0.0800105745429743
"86","Delphi-Uniform","2015/2016",3.38920439666193e-45
"87","ReichLab-KCDE","2015/2016",0.312673708614036
"88","ReichLab-KDE","2015/2016",0.033032380939935
"89","ReichLab-SARIMA1","2015/2016",4.01391461948464e-06
"90","ReichLab-SARIMA2","2015/2016",0.312229385500117
"91","CU-BMA","2016/2017",0.00981254581495154
"92","CU-EAKFC","2016/2017",0.0188489714426367
"93","Delphi-BasisRegression","2016/2017",2.11609936244986e-126
"94","Delphi-DeltaDensity1","2016/2017",0.195015904516087
"95","Delphi-DeltaDensity2","2016/2017",0.0288934693811508
"96","Delphi-EmpiricalBayes1","2016/2017",0.0259321173074763
"97","Delphi-EmpiricalBayes2","2016/2017",9.8964188320308e-16
"98","Delphi-EmpiricalFuture","2016/2017",0.0437340017878809
"99","Delphi-EmpiricalTraj","2016/2017",2.11823796461728e-31
"100","Delphi-Stat","2016/2017",0.0257875956939218
"101","Delphi-Uniform","2016/2017",1.20553311203565e-144
"102","ReichLab-KCDE","2016/2017",0.355282182099694
"103","ReichLab-KDE","2016/2017",0.046005247551801
"104","ReichLab-SARIMA1","2016/2017",7.20989624777641e-11
"105","ReichLab-SARIMA2","2016/2017",0.250687964332299
"106","CU-BMA","2017/2018",0.0176806273419303
"107","CU-EAKFC","2017/2018",0.0155819029342376
"108","Delphi-BasisRegression","2017/2018",2.72413426615974e-86
"109","Delphi-DeltaDensity1","2017/2018",0.167890657748619
"110","Delphi-DeltaDensity2","2017/2018",0.0475398925337842
"111","Delphi-EmpiricalBayes1","2017/2018",0.0178575321682285
"112","Delphi-EmpiricalBayes2","2017/2018",2.07910952998106e-11
"113","Delphi-EmpiricalFuture","2017/2018",0.0423064937953058
"114","Delphi-EmpiricalTraj","2017/2018",4.1289758651304e-22
"115","Delphi-Stat","2017/2018",0.0544716809436721
"116","Delphi-Uniform","2017/2018",3.8515901840165e-98
"117","ReichLab-KCDE","2017/2018",0.35233570378755
"118","ReichLab-KDE","2017/2018",0.035556270327131
"119","ReichLab-SARIMA1","2017/2018",6.51806404432595e-09
"120","ReichLab-SARIMA2","2017/2018",0.248779231880686
Oops, something went wrong.

0 comments on commit 1d1050d

Please sign in to comment.