diff --git a/ecsades/DESCRIPTION b/ecsades/DESCRIPTION index be6827d..84d5a5c 100644 --- a/ecsades/DESCRIPTION +++ b/ecsades/DESCRIPTION @@ -1,8 +1,8 @@ Package: ecsades Type: Package Title: Environmental contours for safe design of ships and other marine structures -Version: 1.0.0 -Date: 2018-12-19 +Version: 1.0.1 +Date: 2019-01-24 Author: Ye Liu Maintainer: Ye Liu Description: diff --git a/ecsades/R/estimate_iform.r b/ecsades/R/estimate_iform.r index 2e82fef..5afaf67 100644 --- a/ecsades/R/estimate_iform.r +++ b/ecsades/R/estimate_iform.r @@ -141,8 +141,8 @@ estimate_iform = function( calc[, p1:=pnorm(u1)] calc[, p2:=pnorm(u2)] calc[, hs:=qweibull(p1, shape=wln$hs$par["shape"], scale=wln$hs$par["scale"])+wln$hs$par["loc"]] - calc[, m:=wln$tp$par["d"] + wln$tp$par["e"] * log(hs + wln$tp$par["f"])] - calc[, s:=sqrt(wln$tp$par["g"] + wln$tp$par["k"] * exp(wln$tp$par["m"] * (hs ^ wln$tp$par["q"])))] + calc[, m:=wln$tp$par[1] + wln$tp$par[2] * (hs ^ wln$tp$par[3])] + calc[, s:=wln$tp$par[4] + wln$tp$par[5] * exp(wln$tp$par[6] * hs)] calc[, tp:=qlnorm(p2, m, s)] # Return diff --git a/ecsades/R/fit_wln.r b/ecsades/R/fit_wln.r index f1d7197..3483a24 100644 --- a/ecsades/R/fit_wln.r +++ b/ecsades/R/fit_wln.r @@ -4,8 +4,7 @@ #' This function fits a Weibull-log-normal (\code{ht}) model to the given wave data, such that the wave height #' \code{hs} follows a translated (or 3-parameter) Weibull distribution, and the wave period given the wave #' follows a conditional log-normal distributuion with the location and scale parameters as functions -#' of the corresponding \code{hs} value. The formulation of the conditional log-normal distribution follows that -#' proposed in Haver and Winterstein (2008). +#' of the corresponding \code{hs} value. #' #' @param data the wave data in the form a \code{data.table} with wave height \code{hs} and wave period #' \code{tp} as columns. @@ -13,10 +12,33 @@ #' @param npy the number of data points per year, usually estimated by the number of rows in the #' supplied wave data divided by the total period of data coverage (in years). #' +#' @param hs_constraint_range a multiplier to the maximum \code{hs} in the data within such that the standard +#' deviation of the log-normal conditional distribution of \code{tp} given \code{hs} is constrained to be strictly +#' positive (see details for more information) over this user-defined range. The default value is 1.5. +#' +#' @param weighted_tp_fit whether or not the estimation of the conditional model for \code{tp} uses a weighted +#' likelihood (see details). The default option is \code{FALSE}. +#' #' @details #' The input \code{data} must be a \code{data.table} object with \code{hs} and \code{tp} columns. This can be #' generated by reading a CSV file using function \code{\link[data.table]{fread}}. #' +#' The formulation of the conditional distribution is given by +#' \deqn{log(tp | hs=h) ~ N(\mu(h), \sigma(h)^2)} +#' where the mean and the standard deviation are +#' \deqn{\mu(h) = a_0 + a_1 h^a_2 and \sigma(h) = b_0 + b_1 exp(h * b_2)} +#' +#' The strictly positive constraint for the standard deviation has a large influence on the estimated values for +#' coefficients \eqn{b_0}, \eqn{b_1} and \eqn{b_2}. In general, enforcing the standardard deviation to be strictly +#' positive over a larger range (larger \code{hs_constraint_range}) may result in a poorer fit of the model +#' to the provided observation data. +#' +#' There is an option to use weighted likelihood when fitting the log-normal distribution to \code{tp|hs}. When this +#' option is used, the observation data are divided to bins of equal sizes (every whole number in \code{hs}) and the +#' weight for each data point is inversely proportional to the total number of points within the same bin. This +#' option effectively weighs up points in the tail of the distribution, but weighs down the points in the centre +#' of the distribution. +#' #' @return An joint distribution object of class \code{wln} containing the key information of a fitted #' Weibull-log-normal model, including the three parameters of the Weibull distribution for \code{hs} #' and the seven parameters of the conditional log-normal distribution for \code{tp} given \code{tp} @@ -28,6 +50,9 @@ #' # Fit Weibull-log-normal distribution #' noaa_wln = fit_wln(data = noaa_ww3, npy = nrow(noaa_ww3)/10) #' +#' # Fit Weibull-log-normal distribution with additional options +#' noaa_wln2 = fit_wln(data = noaa_ww3, npy = nrow(noaa_ww3)/10, hs_constraint_range = 3, weighted_tp_fit = TRUE) +#' #' @references #' Haver, Sverre & Winterstein, Steven. (2009). Environmental Contour Lines: A Method for Estimating Long #' Term Extremes by a Short Term Analysis. Transactions - Society of Naval Architects and Marine Engineers. 116. @@ -35,28 +60,32 @@ #' @seealso \code{\link{fit_ht}}, \code{\link{sample_jdistr}} #' #' @export -fit_wln = function(data, npy){ +fit_wln = function(data, npy, hs_constraint_range = 1.5, weighted_tp_fit = FALSE){ res = list() class(res) = "wln" res$npy = npy res$hs = .fit_weibull(data = data$hs) - res$tp = .fit_iform_lnorm(hs = data$hs, tp = data$tp) + res$tp = .fit_iform_lnorm( + hs = data$hs, tp = data$tp, + hs_constraint_range = hs_constraint_range, weighted_tp_fit = weighted_tp_fit) return(res) } # Conditional log-normal used for IFROM ----------------------------------- -.fit_iform_lnorm = function(hs, tp){ +.fit_iform_lnorm = function(hs, tp, hs_constraint_range, weighted_tp_fit){ log_tp = log(tp) - # theta = c(d, e, f, g, k, m, q) - theta0 = c(d=mean(log_tp), e=0, f=0, g=sd(log_tp), k=0, m=0, q=0) + theta0 = c(mean(log_tp), 0, 0, sd(log_tp), 0, 0) op = optim( par = theta0, fn = .nll_iform_lnorm, - log_tp = log_tp, hs = hs, control = list(maxit=2e3)) + log_tp = log_tp, hs = hs, + hs_constraint_range = hs_constraint_range, + weighted_tp_fit = weighted_tp_fit, + control = list(maxit=2e3)) res = list() class(res) = "iform_lnorm" @@ -66,22 +95,34 @@ fit_wln = function(data, npy){ return(res) } -.nll_iform_lnorm = function(theta, log_tp, hs){ +.nll_iform_lnorm = function(theta, log_tp, hs, hs_constraint_range, weighted_tp_fit){ # Constraints - if(theta[3] <= 0 | theta[6]>0 | theta[4]<0 | theta[4]+theta[5]<0){ + mean_range = theta[1] + theta[2] * c(max(hs)*hs_constraint_range, .limit_zero)^theta[3] + sd_range = theta[4] + theta[5] * exp(theta[6] * c(max(hs)*hs_constraint_range, .limit_zero)) + + if(any(is.na(sd_range)) || any(is.na(mean_range)) || min(sd_range)<.limit_zero){ return(.limit_inf) } # mean & sd - norm_mean = theta[1] + theta[2] * log(hs + theta[3]) - norm_var = theta[4] + theta[5] * exp(theta[6] * (hs ^ theta[7])) - if (any(norm_var <= 0)){ + norm_mean = theta[1] + theta[2]* (hs^theta[3]) + norm_sd = theta[4] + theta[5] * exp(theta[6] * hs) + if (any(norm_sd <= 0)){ return(.limit_inf) } + # Weights + if(weighted_tp_fit){ + freq = data.table(hs, rhs =round(hs*2)) + freq[, weight:=1/.N, .(rhs)] + weight = freq$weight + }else{ + weight = 1 + } + # return - nll = -dnorm(log_tp, norm_mean, sqrt(norm_var), log=TRUE) + nll = -dnorm(log_tp, norm_mean, norm_sd, log=TRUE)*weight - log_tp*weight res = min(.limit_inf, sum(nll)) return(res) } diff --git a/ecsades/R/sample_jdistr.r b/ecsades/R/sample_jdistr.r index b583e60..0a4cdfd 100644 --- a/ecsades/R/sample_jdistr.r +++ b/ecsades/R/sample_jdistr.r @@ -110,9 +110,9 @@ sample_jdistr = function(jdistr, sim_year){ hs = rweibull(n_sim, shape = wln$hs$par["shape"], scale = wln$hs$par["scale"]) + wln$hs$par["loc"] # sample tp cond on hs - tp_norm_mean = wln$tp$par[1] + wln$tp$par[2] * log(hs + wln$tp$par[3]) - tp_norm_var = wln$tp$par[4] + wln$tp$par[5] * exp(wln$tp$par[6] * (hs ^ wln$tp$par[7])) - tp = exp(rnorm(n_sim, tp_norm_mean, sqrt(tp_norm_var))) + tp_norm_mean = wln$tp$par[1] + wln$tp$par[2] * (hs ^ wln$tp$par[3]) + tp_norm_sd = wln$tp$par[4] + wln$tp$par[5] * exp(wln$tp$par[6] * hs) + tp = exp(rnorm(n_sim, tp_norm_mean, tp_norm_sd)) res = data.table(hs, tp) return(res) diff --git a/ecsades/man/ecsades-package.Rd b/ecsades/man/ecsades-package.Rd index 8c192e1..a6e8985 100644 --- a/ecsades/man/ecsades-package.Rd +++ b/ecsades/man/ecsades-package.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ecsades.R +% Please edit documentation in R/ecsades.r \docType{package} \name{ecsades-package} \alias{ecsades} diff --git a/ecsades/man/fit_wln.Rd b/ecsades/man/fit_wln.Rd index 29bef7c..50b2933 100644 --- a/ecsades/man/fit_wln.Rd +++ b/ecsades/man/fit_wln.Rd @@ -4,7 +4,7 @@ \alias{fit_wln} \title{Fitting Weibull-log-normal model to wave data} \usage{ -fit_wln(data, npy) +fit_wln(data, npy, hs_constraint_range = 1.5, weighted_tp_fit = FALSE) } \arguments{ \item{data}{the wave data in the form a \code{data.table} with wave height \code{hs} and wave period @@ -12,6 +12,13 @@ fit_wln(data, npy) \item{npy}{the number of data points per year, usually estimated by the number of rows in the supplied wave data divided by the total period of data coverage (in years).} + +\item{hs_constraint_range}{a multiplier to the maximum \code{hs} in the data within such that the standard +deviation of the log-normal conditional distribution of \code{tp} given \code{hs} is constrained to be strictly +positive (see details for more information) over this user-defined range. The default value is 1.5.} + +\item{weighted_tp_fit}{whether or not the estimation of the conditional model for \code{tp} uses a weighted +likelihood (see details). The default option is \code{FALSE}.} } \value{ An joint distribution object of class \code{wln} containing the key information of a fitted @@ -22,12 +29,27 @@ and the seven parameters of the conditional log-normal distribution for \code{tp This function fits a Weibull-log-normal (\code{ht}) model to the given wave data, such that the wave height \code{hs} follows a translated (or 3-parameter) Weibull distribution, and the wave period given the wave follows a conditional log-normal distributuion with the location and scale parameters as functions -of the corresponding \code{hs} value. The formulation of the conditional log-normal distribution follows that -proposed in Haver and Winterstein (2008). +of the corresponding \code{hs} value. } \details{ The input \code{data} must be a \code{data.table} object with \code{hs} and \code{tp} columns. This can be generated by reading a CSV file using function \code{\link[data.table]{fread}}. + +The formulation of the conditional distribution is given by +\deqn{log(tp | hs=h) ~ N(\mu(h), \sigma(h)^2)} +where the mean and the standard deviation are +\deqn{\mu(h) = a_0 + a_1 h^a_2 and \sigma(h) = b_0 + b_1 exp(h * b_2)} + +The strictly positive constraint for the standard deviation has a large influence on the estimated values for +coefficients \eqn{b_0}, \eqn{b_1} and \eqn{b_2}. In general, enforcing the standardard deviation to be strictly +positive over a larger range (larger \code{hs_constraint_range}) may result in a poorer fit of the model +to the provided observation data. + +There is an option to use weighted likelihood when fitting the log-normal distribution to \code{tp|hs}. When this +option is used, the observation data are divided to bins of equal sizes (every whole number in \code{hs}) and the +weight for each data point is inversely proportional to the total number of points within the same bin. This +option effectively weighs up points in the tail of the distribution, but weighs down the points in the centre +of the distribution. } \examples{ # Load data @@ -36,6 +58,9 @@ data(noaa_ww3) # Fit Weibull-log-normal distribution noaa_wln = fit_wln(data = noaa_ww3, npy = nrow(noaa_ww3)/10) +# Fit Weibull-log-normal distribution with additional options +noaa_wln2 = fit_wln(data = noaa_ww3, npy = nrow(noaa_ww3)/10, hs_constraint_range = 3, weighted_tp_fit = TRUE) + } \references{ Haver, Sverre & Winterstein, Steven. (2009). Environmental Contour Lines: A Method for Estimating Long