Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,4 @@ export(transformation_counts)
export(vanKrevelenCognostics)
export(vanKrevelenPlot)
importFrom(dplyr,"%>%")
importFrom(data.table, data.table)
40 changes: 32 additions & 8 deletions R/calc_dbe.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,14 @@
#' Calculate double bond equivalent (DBE) and double bond equivalent minus Oxygen (DBE-O) values for peaks where empirical formula is available
#'
#' @param ftmsObj an object of class 'peakData' or 'compoundData', typically a result of \code{\link{as.peakData}} or \code{\link{mapPeaksToCompounds}}.
#' @param valences a named list with the valence for each element. Names must be any of 'C', 'H', 'N', 'O', 'S', 'P'. Values must be integers corresponding to the valence for each element. Defaults to NULL, in which case the valences that result in the formula given in the details section are used.
#'
#' @details DBE \eqn{= 1 + C - O - S - 0.5(N + P + H)}{= 1 + C - O - S - 0.5*(N + P + H)} and DBE-0 \eqn{= 1 + C - O - S - 0.5(N + P + H) - O}{= 1 + C - O - S - 0.5*(N + P + H) - O}
#' @details
#' \tabular{ll}{
#' \tab If no valences are provided, DBE \eqn{= 1 + 0.5*(2*C - H + N + P)} \cr
#' \tab If valences are provided, DBE \eqn{= 1 + \frac{\sum_{i}N_i(V_i-2)}{2}} where \eqn{N_i} and \eqn{V_i} are the number of atoms and the valence of element \eqn{i}. \cr
#' \tab DBE-O \eqn{= 1 + 0.5(2C - H + N + P) - O}{= 1 + 0.5*(2*C - H + N + P) - O} \cr
#' }
#'
#' @references Koch, B. P., & Dittmar, T. (2006). From mass to structure: an aromaticity index for high‐resolution mass data of natural organic matter. Rapid communications in mass spectrometry, 20(5), 926-932.
#' @references Errata: Koch, B. P., & Dittmar, T. (2016). From mass to structure: an aromaticity index for high-resolution mass data of natural organic matter. Rapid communications in mass spectrometry, 30(1), 250. DOI: 10.1002/rcm.7433
Expand All @@ -14,23 +20,41 @@
#' @author Lisa Bramer, Allison Thompson
#'

calc_dbe <- function(ftmsObj){

calc_dbe <- function(ftmsObj, valences = NULL){

# check that ftmsObj is of the correct class #
if(!inherits(ftmsObj, "peakData") & !inherits(ftmsObj, "compoundData")) stop("ftmsObj must be an object of class 'peakData' or 'compoundData'")

# get coefficients that will multiply each elemental count. Each coefficient is equal to {valence}-2
if(is.null(valences)){
# coefficients that result in the equation given in @details
coefs <- list('C' = 2, 'H' = -1, 'N' = 1, 'O' = 0, 'S' = 0, 'P' = 1)
}
else{
cond1 <- inherits(valences, 'list')
cond2 <- all(names(valences) %in% c('C', 'H', 'N', 'O', 'S', 'P'))
if(!all(cond1, cond2)) stop("argument valences must be a named list of integers with names 'C', 'H', 'N', 'O', 'S', 'P' and values representing the valence for each element.")
coefs <- lapply(c('C', 'H', 'N', 'O', 'S', 'P'), function(x) if(!is.null(valences[[x]])) valences[[x]]-2 else 2)
names(coefs) <- c('C', 'H', 'N', 'O', 'S', 'P')
}

# pull e_meta out of ftmsObj #
temp = ftmsObj$e_meta

temp$DBE = 1 + 0.5*(2*temp[,getCarbonColName(ftmsObj)] - temp[,getHydrogenColName(ftmsObj)]+ temp[,getNitrogenColName(ftmsObj)] + temp[,getPhosphorusColName(ftmsObj)])
# get existing elemental counts
C_counts = if(getCarbonColName(ftmsObj) %in% colnames(temp)) temp[,getCarbonColName(ftmsObj)] else 0
H_counts = if(getHydrogenColName(ftmsObj) %in% colnames(temp)) temp[,getHydrogenColName(ftmsObj)] else 0
N_counts = if(getNitrogenColName(ftmsObj) %in% colnames(temp)) temp[,getNitrogenColName(ftmsObj)] else 0
O_counts = if(getOxygenColName(ftmsObj) %in% colnames(temp)) temp[,getOxygenColName(ftmsObj)] else 0
S_counts = if(getSulfurColName(ftmsObj) %in% colnames(temp)) temp[,getSulfurColName(ftmsObj)] else 0
P_counts = if(getPhosphorusColName(ftmsObj) %in% colnames(temp)) temp[,getPhosphorusColName(ftmsObj)] else 0

temp$DBE_O = temp$DBE - temp[,getOxygenColName(ftmsObj)]
temp$DBE = 1 + 0.5*(coefs[['C']]*C_counts + coefs[['H']]*H_counts + coefs[['N']]*N_counts + coefs[['O']]*O_counts + coefs[['S']]*S_counts + coefs[['P']]*P_counts)

temp$DBE_AI = 1 + temp[,getCarbonColName(ftmsObj)] - temp[,getOxygenColName(ftmsObj)] - temp[,getSulfurColName(ftmsObj)] -
0.5*(temp[,getNitrogenColName(ftmsObj)] + temp[,getPhosphorusColName(ftmsObj)] + temp[,getHydrogenColName(ftmsObj)])
temp$DBE_O = 1 + 0.5*(2*C_counts - H_counts + N_counts + P_counts) - O_counts

temp$DBE_AI = 1 + C_counts - O_counts - S_counts - 0.5*(N_counts + P_counts + H_counts)


if(length(which(is.na(temp[,getMFColName(ftmsObj)]))) > 0){
temp$DBE[which(is.na(temp[,getMFColName(ftmsObj)]))] = NA
temp$DBE_O[which(is.na(temp[,getMFColName(ftmsObj)]))] = NA
Expand Down
28 changes: 23 additions & 5 deletions R/calc_kendrick.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,51 @@
#' Calculates the Kendrick mass and Kendrick defect needed for Kendrick plots
#'
#' @param ftmsObj an object of class 'peakData' or 'compoundData', typically a result of \code{\link{as.peakData}} or \code{\link{mapPeaksToCompounds}}. e_meta must be present.
#' @param base character, one of 'CH2', 'CO2', 'H2', 'H2O', or 'CHO', the family of compounds to be used in determining the Kendrick Mass. Defaults to 'CH2'.
#'
#' @return an object of the same class as \code{ftmsObj} with columns in \code{e_meta} giving Kendrick mass and defects
#'
#' @details
#' \tabular{ll}{
#' \tab Kendrick-mass = \eqn{(Observed-Mass)*(Nominal-Mass(base)/Exact-Mass(base))} \cr
#' \tab Kendrick-defect = ceiling(Kendrick-mass) - Kendrick-mass
#' }
#'
#' @references Hughey, C. A., Hendrickson, C. L., Rodgers, R. P., Marshall, A. G., & Qian, K. (2001). Kendrick mass defect spectrum: a compact visual analysis for ultrahigh-resolution broadband mass spectra. Analytical Chemistry, 73(19), 4676-4681.
#'
#' @author Lisa Bramer
#'

calc_kendrick <- function(ftmsObj){

calc_kendrick <- function(ftmsObj, base = 'CH2'){
### DA PLAN: Calculate ratio = nominal mass/exact mass for each proposed molecule

# check that ftmsObj is of the correct class #
if(!inherits(ftmsObj, "peakData") & !inherits(ftmsObj, "compoundData")) stop("ftmsObj must be an object of class 'peakData' or 'compoundData'")

# check that ftmsObj doesn't already have cnames specified for ratios in e_meta #
if(!is.null(getKendrickDefectColName(ftmsObj)) | !is.null(getKendrickMassColName(ftmsObj))) message("mass_cname and/or defect_cname were already specified and will be overwritten")

if(!(base %in% c('CH2', 'CO2', 'H2', 'H2O', 'CHO'))) stop("Base compound must be one of 'CH2', 'CO2', 'H2', 'H2O', or 'CHO'")

mass_cname = getMassColName(ftmsObj)

# check that all the cnames are character strings #
if(class(mass_cname) != "character") stop("mass_cname must be a character string")



# pull e_meta out of ftmsObj #
temp = ftmsObj$e_meta

# determine which ratio to use: CH2, CO2, H2, H2O, CHO
coef = switch(base,
'CH2' = 14/14.01565,
'CO2' = 44/43.98983,
'H2' = 2/2.015650,
'H2O' = 18/18.010565,
'CHO' = 29/29.00274
)

# calculate kendrick mass #
temp$kmass = as.numeric(as.character(temp[,mass_cname]))*(14/14.01565)
temp$kmass = as.numeric(as.character(temp[,mass_cname]))*coef

# calculate kendrick defect #
temp$kdefect = ceiling(temp$kmass) - temp$kmass
Expand Down
16 changes: 13 additions & 3 deletions R/compound_calcs.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#'
#' @param ftmsObj an object of class 'peakData' or 'compoundData', typically a result of \code{\link{as.peakData}} or \code{\link{mapPeaksToCompounds}}.
#' @param calc_fns a character string specifying which calculations to perform. Available options are: calc_aroma, calc_dbe, calc_gibbs, calc_kendrick, calc_nosc, and calc_vankrev.
#' @param calc_args a list with names corresponding to the available calc_fns. Each element is a named sub-list of extra arguments for the specified function.
#'
#' @details The calculations are as follows for each of the `calc_fns`:
#'
Expand All @@ -13,13 +14,15 @@
#' \tab AI_Mod \eqn{= \frac{1 + C - 0.5O - S - 0.5(N + P + H)}{C - 0.5*O - S - N - P}}{= (1 + C - 0.5*O - S - 0.5*(N + P + H))/(C - 0.5*O - S - N - P)} \cr
#' \tab \cr
#' calc_dbe \tab calculates double bond equivalent (DBE) and double bond equivalent minus Oxygen (DBE_O) \cr
#' \tab DBE \eqn{= 1 + C - O - S - 0.5(N + P + H)}{= 1 + C - O - S - 0.5*(N + P + H)} \cr
#' \tab DBE \eqn{= 1 + 0.5*(2*C - H + N + P)} \cr
#' \tab DBE_O \eqn{= 1 + 0.5(2C - H + N + P) - O}{= 1 + 0.5*(2*C - H + N + P) - O} \cr
#' \tab \cr
#' calc_gibbs \tab calculates Cox Gibbs Free Energy (GFE) \cr
#' \tab GFE \eqn{= 60.3 - 28.5NOSC}{= 60.3 - 28.5*NOSC} \cr
#' \tab \cr
#' calc_kendrick \tab calculates Kendrick Mass and Kendrick Defect \cr
#' \tab Kendrick-mass = \eqn{(Observed-Mass)*(Nominal-Mass(base)/Exact-Mass(base))} \cr
#' \tab Kendrick-defect = ceiling(Kendrick-mass) - Kendrick-mass \cr
#' \tab \cr
#' calc_nosc \tab calculates nominal oxidation state of Carbon (NOSC) \cr
#' \tab NOSC \eqn{= -(\frac{4C + H - 3N - 2O + 5P - 2S}{C}) + 4}{= -((4*C + H - 3*N - 2*O + 5*P - 2*S)/(C)) + 4} \cr
Expand All @@ -31,13 +34,15 @@
#' @references Koch, B. P., & Dittmar, T. (2006). From mass to structure: an aromaticity index for high‐resolution mass data of natural organic matter. Rapid communications in mass spectrometry, 20(5), 926-932.
#' @references Errata: Koch, B. P., & Dittmar, T. (2016). From mass to structure: an aromaticity index for high-resolution mass data of natural organic matter. Rapid communications in mass spectrometry, 30(1), 250. DOI: 10.1002/rcm.7433
#' @references LaRowe and Van Cappellen, 2011, "Degradation of natural organic matter: A thermodynamic analysis". Geochimica et Cosmochimica Acta. 75.
#' @references Hughey, C. A., Hendrickson, C. L., Rodgers, R. P., Marshall, A. G., & Qian, K. (2001). Kendrick mass defect spectrum: a compact visual analysis for ultrahigh-resolution broadband mass spectra. Analytical Chemistry, 73(19), 4676-4681.
#'
#' @return an object of the same class as \code{ftmsObj} with columns in \code{e_meta} giving the newly calculated values
#'
#' @author Kelly Stratton
#' @export

compound_calcs <- function(ftmsObj, calc_fns=c("calc_aroma", "calc_dbe", "calc_gibbs", "calc_kendrick", "calc_nosc", "calc_element_ratios")){
compound_calcs <- function(ftmsObj, calc_fns=c("calc_aroma", "calc_dbe", "calc_gibbs", "calc_kendrick", "calc_nosc", "calc_element_ratios"),
calc_args = list("calc_aroma" = NULL, "calc_dbe" = NULL, "calc_gibbs" = NULL, "calc_kendrick" = NULL, "calc_nosc" = NULL, "calc_element_ratios" = NULL)){

## initial checks ##

Expand All @@ -55,7 +60,12 @@ compound_calcs <- function(ftmsObj, calc_fns=c("calc_aroma", "calc_dbe", "calc_g
for(i in 1:length(calc_fns)){
# set f to the function that is named in the ith element of compound_calcs #
f <- get(as.character(calc_fns[i]), envir=asNamespace("ftmsRanalysis"), mode="function")
ftmsObj <- f(ftmsObj)

# check for extra arguments
if(!is.null(calc_args[[as.character(calc_fns[i])]])){
ftmsObj <- do.call(f, c(list(ftmsObj = ftmsObj), calc_args[[as.character(calc_fns[i])]]))
}
else ftmsObj <- f(ftmsObj)
}

return(ftmsObj)
Expand Down
62 changes: 32 additions & 30 deletions R/transformation_counts.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#' @author Lisa Bramer
#' @export

transformation_counts <- function(ftmsObj, transformDF, transformDigits = 4, transMass_cname, transID_cname, transOther_cname = NULL){
transformation_counts <- function(ftmsObj, transformDF, transformDigits = 4, transMass_cname, transID_cname, transOther_cname = NULL, parallel = TRUE){

# check that ftmsObj is of the correct class #
if(!inherits(ftmsObj, "peakData") & !inherits(ftmsObj, "compoundData")) stop("ftmsObj must be an object of class 'peakData' or 'compoundData'")
Expand All @@ -40,9 +40,7 @@ transformation_counts <- function(ftmsObj, transformDF, transformDigits = 4, tra

# if the data is not p/a, then convert to p/a #
if(getDataScale(ftmsObj) != "pres"){
temp = edata_transform(ftmsObj, "pres")
}else{
temp = ftmsObj
ftmsObj = edata_transform(ftmsObj, "pres")
}

# pull edata and mass cnames #
Expand All @@ -51,9 +49,9 @@ transformation_counts <- function(ftmsObj, transformDF, transformDigits = 4, tra

# if mass information is in edata then we can just use edata #
if(edata_id == mass_id){
data = temp$e_data
data = ftmsObj$e_data
}else{ # otherwise we need to merge edata and emeta #
data = merge(temp$e_meta[,c(edata_id, mass_id)], temp$e_data, by = edata_id)[,-(edata_id)]
data = merge(ftmsObj$e_meta[,c(edata_id, mass_id)], ftmsObj$e_data, by = edata_id)[,-(edata_id)]
}

# set a local dopar argument #
Expand All @@ -63,46 +61,50 @@ transformation_counts <- function(ftmsObj, transformDF, transformDigits = 4, tra
transformDF[,transMass_cname] = round(transformDF[,transMass_cname], transformDigits)

# setup parallelization variables #
num_cores = parallel::detectCores()

cl = parallel::makeCluster(num_cores - 1)
doParallel::registerDoParallel(cl)

# helper function that counts occurences #
f6 = function(x) {
data.table::data.table(x)[, .N, keyby = x]
if(parallel){
num_cores = parallel::detectCores()
cl = parallel::makeCluster(num_cores - 1)
doParallel::registerDoParallel(cl)
on.exit(parallel::stopCluster(cl))
}

else foreach::registerDoSEQ()

# produce vector of which columns are not mass_id #
col_ids = which(names(data) != mass_id)

# do distance calculations in parallel
mass_dists = foreach::foreach(i = col_ids, .packages = c("data.table")) %dopar% {
temp = data[which(data[,i] == 1), mass_id]
temp_dists = round(c(dist(temp)),transformDigits)
temp_dists
}

# if there are no extra columns in transformDF #
# performed outside parallel block due to issues with data.table
if(is.null(transOther_cname)){
# produce counts of transformations for each sample #
mass_diffs = foreach::foreach(i = col_ids, .packages = c("data.table")) %dopar% {
temp = data[which(data[,i] == 1), mass_id]
temp_dists = round(c(dist(temp)),transformDigits)
dists_counts = f6(temp_dists)
trans_counts = merge(x = data.table::data.table(transformDF), y = dists_counts, by.x = transMass_cname, by.y = "x", all.x = T, all.y = F)
mass_diffs <- lapply(mass_dists, function(dist){
dists_counts = data.table::data.table(dist)[, .N, keyby = dist]
trans_counts = merge(x = data.table::data.table(transformDF), y = dists_counts, by.x = transMass_cname, by.y = "dist", all.x = T, all.y = F)
trans_counts[,(transID_cname):=NULL]
}
trans_counts
})

comp_res = data.frame(mass_diffs[[1]][,transMass_cname, with = F], transformDF[,transID_cname],do.call(cbind, lapply(mass_diffs, function(x) x[,!(transMass_cname), with = F])))
names(comp_res) = c(transMass_cname, transID_cname, names(data)[-1])
}else{
mass_diffs = foreach::foreach(i = col_ids, .packages = c("data.table")) %dopar% {
temp = data[which(data[,i] == 1), mass_id]
temp_dists = round(c(dist(temp)),transformDigits)
dists_counts = f6(temp_dists)
trans_counts = merge(x = data.table::data.table(transformDF[,c(transMass_cname, transID_cname)]), y = dists_counts, by.x = transMass_cname, by.y = "x", all.x = T, all.y = F)
# same as previous block but accounting for extra columns
mass_diffs <- lapply(mass_dists, function(dist){
dists_counts = data.table::data.table(dist)[, .N, keyby = dist]
trans_counts = merge(x = data.table::data.table(transformDF[,c(transMass_cname, transID_cname)]), y = dists_counts, by.x = transMass_cname, by.y = "dist", all.x = T, all.y = F)
trans_counts[,(transID_cname):=NULL]
}
trans_counts
})

comp_res = data.frame(mass_diffs[[1]][,transMass_cname, with = F], transformDF[,c(transID_cname, transOther_cname)], do.call(cbind, lapply(mass_diffs, function(x) x[,!(transMass_cname), with = F])))
names(comp_res) = c(transMass_cname, transID_cname, transOther_cname, names(data)[-1])
}

# stop the created cluster #
parallel::stopCluster(cl)

comp_res[is.na(comp_res)] = 0

return(comp_res)
Expand Down
10 changes: 8 additions & 2 deletions man/calc_dbe.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 12 additions & 1 deletion man/calc_kendrick.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading