diff --git a/DESCRIPTION b/DESCRIPTION index 1dcfe6a..0339da2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: metabolyseR Title: Methods for Pre-Treatment, Data Mining and Correlation Analyses of Metabolomics Data -Version: 0.15.3 +Version: 0.15.4 Authors@R: person("Jasen", "Finch", email = "jsf9@aber.ac.uk", role = c("aut", "cre")) Description: A tool kit for pre-treatment, modelling, feature selection and correlation analyses of metabolomics data. URL: https://jasenfinch.github.io/metabolyseR/ diff --git a/NAMESPACE b/NAMESPACE index 9f1a7e3..dec6511 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -95,6 +95,7 @@ export(transformLevel) export(transformLn) export(transformLog10) export(transformPareto) +export(transformPercent) export(transformRange) export(transformSQRT) export(transformTICnorm) @@ -159,7 +160,6 @@ importFrom(furrr,future_map_dfr) importFrom(future,plan) importFrom(ggdendro,dendro_data) importFrom(ggplot2,aes) -importFrom(ggplot2,aes_string) importFrom(ggplot2,coord_fixed) importFrom(ggplot2,element_blank) importFrom(ggplot2,element_line) @@ -186,6 +186,7 @@ importFrom(ggplot2,scale_fill_gradient) importFrom(ggplot2,scale_fill_gradient2) importFrom(ggplot2,scale_fill_manual) importFrom(ggplot2,scale_shape_manual) +importFrom(ggplot2,scale_x_discrete) importFrom(ggplot2,scale_x_reverse) importFrom(ggplot2,scale_y_continuous) importFrom(ggplot2,scale_y_discrete) @@ -222,6 +223,7 @@ importFrom(purrr,map_chr) importFrom(purrr,map_dbl) importFrom(purrr,map_depth) importFrom(purrr,map_df) +importFrom(purrr,map_dfc) importFrom(purrr,map_dfr) importFrom(purrr,map_lgl) importFrom(purrr,walk) diff --git a/NEWS.md b/NEWS.md index 40b1446..569aa3a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,15 @@ +# metabolyseR 0.15.4 + +* Fixed various tidyverse warnings. + +* Fixed an error when calculating the mds dimensions for multiple class comparisons with differing numbers of observations. + +* Added the [`transformPercent()`](https://jasenfinch.github.io/metabolyseR/reference/transform.html) method for the [`AnalysisData`](https://jasenfinch.github.io/metabolyseR/reference/AnalysisData-class.html) S4 class to scale as a percentage of feature maximum intensity. + +* Feature intensities are now displayed as relative percent intensities in heat maps generated by [`plotExplanatoryHeatmap()`](https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html). + +* Reduced the gap between the dendrogram and heat map in outputs of [`plotExplanatoryHeatmap()`](https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html). + # metabolyseR 0.15.3 * Fixed the margin value displayed in plots from [`plotSupervisedRF()`](https://jasenfinch.github.io/metabolyseR/reference/plotSupervisedRF.html) diff --git a/R/mds.R b/R/mds.R index 30c2044..7a35b4d 100644 --- a/R/mds.R +++ b/R/mds.R @@ -42,7 +42,9 @@ setMethod('mds',signature = 'RandomForest', mds_dimensions <- dissimilarities %>% group_by_at(group_vars) %>% group_map(~ .x %>% - select_if(is.numeric) %>% + select_if( + ~is.numeric(.x) & !any(is.na(.x)) + ) %>% cmdscale(k = dimensions) %>% set_colnames(str_c('dimension ', seq_len(dimensions))) %>% diff --git a/R/modelling-plots.R b/R/modelling-plots.R index 69ee153..f741b52 100644 --- a/R/modelling-plots.R +++ b/R/modelling-plots.R @@ -457,7 +457,7 @@ setMethod('plotMDS', mds_dimensions <- mds_dimensions %>% bind_cols(x %>% sinfo() %>% - select(label)) + select(all_of(label))) } } diff --git a/R/occupancy.R b/R/occupancy.R index 329fe4d..f91d889 100644 --- a/R/occupancy.R +++ b/R/occupancy.R @@ -164,7 +164,10 @@ setMethod('occupancy',signature = 'AnalysisData', full_join(clsSize %>% select(Class,Frequency) %>% rename(!!cls := Class) %>% - mutate(dummy = 1),by = 'dummy') %>% + mutate(dummy = 1), + by = 'dummy', + relationship = "many-to-many" + ) %>% select(!!cls,Feature,N,`Class total` = Frequency,Occupancy) %>% drop_na(Feature) occ <- occ %>% diff --git a/R/plotExplanatoryHeatmap.R b/R/plotExplanatoryHeatmap.R index 26104fc..ed4335a 100644 --- a/R/plotExplanatoryHeatmap.R +++ b/R/plotExplanatoryHeatmap.R @@ -1,4 +1,4 @@ -#' @importFrom ggplot2 ggplot aes_string theme scale_y_discrete +#' @importFrom ggplot2 ggplot theme scale_y_discrete scale_x_discrete #' @importFrom ggplot2 geom_segment scale_x_reverse scale_y_continuous unit heatmapClasses <- function(pl, @@ -31,30 +31,26 @@ heatmapClasses <- function(pl, d <- x %>% keepClasses(cls = pred, classes = classes) %>% - keepFeatures(features = feat) - - d <- d %>% - sinfo() %>% - select(all_of(pred)) %>% - bind_cols(d %>% - dat()) %>% - gather('Feature','Intensity',-1) %>% - group_by_at(c(pred,'Feature')) %>% - summarise(Intensity = mean(Intensity), - .groups = 'drop') - - sums <- d %>% - group_by(Feature) %>% - summarise(Total = max(Intensity),.groups = 'drop') - - d <- d %>% - left_join(sums,by = c('Feature')) %>% - mutate(`Relative Intensity` = Intensity / Total) + keepFeatures(features = feat) %>% + aggregateMean(cls = pred) %>% + transformPercent() %>% + { + d <- . + dat(d) %>% + bind_cols( + sinfo(d) %>% + select(all_of(pred)) + ) + } %>% + gather( + Feature, + `Percent Intensity`, + -all_of(pred) + ) suppressWarnings({ dend <- d %>% - select(-Intensity,-Total) %>% - spread(1,`Relative Intensity`) %>% + spread(all_of(pred),`Percent Intensity`) %>% data.frame(check.names = FALSE) %>% set_rownames(.$Feature) %>% select(-Feature) %>% @@ -73,15 +69,17 @@ heatmapClasses <- function(pl, high <- "#F21A00" plo <- d %>% - ggplot(aes_string(x = pred, - y = 'Feature', - fill = '`Relative Intensity`')) + + ggplot( + aes(x = .data[[pred]], + y = Feature, + fill = `Percent Intensity`)) + geom_tile(colour = 'black') + - scale_fill_gradient(low = low, high = high,limits=c(0,1)) + + scale_fill_gradient(low = low, high = high,limits=c(0,100)) + scale_y_discrete(expand = c(0,0),position = 'right') + + scale_x_discrete(expand = c(0,0)) + theme_minimal(base_size = 8) + labs(title = title, - fill = 'Relative\nIntensity') + fill = 'Percent\nIntensity') if (isTRUE(featureNames)) { plo <- plo + theme(plot.title = element_text(face = 'bold', @@ -112,7 +110,7 @@ heatmapClasses <- function(pl, geom_segment( data = dend$segments, aes(x = y, y = x, xend = yend, yend = xend)) + - scale_x_reverse() + + scale_x_reverse(expand = c(0,0)) + scale_y_continuous(breaks = seq_along(dend$labels$label), labels = dend$labels$label,position = 'right', expand = c(offset,offset)) + @@ -190,7 +188,11 @@ heatmapRegression <- function(pl, high <- "#F21A00" plo <- d %>% - ggplot(aes_string(x = 'Response',y = 'Feature',fill = 'r')) + + ggplot( + aes( + x = Response, + y = Feature, + fill = r)) + geom_tile(colour = 'black') + scale_fill_gradient2(low = low, mid = mid,high = high,limits=c(-1,1)) + scale_y_discrete(expand = c(0,0),position = 'right') + diff --git a/R/plotFeature.R b/R/plotFeature.R index 04df2bb..ee9df52 100644 --- a/R/plotFeature.R +++ b/R/plotFeature.R @@ -58,7 +58,7 @@ setMethod('plotFeature', i <- sinfo(analysis) i <- i %>% - select(!!cls,Label = label) + select(!!cls,Label = !!label) if (!is.null(label)) { d <- d %>% diff --git a/R/plotOccupancy.R b/R/plotOccupancy.R index 174d5e4..974449c 100644 --- a/R/plotOccupancy.R +++ b/R/plotOccupancy.R @@ -32,9 +32,10 @@ setMethod('plotOccupancy',signature = 'AnalysisData', occ <- occupancy(x,cls = cls) d <- ggplot(occ, - aes_string(x = 'Occupancy', - group = cls, - colour = cls)) + + aes( + x = Occupancy, + group = .data[[cls]], + colour = .data[[cls]])) + geom_density() + theme_bw() + labs(title = 'Density distrubution', @@ -52,11 +53,12 @@ setMethod('plotOccupancy',signature = 'AnalysisData', group_by_at(c(cls,'Occupancy')) %>% summarise(sum = n()) %>% mutate(cs = cumsum(sum)) - + csDist <- ggplot(cs, - aes_string(x = 'Occupancy', - y = 'cs', - colour = cls)) + + aes( + x = Occupancy, + y = cs, + colour = .data[[cls]])) + geom_line() + theme_bw() + labs(title = 'Cumulative distribution', diff --git a/R/plotRSD.R b/R/plotRSD.R index bf176c2..096e5e1 100644 --- a/R/plotRSD.R +++ b/R/plotRSD.R @@ -38,7 +38,12 @@ setMethod('plotRSD',signature = 'AnalysisData', x <- rsd(analysis,cls = cls) - d <- ggplot(x,aes_string(x = 'RSD',colour = cls,group = cls)) + + d <- ggplot( + x, + aes( + x = RSD, + colour = .data[[cls]], + group = .data[[cls]])) + geom_density() + theme_bw() + labs(title = 'Density distrubution', @@ -58,7 +63,12 @@ setMethod('plotRSD',signature = 'AnalysisData', summarise(sum = n()) %>% mutate(cs = cumsum(sum)) - csDist <- ggplot(cs,aes_string(x = 'RSD',y = 'cs',colour = cls)) + + csDist <- ggplot( + cs, + aes( + x = RSD, + y = cs, + colour = .data[[cls]])) + geom_line() + theme_bw() + labs(title = 'Cumulative distribution', diff --git a/R/randomForest-classification.R b/R/randomForest-classification.R index 9d168db..c2f529e 100644 --- a/R/randomForest-classification.R +++ b/R/randomForest-classification.R @@ -28,7 +28,7 @@ classificationMetrics <- function(model){ } roc <- predictions %>% - roc_auc(obs,estimate) + roc_auc(obs,all_of(estimate)) bind_rows( acc_kap, diff --git a/R/transform.R b/R/transform.R index 527f17e..f9cf42f 100644 --- a/R/transform.R +++ b/R/transform.R @@ -19,6 +19,7 @@ #' * `transformLn`: Natural logarithmic transformation. #' * `transformLog10`: Logarithmic transformation. #' * `transformPareto`: Pareto scaling. +#' * `transformPercent`: Scale as a percentage of the feature maximum intensity. #' * `transformRange`: Range scaling. Also known as min-max scaling. #' * `transformSQRT`: Square root transformation. #' * `transformTICnorm`: Total ion count normalisation. @@ -72,6 +73,11 @@ #' d %>% #' transformPareto() %>% #' plotLDA(cls = 'day') +#' +#' ## Percentage scaling +#' d %>% +#' transformPercent() %>% +#' plotLDA(cls = 'day') #' #' ## Range scaling #' d %>% @@ -204,6 +210,22 @@ setMethod('transformPareto',signature = 'AnalysisData', #' @rdname transform #' @export +setGeneric("transformPercent", function(d) + standardGeneric("transformPercent")) + +#' @rdname transform +#' @importFrom purrr map_dfc +setMethod('transformPercent',signature = 'AnalysisData', + function(d){ + dat(d) <- dat(d) %>% + map_dfc(~.x / max(.x) * 100) + + return(d) + }) + +#' @rdname transform +#' @export + setGeneric("transformRange", function(d) standardGeneric("transformRange")) diff --git a/R/univariate.R b/R/univariate.R index 0f1da28..763abdc 100644 --- a/R/univariate.R +++ b/R/univariate.R @@ -86,14 +86,14 @@ setMethod('anova',signature = 'AnalysisData', pc <- str_split(.x,'~')[[1]] pad <- removeClasses(x,pred,classes = sinfo(x) %>% - select(pred) %>% + select(all_of(pred)) %>% unlist() %>% unique() %>% .[!(. %in% pc)]) response <- pad %>% sinfo() %>% - select(pred) %>% + select(all_of(pred)) %>% unlist() %>% factor() @@ -197,14 +197,14 @@ setMethod('ttest',signature = 'AnalysisData', pc <- str_split(.x,'~')[[1]] pad <- removeClasses(x,pred,classes = sinfo(x) %>% - select(pred) %>% + select(all_of(pred)) %>% unlist() %>% unique() %>% .[!(. %in% pc)]) response <- pad %>% sinfo() %>% - select(pred) %>% + select(all_of(pred)) %>% unlist() %>% factor() @@ -282,7 +282,7 @@ setMethod('linearRegression',signature = 'AnalysisData', returnModels = FALSE){ indep <- x %>% sinfo() %>% - select(cls) + select(all_of(cls)) if (FALSE %in% (map_chr(indep,class) %in% c('integer','numeric'))) { @@ -299,7 +299,7 @@ setMethod('linearRegression',signature = 'AnalysisData', i <- . pred <- indep %>% - select(i) %>% + select(all_of(i)) %>% unlist() d %>% diff --git a/README.md b/README.md index 60582d1..a26868a 100644 --- a/README.md +++ b/README.md @@ -187,20 +187,20 @@ explan_feat <- explanatoryFeatures(anova_results,threshold = 0.05) ``` r explan_feat #> # A tibble: 379 × 10 -#> respo…¹ compa…² feature term df sumsq meansq stati…³ p.value adjust…⁴ -#> -#> 1 day 1~2~3~… N341 resp… 5 3.88e-4 7.76e-5 137. 1.55e-46 2.73e-43 -#> 2 day 1~2~3~… N133 resp… 5 7.00e-5 1.40e-5 126. 8.63e-45 1.52e-41 -#> 3 day 1~2~3~… N163 resp… 5 6.01e-5 1.20e-5 117. 2.95e-43 5.19e-40 -#> 4 day 1~2~3~… N1087 resp… 5 2.42e-6 4.84e-7 99.8 5.61e-40 9.88e-37 -#> 5 day 1~2~3~… N171 resp… 5 2.25e-7 4.50e-8 95.7 3.84e-39 6.75e-36 -#> 6 day 1~2~3~… N513 resp… 5 3.38e-6 6.76e-7 95.3 4.78e-39 8.41e-36 -#> 7 day 1~2~3~… N1025 resp… 5 2.78e-6 5.56e-7 91.0 3.91e-38 6.87e-35 -#> 8 day 1~2~3~… N342 resp… 5 3.71e-6 7.41e-7 90.3 5.32e-38 9.36e-35 -#> 9 day 1~2~3~… N1083 resp… 5 5.11e-5 1.02e-5 89.0 1.06e-37 1.87e-34 -#> 10 day 1~2~3~… N1085 resp… 5 1.10e-5 2.19e-6 83.4 1.92e-36 3.37e-33 -#> # … with 369 more rows, and abbreviated variable names ¹​response, ²​comparison, -#> # ³​statistic, ⁴​adjusted.p.value +#> response comparison feature term df sumsq meansq statistic p.value +#> +#> 1 day 1~2~3~4~5~H N341 response 5 9.17e7 1.83e7 137. 1.55e-46 +#> 2 day 1~2~3~4~5~H N133 response 5 1.65e7 3.31e6 126. 8.63e-45 +#> 3 day 1~2~3~4~5~H N163 response 5 1.42e7 2.84e6 117. 2.95e-43 +#> 4 day 1~2~3~4~5~H N1087 response 5 5.72e5 1.14e5 99.8 5.61e-40 +#> 5 day 1~2~3~4~5~H N171 response 5 5.31e4 1.06e4 95.7 3.84e-39 +#> 6 day 1~2~3~4~5~H N513 response 5 7.99e5 1.60e5 95.3 4.78e-39 +#> 7 day 1~2~3~4~5~H N1025 response 5 6.57e5 1.31e5 91.0 3.91e-38 +#> 8 day 1~2~3~4~5~H N342 response 5 8.76e5 1.75e5 90.3 5.32e-38 +#> 9 day 1~2~3~4~5~H N1083 response 5 1.21e7 2.42e6 89.0 1.06e-37 +#> 10 day 1~2~3~4~5~H N1085 response 5 2.59e6 5.18e5 83.4 1.92e-36 +#> # ℹ 369 more rows +#> # ℹ 1 more variable: adjusted.p.value ``` The ANOVA has identified 379 features significantly explanatory over the diff --git a/man/figures/README-feature_plot-1.png b/man/figures/README-feature_plot-1.png index 3d94c65..9f345c9 100644 Binary files a/man/figures/README-feature_plot-1.png and b/man/figures/README-feature_plot-1.png differ diff --git a/man/figures/README-rf_heatmap-1.png b/man/figures/README-rf_heatmap-1.png index a004675..55e3915 100644 Binary files a/man/figures/README-rf_heatmap-1.png and b/man/figures/README-rf_heatmap-1.png differ diff --git a/man/figures/README-supervised_RF-1.png b/man/figures/README-supervised_RF-1.png index 2057dc2..1d1f7d2 100644 Binary files a/man/figures/README-supervised_RF-1.png and b/man/figures/README-supervised_RF-1.png differ diff --git a/man/transform.Rd b/man/transform.Rd index 9922414..b1756ba 100644 --- a/man/transform.Rd +++ b/man/transform.Rd @@ -15,6 +15,8 @@ \alias{transformLog10,AnalysisData-method} \alias{transformPareto} \alias{transformPareto,AnalysisData-method} +\alias{transformPercent} +\alias{transformPercent,AnalysisData-method} \alias{transformRange} \alias{transformRange,AnalysisData-method} \alias{transformSQRT} @@ -53,6 +55,10 @@ transformPareto(d) \S4method{transformPareto}{AnalysisData}(d) +transformPercent(d) + +\S4method{transformPercent}{AnalysisData}(d) + transformRange(d) \S4method{transformRange}{AnalysisData}(d) @@ -99,6 +105,7 @@ There are a wide range of transformation methods available that are commonly use \item \code{transformLn}: Natural logarithmic transformation. \item \code{transformLog10}: Logarithmic transformation. \item \code{transformPareto}: Pareto scaling. +\item \code{transformPercent}: Scale as a percentage of the feature maximum intensity. \item \code{transformRange}: Range scaling. Also known as min-max scaling. \item \code{transformSQRT}: Square root transformation. \item \code{transformTICnorm}: Total ion count normalisation. @@ -156,6 +163,11 @@ d \%>\% transformPareto() \%>\% plotLDA(cls = 'day') +## Percentage scaling +d \%>\% + transformPercent() \%>\% + plotLDA(cls = 'day') + ## Range scaling d \%>\% transformRange() \%>\% diff --git a/vignettes/pre_treatment.Rmd b/vignettes/pre_treatment.Rmd index d335300..56d8489 100644 --- a/vignettes/pre_treatment.Rmd +++ b/vignettes/pre_treatment.Rmd @@ -310,7 +310,7 @@ corrected_data <- d %>% The plot of the TICs below shows that the inter-class variability has been removed but the intra-class variability has been retained. -```{r corrected-TIC plot} +```{r corrected-TIC-plot} plotTIC(corrected_data, by = 'day', colour = 'day')