Merge pull request #95 from jasenfinch/devel

v0.15.4
jasenfinch · Sep 12, 2023 · 08bb28b · 08bb28b
2 parents 2f1312d + 95a4459
commit 08bb28b
Show file tree

Hide file tree

Showing 19 changed files with 133 additions and 66 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: metabolyseR
 Title: Methods for Pre-Treatment, Data Mining and Correlation Analyses of Metabolomics Data
-Version: 0.15.3
+Version: 0.15.4
 Authors@R: person("Jasen", "Finch", email = "jsf9@aber.ac.uk", role = c("aut", "cre"))
 Description: A tool kit for pre-treatment, modelling, feature selection and correlation analyses of metabolomics data.
 URL: https://jasenfinch.github.io/metabolyseR/

diff --git a/NAMESPACE b/NAMESPACE
@@ -95,6 +95,7 @@ export(transformLevel)
 export(transformLn)
 export(transformLog10)
 export(transformPareto)
+export(transformPercent)
 export(transformRange)
 export(transformSQRT)
 export(transformTICnorm)
@@ -159,7 +160,6 @@ importFrom(furrr,future_map_dfr)
 importFrom(future,plan)
 importFrom(ggdendro,dendro_data)
 importFrom(ggplot2,aes)
-importFrom(ggplot2,aes_string)
 importFrom(ggplot2,coord_fixed)
 importFrom(ggplot2,element_blank)
 importFrom(ggplot2,element_line)
@@ -186,6 +186,7 @@ importFrom(ggplot2,scale_fill_gradient)
 importFrom(ggplot2,scale_fill_gradient2)
 importFrom(ggplot2,scale_fill_manual)
 importFrom(ggplot2,scale_shape_manual)
+importFrom(ggplot2,scale_x_discrete)
 importFrom(ggplot2,scale_x_reverse)
 importFrom(ggplot2,scale_y_continuous)
 importFrom(ggplot2,scale_y_discrete)
@@ -222,6 +223,7 @@ importFrom(purrr,map_chr)
 importFrom(purrr,map_dbl)
 importFrom(purrr,map_depth)
 importFrom(purrr,map_df)
+importFrom(purrr,map_dfc)
 importFrom(purrr,map_dfr)
 importFrom(purrr,map_lgl)
 importFrom(purrr,walk)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,15 @@
+# metabolyseR 0.15.4
+
+* Fixed various tidyverse warnings.
+
+* Fixed an error when calculating the mds dimensions for multiple class comparisons with differing numbers of observations.
+
+* Added the [`transformPercent()`](https://jasenfinch.github.io/metabolyseR/reference/transform.html) method for the [`AnalysisData`](https://jasenfinch.github.io/metabolyseR/reference/AnalysisData-class.html) S4 class to scale as a percentage of feature maximum intensity.
+
+* Feature intensities are now displayed as relative percent intensities in heat maps generated by [`plotExplanatoryHeatmap()`](https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html).
+
+* Reduced the gap between the dendrogram and heat map in outputs of [`plotExplanatoryHeatmap()`](https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html).
+
 # metabolyseR 0.15.3
 
 * Fixed the margin value displayed in plots from [`plotSupervisedRF()`](https://jasenfinch.github.io/metabolyseR/reference/plotSupervisedRF.html)

diff --git a/R/mds.R b/R/mds.R
@@ -42,7 +42,9 @@ setMethod('mds',signature = 'RandomForest',
             mds_dimensions <- dissimilarities %>% 
               group_by_at(group_vars) %>% 
               group_map(~ .x %>% 
-                          select_if(is.numeric) %>% 
+                          select_if(
+                            ~is.numeric(.x) & !any(is.na(.x)) 
+                          ) %>% 
                           cmdscale(k = dimensions) %>% 
                           set_colnames(str_c('dimension ',
                                              seq_len(dimensions))) %>% 

diff --git a/R/modelling-plots.R b/R/modelling-plots.R
@@ -457,7 +457,7 @@ setMethod('plotMDS',
                 mds_dimensions <- mds_dimensions %>%
                   bind_cols(x %>%
                               sinfo() %>%
-                              select(label))
+                              select(all_of(label)))
               }
             }
 

diff --git a/R/occupancy.R b/R/occupancy.R
@@ -164,7 +164,10 @@ setMethod('occupancy',signature = 'AnalysisData',
               full_join(clsSize %>%
                           select(Class,Frequency) %>%
                           rename(!!cls := Class) %>%
-                          mutate(dummy = 1),by = 'dummy') %>%
+                          mutate(dummy = 1),
+                        by = 'dummy',
+                        relationship = "many-to-many"
+                        ) %>%
               select(!!cls,Feature,N,`Class total` = Frequency,Occupancy) %>% 
               drop_na(Feature)
             occ <- occ %>%

diff --git a/R/plotExplanatoryHeatmap.R b/R/plotExplanatoryHeatmap.R
@@ -1,4 +1,4 @@
-#' @importFrom ggplot2 ggplot aes_string theme scale_y_discrete 
+#' @importFrom ggplot2 ggplot theme scale_y_discrete scale_x_discrete
 #' @importFrom ggplot2 geom_segment scale_x_reverse scale_y_continuous unit
 
 heatmapClasses <- function(pl, 
@@ -31,30 +31,26 @@ heatmapClasses <- function(pl,
       d <- x %>%
         keepClasses(cls = pred,
                     classes = classes) %>%
-        keepFeatures(features = feat)
-
-      d <- d %>%
-        sinfo() %>%
-        select(all_of(pred)) %>%
-        bind_cols(d %>%
-                    dat()) %>%
-        gather('Feature','Intensity',-1) %>%
-        group_by_at(c(pred,'Feature')) %>%
-        summarise(Intensity = mean(Intensity),
-                  .groups = 'drop')
-
-      sums <- d %>%
-        group_by(Feature) %>%
-        summarise(Total = max(Intensity),.groups = 'drop')
-
-      d <- d %>%
-        left_join(sums,by = c('Feature')) %>%
-        mutate(`Relative Intensity` = Intensity / Total)
+        keepFeatures(features = feat) %>% 
+        aggregateMean(cls = pred) %>% 
+        transformPercent() %>% 
+        {
+          d <- .
+          dat(d) %>% 
+            bind_cols(
+              sinfo(d) %>% 
+                select(all_of(pred))
+            )
+        } %>% 
+        gather(
+          Feature,
+          `Percent Intensity`,
+          -all_of(pred)
+        )
 
       suppressWarnings({
         dend <- d %>%
-          select(-Intensity,-Total) %>%
-          spread(1,`Relative Intensity`) %>%
+          spread(all_of(pred),`Percent Intensity`) %>%
           data.frame(check.names = FALSE) %>%
           set_rownames(.$Feature) %>%
           select(-Feature) %>%
@@ -73,15 +69,17 @@ heatmapClasses <- function(pl,
       high <- "#F21A00"
 
       plo <- d %>%
-        ggplot(aes_string(x = pred,
-                          y = 'Feature',
-                          fill = '`Relative Intensity`')) +
+        ggplot(
+          aes(x = .data[[pred]],
+              y = Feature,
+              fill = `Percent Intensity`)) +
         geom_tile(colour = 'black') +
-        scale_fill_gradient(low = low, high = high,limits=c(0,1)) +
+        scale_fill_gradient(low = low, high = high,limits=c(0,100)) +
         scale_y_discrete(expand = c(0,0),position = 'right') +
+        scale_x_discrete(expand = c(0,0)) +
         theme_minimal(base_size = 8) +
         labs(title = title,
-             fill = 'Relative\nIntensity')
+             fill = 'Percent\nIntensity')
       if (isTRUE(featureNames)) {
         plo <- plo +
           theme(plot.title = element_text(face = 'bold',
@@ -112,7 +110,7 @@ heatmapClasses <- function(pl,
           geom_segment(
             data = dend$segments,
             aes(x = y, y = x, xend = yend, yend = xend)) +
-          scale_x_reverse() +
+          scale_x_reverse(expand = c(0,0)) +
           scale_y_continuous(breaks = seq_along(dend$labels$label), 
                              labels = dend$labels$label,position = 'right',
                              expand = c(offset,offset)) +
@@ -190,7 +188,11 @@ heatmapRegression <- function(pl,
       high <- "#F21A00"
 
       plo <- d %>%
-        ggplot(aes_string(x = 'Response',y = 'Feature',fill = 'r')) +
+        ggplot(
+          aes(
+            x = Response,
+            y = Feature,
+            fill = r)) +
         geom_tile(colour = 'black') +
         scale_fill_gradient2(low = low, mid = mid,high = high,limits=c(-1,1)) +
         scale_y_discrete(expand = c(0,0),position = 'right') +

diff --git a/R/plotFeature.R b/R/plotFeature.R
@@ -58,7 +58,7 @@ setMethod('plotFeature',
             i <- sinfo(analysis)
 
             i <- i %>%
-              select(!!cls,Label = label)
+              select(!!cls,Label = !!label)
 
             if (!is.null(label)) {
               d <- d %>%

diff --git a/R/plotOccupancy.R b/R/plotOccupancy.R
@@ -32,9 +32,10 @@ setMethod('plotOccupancy',signature = 'AnalysisData',
             occ <- occupancy(x,cls = cls)
 
             d <- ggplot(occ,
-                        aes_string(x = 'Occupancy',
-                                   group = cls,
-                                   colour = cls)) +
+                        aes(
+                          x = Occupancy,
+                          group = .data[[cls]],
+                          colour = .data[[cls]])) +
               geom_density() +
               theme_bw() +
               labs(title = 'Density distrubution',
@@ -52,11 +53,12 @@ setMethod('plotOccupancy',signature = 'AnalysisData',
               group_by_at(c(cls,'Occupancy')) %>%
               summarise(sum = n()) %>%
               mutate(cs = cumsum(sum))
-
+            
             csDist <- ggplot(cs,
-                             aes_string(x = 'Occupancy',
-                                        y = 'cs',
-                                        colour = cls)) + 
+                             aes(
+                               x = Occupancy,
+                               y = cs,
+                               colour = .data[[cls]])) + 
               geom_line() + 
               theme_bw() +
               labs(title = 'Cumulative distribution',

diff --git a/R/plotRSD.R b/R/plotRSD.R
@@ -38,7 +38,12 @@ setMethod('plotRSD',signature = 'AnalysisData',
 
             x <- rsd(analysis,cls = cls)
 
-            d <- ggplot(x,aes_string(x = 'RSD',colour = cls,group = cls)) +
+            d <- ggplot(
+              x,
+              aes(
+                x = RSD,
+                colour = .data[[cls]],
+                group = .data[[cls]])) +
               geom_density() +
               theme_bw() +
               labs(title = 'Density distrubution',
@@ -58,7 +63,12 @@ setMethod('plotRSD',signature = 'AnalysisData',
               summarise(sum = n()) %>%
               mutate(cs = cumsum(sum))
 
-            csDist <- ggplot(cs,aes_string(x = 'RSD',y = 'cs',colour = cls)) + 
+            csDist <- ggplot(
+              cs,
+              aes(
+                x = RSD,
+                y = cs,
+                colour = .data[[cls]])) + 
               geom_line() + 
               theme_bw() +
               labs(title = 'Cumulative distribution',

diff --git a/R/randomForest-classification.R b/R/randomForest-classification.R
@@ -28,7 +28,7 @@ classificationMetrics <- function(model){
   }
 
   roc <- predictions %>% 
-    roc_auc(obs,estimate)
+    roc_auc(obs,all_of(estimate))
 
   bind_rows(
     acc_kap,

diff --git a/R/transform.R b/R/transform.R
@@ -19,6 +19,7 @@
 #' * `transformLn`: Natural logarithmic transformation.
 #' * `transformLog10`: Logarithmic transformation.
 #' * `transformPareto`: Pareto scaling.
+#' * `transformPercent`: Scale as a percentage of the feature maximum intensity.
 #' * `transformRange`: Range scaling. Also known as min-max scaling.
 #' * `transformSQRT`: Square root transformation.
 #' * `transformTICnorm`: Total ion count normalisation.
@@ -72,6 +73,11 @@
 #' d %>% 
 #'  transformPareto() %>% 
 #'  plotLDA(cls = 'day')
+#'
+#' ## Percentage scaling
+#' d %>% 
+#'  transformPercent() %>% 
+#'  plotLDA(cls = 'day')
 #' 
 #' ## Range scaling
 #' d %>% 
@@ -204,6 +210,22 @@ setMethod('transformPareto',signature = 'AnalysisData',
 #' @rdname transform
 #' @export
 
+setGeneric("transformPercent", function(d)
+  standardGeneric("transformPercent"))
+
+#' @rdname transform
+#' @importFrom purrr map_dfc
+setMethod('transformPercent',signature = 'AnalysisData',
+          function(d){
+            dat(d) <- dat(d) %>% 
+              map_dfc(~.x / max(.x) * 100)
+
+            return(d)
+          })
+
+#' @rdname transform
+#' @export
+
 setGeneric("transformRange", function(d)
   standardGeneric("transformRange"))
 

diff --git a/R/univariate.R b/R/univariate.R
@@ -86,14 +86,14 @@ setMethod('anova',signature = 'AnalysisData',
                     pc <- str_split(.x,'~')[[1]]
 
                     pad <- removeClasses(x,pred,classes = sinfo(x) %>%
-                                           select(pred) %>%
+                                           select(all_of(pred)) %>%
                                            unlist() %>%
                                            unique() %>%
                                            .[!(. %in% pc)])
 
                     response <- pad %>%
                       sinfo() %>%
-                      select(pred) %>%
+                      select(all_of(pred)) %>%
                       unlist() %>%
                       factor()
 
@@ -197,14 +197,14 @@ setMethod('ttest',signature = 'AnalysisData',
                     pc <- str_split(.x,'~')[[1]]
 
                     pad <- removeClasses(x,pred,classes = sinfo(x) %>%
-                                           select(pred) %>%
+                                           select(all_of(pred)) %>%
                                            unlist() %>%
                                            unique() %>%
                                            .[!(. %in% pc)])
 
                     response <- pad %>%
                       sinfo() %>%
-                      select(pred) %>%
+                      select(all_of(pred)) %>%
                       unlist() %>%
                       factor()
 
@@ -282,7 +282,7 @@ setMethod('linearRegression',signature = 'AnalysisData',
                    returnModels = FALSE){
             indep <- x %>%
               sinfo() %>%
-              select(cls)
+              select(all_of(cls))
 
             if (FALSE %in% 
                 (map_chr(indep,class) %in% c('integer','numeric'))) {
@@ -299,7 +299,7 @@ setMethod('linearRegression',signature = 'AnalysisData',
                 i <- .
 
                 pred <- indep %>%
-                  select(i) %>%
+                  select(all_of(i)) %>%
                   unlist()
 
                 d %>%

diff --git a/README.md b/README.md
@@ -187,20 +187,20 @@ explan_feat <- explanatoryFeatures(anova_results,threshold = 0.05)
 ``` r
 explan_feat
 #> # A tibble: 379 × 10
-#>    respo…¹ compa…² feature term     df   sumsq  meansq stati…³  p.value adjust…⁴
-#>    <chr>   <chr>   <chr>   <chr> <dbl>   <dbl>   <dbl>   <dbl>    <dbl>    <dbl>
-#>  1 day     1~2~3~… N341    resp…     5 3.88e-4 7.76e-5   137.  1.55e-46 2.73e-43
-#>  2 day     1~2~3~… N133    resp…     5 7.00e-5 1.40e-5   126.  8.63e-45 1.52e-41
-#>  3 day     1~2~3~… N163    resp…     5 6.01e-5 1.20e-5   117.  2.95e-43 5.19e-40
-#>  4 day     1~2~3~… N1087   resp…     5 2.42e-6 4.84e-7    99.8 5.61e-40 9.88e-37
-#>  5 day     1~2~3~… N171    resp…     5 2.25e-7 4.50e-8    95.7 3.84e-39 6.75e-36
-#>  6 day     1~2~3~… N513    resp…     5 3.38e-6 6.76e-7    95.3 4.78e-39 8.41e-36
-#>  7 day     1~2~3~… N1025   resp…     5 2.78e-6 5.56e-7    91.0 3.91e-38 6.87e-35
-#>  8 day     1~2~3~… N342    resp…     5 3.71e-6 7.41e-7    90.3 5.32e-38 9.36e-35
-#>  9 day     1~2~3~… N1083   resp…     5 5.11e-5 1.02e-5    89.0 1.06e-37 1.87e-34
-#> 10 day     1~2~3~… N1085   resp…     5 1.10e-5 2.19e-6    83.4 1.92e-36 3.37e-33
-#> # … with 369 more rows, and abbreviated variable names ¹response, ²comparison,
-#> #   ³statistic, ⁴adjusted.p.value
+#>    response comparison  feature term        df   sumsq meansq statistic  p.value
+#>    <chr>    <chr>       <chr>   <chr>    <dbl>   <dbl>  <dbl>     <dbl>    <dbl>
+#>  1 day      1~2~3~4~5~H N341    response     5  9.17e7 1.83e7     137.  1.55e-46
+#>  2 day      1~2~3~4~5~H N133    response     5  1.65e7 3.31e6     126.  8.63e-45
+#>  3 day      1~2~3~4~5~H N163    response     5  1.42e7 2.84e6     117.  2.95e-43
+#>  4 day      1~2~3~4~5~H N1087   response     5  5.72e5 1.14e5      99.8 5.61e-40
+#>  5 day      1~2~3~4~5~H N171    response     5  5.31e4 1.06e4      95.7 3.84e-39
+#>  6 day      1~2~3~4~5~H N513    response     5  7.99e5 1.60e5      95.3 4.78e-39
+#>  7 day      1~2~3~4~5~H N1025   response     5  6.57e5 1.31e5      91.0 3.91e-38
+#>  8 day      1~2~3~4~5~H N342    response     5  8.76e5 1.75e5      90.3 5.32e-38
+#>  9 day      1~2~3~4~5~H N1083   response     5  1.21e7 2.42e6      89.0 1.06e-37
+#> 10 day      1~2~3~4~5~H N1085   response     5  2.59e6 5.18e5      83.4 1.92e-36
+#> # ℹ 369 more rows
+#> # ℹ 1 more variable: adjusted.p.value <dbl>
 ```
 
 The ANOVA has identified 379 features significantly explanatory over the

diff --git a/man/figures/README-feature_plot-1.png b/man/figures/README-feature_plot-1.png
diff --git a/man/figures/README-rf_heatmap-1.png b/man/figures/README-rf_heatmap-1.png
diff --git a/man/figures/README-supervised_RF-1.png b/man/figures/README-supervised_RF-1.png