Skip to content

Commit

Permalink
Merge pull request #95 from jasenfinch/devel
Browse files Browse the repository at this point in the history
v0.15.4
  • Loading branch information
jasenfinch committed Sep 12, 2023
2 parents 2f1312d + 95a4459 commit 08bb28b
Show file tree
Hide file tree
Showing 19 changed files with 133 additions and 66 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: metabolyseR
Title: Methods for Pre-Treatment, Data Mining and Correlation Analyses of Metabolomics Data
Version: 0.15.3
Version: 0.15.4
Authors@R: person("Jasen", "Finch", email = "jsf9@aber.ac.uk", role = c("aut", "cre"))
Description: A tool kit for pre-treatment, modelling, feature selection and correlation analyses of metabolomics data.
URL: https://jasenfinch.github.io/metabolyseR/
Expand Down
4 changes: 3 additions & 1 deletion NAMESPACE
Expand Up @@ -95,6 +95,7 @@ export(transformLevel)
export(transformLn)
export(transformLog10)
export(transformPareto)
export(transformPercent)
export(transformRange)
export(transformSQRT)
export(transformTICnorm)
Expand Down Expand Up @@ -159,7 +160,6 @@ importFrom(furrr,future_map_dfr)
importFrom(future,plan)
importFrom(ggdendro,dendro_data)
importFrom(ggplot2,aes)
importFrom(ggplot2,aes_string)
importFrom(ggplot2,coord_fixed)
importFrom(ggplot2,element_blank)
importFrom(ggplot2,element_line)
Expand All @@ -186,6 +186,7 @@ importFrom(ggplot2,scale_fill_gradient)
importFrom(ggplot2,scale_fill_gradient2)
importFrom(ggplot2,scale_fill_manual)
importFrom(ggplot2,scale_shape_manual)
importFrom(ggplot2,scale_x_discrete)
importFrom(ggplot2,scale_x_reverse)
importFrom(ggplot2,scale_y_continuous)
importFrom(ggplot2,scale_y_discrete)
Expand Down Expand Up @@ -222,6 +223,7 @@ importFrom(purrr,map_chr)
importFrom(purrr,map_dbl)
importFrom(purrr,map_depth)
importFrom(purrr,map_df)
importFrom(purrr,map_dfc)
importFrom(purrr,map_dfr)
importFrom(purrr,map_lgl)
importFrom(purrr,walk)
Expand Down
12 changes: 12 additions & 0 deletions NEWS.md
@@ -1,3 +1,15 @@
# metabolyseR 0.15.4

* Fixed various tidyverse warnings.

* Fixed an error when calculating the mds dimensions for multiple class comparisons with differing numbers of observations.

* Added the [`transformPercent()`](https://jasenfinch.github.io/metabolyseR/reference/transform.html) method for the [`AnalysisData`](https://jasenfinch.github.io/metabolyseR/reference/AnalysisData-class.html) S4 class to scale as a percentage of feature maximum intensity.

* Feature intensities are now displayed as relative percent intensities in heat maps generated by [`plotExplanatoryHeatmap()`](https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html).

* Reduced the gap between the dendrogram and heat map in outputs of [`plotExplanatoryHeatmap()`](https://jasenfinch.github.io/metabolyseR/reference/plotExplanatoryHeatmap.html).

# metabolyseR 0.15.3

* Fixed the margin value displayed in plots from [`plotSupervisedRF()`](https://jasenfinch.github.io/metabolyseR/reference/plotSupervisedRF.html)
Expand Down
4 changes: 3 additions & 1 deletion R/mds.R
Expand Up @@ -42,7 +42,9 @@ setMethod('mds',signature = 'RandomForest',
mds_dimensions <- dissimilarities %>%
group_by_at(group_vars) %>%
group_map(~ .x %>%
select_if(is.numeric) %>%
select_if(
~is.numeric(.x) & !any(is.na(.x))
) %>%
cmdscale(k = dimensions) %>%
set_colnames(str_c('dimension ',
seq_len(dimensions))) %>%
Expand Down
2 changes: 1 addition & 1 deletion R/modelling-plots.R
Expand Up @@ -457,7 +457,7 @@ setMethod('plotMDS',
mds_dimensions <- mds_dimensions %>%
bind_cols(x %>%
sinfo() %>%
select(label))
select(all_of(label)))
}
}

Expand Down
5 changes: 4 additions & 1 deletion R/occupancy.R
Expand Up @@ -164,7 +164,10 @@ setMethod('occupancy',signature = 'AnalysisData',
full_join(clsSize %>%
select(Class,Frequency) %>%
rename(!!cls := Class) %>%
mutate(dummy = 1),by = 'dummy') %>%
mutate(dummy = 1),
by = 'dummy',
relationship = "many-to-many"
) %>%
select(!!cls,Feature,N,`Class total` = Frequency,Occupancy) %>%
drop_na(Feature)
occ <- occ %>%
Expand Down
60 changes: 31 additions & 29 deletions R/plotExplanatoryHeatmap.R
@@ -1,4 +1,4 @@
#' @importFrom ggplot2 ggplot aes_string theme scale_y_discrete
#' @importFrom ggplot2 ggplot theme scale_y_discrete scale_x_discrete
#' @importFrom ggplot2 geom_segment scale_x_reverse scale_y_continuous unit

heatmapClasses <- function(pl,
Expand Down Expand Up @@ -31,30 +31,26 @@ heatmapClasses <- function(pl,
d <- x %>%
keepClasses(cls = pred,
classes = classes) %>%
keepFeatures(features = feat)

d <- d %>%
sinfo() %>%
select(all_of(pred)) %>%
bind_cols(d %>%
dat()) %>%
gather('Feature','Intensity',-1) %>%
group_by_at(c(pred,'Feature')) %>%
summarise(Intensity = mean(Intensity),
.groups = 'drop')

sums <- d %>%
group_by(Feature) %>%
summarise(Total = max(Intensity),.groups = 'drop')

d <- d %>%
left_join(sums,by = c('Feature')) %>%
mutate(`Relative Intensity` = Intensity / Total)
keepFeatures(features = feat) %>%
aggregateMean(cls = pred) %>%
transformPercent() %>%
{
d <- .
dat(d) %>%
bind_cols(
sinfo(d) %>%
select(all_of(pred))
)
} %>%
gather(
Feature,
`Percent Intensity`,
-all_of(pred)
)

suppressWarnings({
dend <- d %>%
select(-Intensity,-Total) %>%
spread(1,`Relative Intensity`) %>%
spread(all_of(pred),`Percent Intensity`) %>%
data.frame(check.names = FALSE) %>%
set_rownames(.$Feature) %>%
select(-Feature) %>%
Expand All @@ -73,15 +69,17 @@ heatmapClasses <- function(pl,
high <- "#F21A00"

plo <- d %>%
ggplot(aes_string(x = pred,
y = 'Feature',
fill = '`Relative Intensity`')) +
ggplot(
aes(x = .data[[pred]],
y = Feature,
fill = `Percent Intensity`)) +
geom_tile(colour = 'black') +
scale_fill_gradient(low = low, high = high,limits=c(0,1)) +
scale_fill_gradient(low = low, high = high,limits=c(0,100)) +
scale_y_discrete(expand = c(0,0),position = 'right') +
scale_x_discrete(expand = c(0,0)) +
theme_minimal(base_size = 8) +
labs(title = title,
fill = 'Relative\nIntensity')
fill = 'Percent\nIntensity')
if (isTRUE(featureNames)) {
plo <- plo +
theme(plot.title = element_text(face = 'bold',
Expand Down Expand Up @@ -112,7 +110,7 @@ heatmapClasses <- function(pl,
geom_segment(
data = dend$segments,
aes(x = y, y = x, xend = yend, yend = xend)) +
scale_x_reverse() +
scale_x_reverse(expand = c(0,0)) +
scale_y_continuous(breaks = seq_along(dend$labels$label),
labels = dend$labels$label,position = 'right',
expand = c(offset,offset)) +
Expand Down Expand Up @@ -190,7 +188,11 @@ heatmapRegression <- function(pl,
high <- "#F21A00"

plo <- d %>%
ggplot(aes_string(x = 'Response',y = 'Feature',fill = 'r')) +
ggplot(
aes(
x = Response,
y = Feature,
fill = r)) +
geom_tile(colour = 'black') +
scale_fill_gradient2(low = low, mid = mid,high = high,limits=c(-1,1)) +
scale_y_discrete(expand = c(0,0),position = 'right') +
Expand Down
2 changes: 1 addition & 1 deletion R/plotFeature.R
Expand Up @@ -58,7 +58,7 @@ setMethod('plotFeature',
i <- sinfo(analysis)

i <- i %>%
select(!!cls,Label = label)
select(!!cls,Label = !!label)

if (!is.null(label)) {
d <- d %>%
Expand Down
16 changes: 9 additions & 7 deletions R/plotOccupancy.R
Expand Up @@ -32,9 +32,10 @@ setMethod('plotOccupancy',signature = 'AnalysisData',
occ <- occupancy(x,cls = cls)

d <- ggplot(occ,
aes_string(x = 'Occupancy',
group = cls,
colour = cls)) +
aes(
x = Occupancy,
group = .data[[cls]],
colour = .data[[cls]])) +
geom_density() +
theme_bw() +
labs(title = 'Density distrubution',
Expand All @@ -52,11 +53,12 @@ setMethod('plotOccupancy',signature = 'AnalysisData',
group_by_at(c(cls,'Occupancy')) %>%
summarise(sum = n()) %>%
mutate(cs = cumsum(sum))

csDist <- ggplot(cs,
aes_string(x = 'Occupancy',
y = 'cs',
colour = cls)) +
aes(
x = Occupancy,
y = cs,
colour = .data[[cls]])) +
geom_line() +
theme_bw() +
labs(title = 'Cumulative distribution',
Expand Down
14 changes: 12 additions & 2 deletions R/plotRSD.R
Expand Up @@ -38,7 +38,12 @@ setMethod('plotRSD',signature = 'AnalysisData',

x <- rsd(analysis,cls = cls)

d <- ggplot(x,aes_string(x = 'RSD',colour = cls,group = cls)) +
d <- ggplot(
x,
aes(
x = RSD,
colour = .data[[cls]],
group = .data[[cls]])) +
geom_density() +
theme_bw() +
labs(title = 'Density distrubution',
Expand All @@ -58,7 +63,12 @@ setMethod('plotRSD',signature = 'AnalysisData',
summarise(sum = n()) %>%
mutate(cs = cumsum(sum))

csDist <- ggplot(cs,aes_string(x = 'RSD',y = 'cs',colour = cls)) +
csDist <- ggplot(
cs,
aes(
x = RSD,
y = cs,
colour = .data[[cls]])) +
geom_line() +
theme_bw() +
labs(title = 'Cumulative distribution',
Expand Down
2 changes: 1 addition & 1 deletion R/randomForest-classification.R
Expand Up @@ -28,7 +28,7 @@ classificationMetrics <- function(model){
}

roc <- predictions %>%
roc_auc(obs,estimate)
roc_auc(obs,all_of(estimate))

bind_rows(
acc_kap,
Expand Down
22 changes: 22 additions & 0 deletions R/transform.R
Expand Up @@ -19,6 +19,7 @@
#' * `transformLn`: Natural logarithmic transformation.
#' * `transformLog10`: Logarithmic transformation.
#' * `transformPareto`: Pareto scaling.
#' * `transformPercent`: Scale as a percentage of the feature maximum intensity.
#' * `transformRange`: Range scaling. Also known as min-max scaling.
#' * `transformSQRT`: Square root transformation.
#' * `transformTICnorm`: Total ion count normalisation.
Expand Down Expand Up @@ -72,6 +73,11 @@
#' d %>%
#' transformPareto() %>%
#' plotLDA(cls = 'day')
#'
#' ## Percentage scaling
#' d %>%
#' transformPercent() %>%
#' plotLDA(cls = 'day')
#'
#' ## Range scaling
#' d %>%
Expand Down Expand Up @@ -204,6 +210,22 @@ setMethod('transformPareto',signature = 'AnalysisData',
#' @rdname transform
#' @export

setGeneric("transformPercent", function(d)
standardGeneric("transformPercent"))

#' @rdname transform
#' @importFrom purrr map_dfc
setMethod('transformPercent',signature = 'AnalysisData',
function(d){
dat(d) <- dat(d) %>%
map_dfc(~.x / max(.x) * 100)

return(d)
})

#' @rdname transform
#' @export

setGeneric("transformRange", function(d)
standardGeneric("transformRange"))

Expand Down
12 changes: 6 additions & 6 deletions R/univariate.R
Expand Up @@ -86,14 +86,14 @@ setMethod('anova',signature = 'AnalysisData',
pc <- str_split(.x,'~')[[1]]

pad <- removeClasses(x,pred,classes = sinfo(x) %>%
select(pred) %>%
select(all_of(pred)) %>%
unlist() %>%
unique() %>%
.[!(. %in% pc)])

response <- pad %>%
sinfo() %>%
select(pred) %>%
select(all_of(pred)) %>%
unlist() %>%
factor()

Expand Down Expand Up @@ -197,14 +197,14 @@ setMethod('ttest',signature = 'AnalysisData',
pc <- str_split(.x,'~')[[1]]

pad <- removeClasses(x,pred,classes = sinfo(x) %>%
select(pred) %>%
select(all_of(pred)) %>%
unlist() %>%
unique() %>%
.[!(. %in% pc)])

response <- pad %>%
sinfo() %>%
select(pred) %>%
select(all_of(pred)) %>%
unlist() %>%
factor()

Expand Down Expand Up @@ -282,7 +282,7 @@ setMethod('linearRegression',signature = 'AnalysisData',
returnModels = FALSE){
indep <- x %>%
sinfo() %>%
select(cls)
select(all_of(cls))

if (FALSE %in%
(map_chr(indep,class) %in% c('integer','numeric'))) {
Expand All @@ -299,7 +299,7 @@ setMethod('linearRegression',signature = 'AnalysisData',
i <- .

pred <- indep %>%
select(i) %>%
select(all_of(i)) %>%
unlist()

d %>%
Expand Down
28 changes: 14 additions & 14 deletions README.md
Expand Up @@ -187,20 +187,20 @@ explan_feat <- explanatoryFeatures(anova_results,threshold = 0.05)
``` r
explan_feat
#> # A tibble: 379 × 10
#> respo…¹ compa…² feature term df sumsq meansq stati…³ p.value adjust…⁴
#> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 day 1~2~3~ N341 resp… 5 3.88e-4 7.76e-5 137. 1.55e-46 2.73e-43
#> 2 day 1~2~3~ N133 resp… 5 7.00e-5 1.40e-5 126. 8.63e-45 1.52e-41
#> 3 day 1~2~3~ N163 resp… 5 6.01e-5 1.20e-5 117. 2.95e-43 5.19e-40
#> 4 day 1~2~3~ N1087 resp… 5 2.42e-6 4.84e-7 99.8 5.61e-40 9.88e-37
#> 5 day 1~2~3~ N171 resp… 5 2.25e-7 4.50e-8 95.7 3.84e-39 6.75e-36
#> 6 day 1~2~3~ N513 resp… 5 3.38e-6 6.76e-7 95.3 4.78e-39 8.41e-36
#> 7 day 1~2~3~ N1025 resp… 5 2.78e-6 5.56e-7 91.0 3.91e-38 6.87e-35
#> 8 day 1~2~3~ N342 resp… 5 3.71e-6 7.41e-7 90.3 5.32e-38 9.36e-35
#> 9 day 1~2~3~ N1083 resp… 5 5.11e-5 1.02e-5 89.0 1.06e-37 1.87e-34
#> 10 day 1~2~3~ N1085 resp… 5 1.10e-5 2.19e-6 83.4 1.92e-36 3.37e-33
#> # … with 369 more rows, and abbreviated variable names ¹​response, ²​comparison,
#> # ³​statistic, ⁴​adjusted.p.value
#> response comparison feature term df sumsq meansq statistic p.value
#> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 day 1~2~3~4~5~H N341 response 5 9.17e7 1.83e7 137. 1.55e-46
#> 2 day 1~2~3~4~5~H N133 response 5 1.65e7 3.31e6 126. 8.63e-45
#> 3 day 1~2~3~4~5~H N163 response 5 1.42e7 2.84e6 117. 2.95e-43
#> 4 day 1~2~3~4~5~H N1087 response 5 5.72e5 1.14e5 99.8 5.61e-40
#> 5 day 1~2~3~4~5~H N171 response 5 5.31e4 1.06e4 95.7 3.84e-39
#> 6 day 1~2~3~4~5~H N513 response 5 7.99e5 1.60e5 95.3 4.78e-39
#> 7 day 1~2~3~4~5~H N1025 response 5 6.57e5 1.31e5 91.0 3.91e-38
#> 8 day 1~2~3~4~5~H N342 response 5 8.76e5 1.75e5 90.3 5.32e-38
#> 9 day 1~2~3~4~5~H N1083 response 5 1.21e7 2.42e6 89.0 1.06e-37
#> 10 day 1~2~3~4~5~H N1085 response 5 2.59e6 5.18e5 83.4 1.92e-36
#> # 369 more rows
#> # ℹ 1 more variable: adjusted.p.value <dbl>
```

The ANOVA has identified 379 features significantly explanatory over the
Expand Down
Binary file modified man/figures/README-feature_plot-1.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-rf_heatmap-1.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-supervised_RF-1.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 08bb28b

Please sign in to comment.