Skip to content

Commit

Permalink
Merge pull request #86 from jasenfinch/devel
Browse files Browse the repository at this point in the history
v0.14.9
  • Loading branch information
jasenfinch committed Jan 27, 2022
2 parents e17b352 + 882ea24 commit 80181f5
Show file tree
Hide file tree
Showing 147 changed files with 1,001 additions and 263 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: metabolyseR
Title: Methods for Pre-Treatment, Data Mining and Correlation Analyses of Metabolomics Data
Version: 0.14.8
Version: 0.14.9
Authors@R: person("Jasen", "Finch", email = "jsf9@aber.ac.uk", role = c("aut", "cre"))
Description: A tool kit for pre-treatment, modelling, feature selection and correlation analyses of metabolomics data.
URL: https://jasenfinch.github.io/metabolyseR
Expand Down Expand Up @@ -57,6 +57,7 @@ Collate: allClasses.R
info.R
join.R
keep.R
mds.R
metabolyse.R
metabolyseR.R
modelling.R
Expand All @@ -80,6 +81,7 @@ Collate: allClasses.R
remove.R
randomForest.R
rsd.R
roc.R
show-method.R
split.R
transform.R
Expand Down
8 changes: 8 additions & 0 deletions NAMESPACE
Expand Up @@ -43,6 +43,7 @@ export(keepClasses)
export(keepFeatures)
export(keepSamples)
export(linearRegression)
export(mds)
export(metabolyse)
export(metrics)
export(modellingMethods)
Expand Down Expand Up @@ -72,13 +73,15 @@ export(preTreated)
export(preTreatmentElements)
export(preTreatmentMethods)
export(preTreatmentParameters)
export(proximity)
export(randomForest)
export(raw)
export(reAnalyse)
export(removeClasses)
export(removeFeatures)
export(removeSamples)
export(response)
export(roc)
export(rsd)
export(sinfo)
export(split)
Expand Down Expand Up @@ -113,6 +116,7 @@ importFrom(crayon,green)
importFrom(crayon,red)
importFrom(crayon,yellow)
importFrom(doFuture,registerDoFuture)
importFrom(dplyr,across)
importFrom(dplyr,arrange)
importFrom(dplyr,arrange_all)
importFrom(dplyr,bind_cols)
Expand All @@ -124,14 +128,18 @@ importFrom(dplyr,full_join)
importFrom(dplyr,group_by)
importFrom(dplyr,group_by_all)
importFrom(dplyr,group_by_at)
importFrom(dplyr,group_map)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,mutate_all)
importFrom(dplyr,mutate_at)
importFrom(dplyr,mutate_if)
importFrom(dplyr,n)
importFrom(dplyr,relocate)
importFrom(dplyr,rename)
importFrom(dplyr,rowwise)
importFrom(dplyr,select)
importFrom(dplyr,select_if)
importFrom(dplyr,summarise)
importFrom(dplyr,summarise_all)
importFrom(dplyr,ungroup)
Expand Down
10 changes: 10 additions & 0 deletions NEWS.md
@@ -1,3 +1,13 @@
# metabolyseR 0.14.9

* Suppressed name repair console message encountered during random forest permutation testing.

* Added the [`proximity()`](https://jasenfinch.github.io/metabolyseR/reference/modelling-accessors.html) method for extracting sample proximities from the [`RandomForest`](https://jasenfinch.github.io/metabolyseR/reference/RandomForest-class.html) S4 class.

* Added the [`mds()`](https://jasenfinch.github.io/metabolyseR/reference/mds.html) method to perform multidimensional scaling on sample proximities from the [`RandomForest`](https://jasenfinch.github.io/metabolyseR/reference/RandomForest-class.html) S4 class.

* Added the [`roc()`](https://jasenfinch.github.io/metabolyseR/reference/roc.html) method to calculate receiver-operator characteristic curves from the [`RandomForest`](https://jasenfinch.github.io/metabolyseR/reference/RandomForest-class.html) S4 class.

# metabolyseR 0.14.8

* An error is now thrown during random forest classification when less than two classes are specified.
Expand Down
103 changes: 103 additions & 0 deletions R/mds.R
@@ -0,0 +1,103 @@
#' Multidimensional scaling (MDS)
#' @rdname mds
#' @description Multidimensional scaling of random forest proximities.
#' @param x S4 object of class `RandomForest`, `Analysis` or a list
#' @param dimensions The number of dimensions by which the data are to be represented.
#' @param idx sample information column to use for sample names. If `NULL`, the sample row number will be used. Sample names should be unique for each row of data.
#' @return
#' A tibble containing the scaled dimensions.
#' @examples
#' library(metaboData)
#'
#' x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
#' occupancyMaximum(cls = 'day') %>%
#' transformTICnorm()
#'
#' rf <- randomForest(x,cls = 'day')
#'
#' mds(rf)
#' @export

setGeneric("mds", function(x,dimensions = 2,idx = NULL)
standardGeneric("mds")
)

#' @rdname mds
#' @importFrom dplyr mutate_if group_map select_if across

setMethod('mds',signature = 'RandomForest',
function(x,dimensions = 2,idx = NULL){

group_vars <- switch(type(x),
classification = c('Response','Comparison'),
regression = 'Response',
unsupervised = NULL)

dissimilarities <- x %>%
proximity(idx = idx) %>%
mutate(across(Sample1:Sample2,as.character)) %>%
spread(Sample2,Proximity) %>%
mutate_if(is.numeric,~ 1 - .x)

mds_dimensions <- dissimilarities %>%
group_by_at(group_vars) %>%
group_map(~ .x %>%
select_if(is.numeric) %>%
cmdscale(k = dimensions) %>%
set_colnames(str_c('Dimension ',
seq_len(dimensions))) %>%
as_tibble() %>%
bind_cols(select_if(.x %>%
ungroup(),
is.character)) %>%
relocate(contains('Dimension'),
.after = last_col()),
.keep = TRUE
) %>%
bind_rows() %>%
rename(Sample = Sample1)

if (is.null(idx)){
mds_dimensions <- mds_dimensions %>%
mutate(Sample = as.numeric(Sample)) %>%
arrange(across(c(group_vars,'Sample')))
}

return(mds_dimensions)
}
)

#' @rdname mds

setMethod('mds',signature = 'list',
function(x,dimensions = 2,idx = NULL){
object_classes <- x %>%
map_chr(class)

if (FALSE %in% (object_classes == 'RandomForest')) {
message(
str_c('All objects contained within supplied list ',
'that are not of class RandomForest will be ignored.'))
}

x <- x[object_classes == 'RandomForest']

if (length(x) > 0) {
x %>%
map(mds,dimensions = dimensions,idx = idx) %>%
bind_rows()
} else {
tibble()
}

})

#' @rdname mds

setMethod('mds',signature = 'Analysis',
function(x,dimensions = 2,idx = NULL){
x %>%
analysisResults('modelling') %>%
mds(dimensions = dimensions,
idx = idx)
})
2 changes: 1 addition & 1 deletion R/metabolyseR.R
Expand Up @@ -7,5 +7,5 @@ globalVariables(
'Occupancy','adjustedPvalue','adjusted.p.value','Label','response',
'-log10(p)','DF1','Sample1','Proximity','Dimension 1','Dimension 2',
'x','.level','sensitivity','specificity','Mode','RSD','Median','Colour',
'Index','TIC','y','label','batch','correction','N','term','Metric','Frequency','|r|'
'Index','TIC','y','label','batch','correction','N','term','Metric','Frequency','|r|','idx_1','idx_2'
))
97 changes: 97 additions & 0 deletions R/modelling-accessors.R
Expand Up @@ -5,6 +5,7 @@
#' @param cls sample information column to use
#' @param metric importance metric for which to retrieve explanatory features
#' @param threshold threshold below which explanatory features are extracted
#' @param idx sample information column to use for sample names. If `NULL`, the sample row number will be used. Sample names should be unique for each row of data.
#' @param ... arguments to parse to method for specific class
#' @section Methods:
#' * `binaryComparisons`: Return a vector of all possible binary comparisons for a given sample information column.
Expand All @@ -13,6 +14,7 @@
#' * `metrics`: Retrieve the model performance metrics for a random forest analysis
#' * `importanceMetrics`: Retrieve the available feature importance metrics for a random forest analysis.
#' * `importance`: Retrieve feature importance results.
#' * `proximity`: Retrieve the random forest sample proximities.
#' * `explanatoryFeatures`: Retrieve explanatory features.
#' @examples
#' library(metaboData)
Expand Down Expand Up @@ -40,6 +42,9 @@
#' ## Retrieve the feature importance results
#' importance(rf_analysis)
#'
#' ## Retrieve the sample proximities
#' proximity(rf_analysis)
#'
#' ## Retrieve the explanatory features
#' explanatoryFeatures(rf_analysis,metric = 'FalsePositiveRate',threshold = 0.05)
#' @export
Expand Down Expand Up @@ -210,6 +215,98 @@ setMethod('importance',signature = 'Analysis',
#' @rdname modelling-accessors
#' @export

setGeneric("proximity", function(x,idx = NULL)
standardGeneric("proximity")
)

#' @rdname modelling-accessors
#' @importFrom dplyr relocate

setMethod('proximity',signature = 'RandomForest',
function(x,idx = NULL){

group_vars <- switch(type(x),
classification = c('Response','Comparison'),
regression = 'Response',
unsupervised = NULL) %>%
c(.,'Sample1','Sample2')

proximities <- x@proximities %>%
group_by_at(group_vars) %>%
summarise(Proximity = mean(Proximity),
.groups = 'drop')

if (!is.null(idx)){
sample_idx <- x %>%
clsExtract(cls = idx)

if (any(duplicated(sample_idx))){
stop(str_c('Duplicated sample names found in sample information column `',
idx,
'`. The specified sample names should be unique to each sample.'),
call. = FALSE)
}

sample_idx <- sample_idx %>%
tibble(idx = .) %>%
rowid_to_column(var = 'row')

proximities <- proximities %>%
left_join(sample_idx,
by = c('Sample1' = 'row')) %>%
rename(idx_1 = idx) %>%
left_join(sample_idx,
by = c('Sample2' = 'row')) %>%
rename(idx_2 = idx) %>%
select(-Sample1,
-Sample2,
Sample1 = idx_1,
Sample2 = idx_2) %>%
relocate(Proximity,.after = Sample2)
}

return(proximities)
}
)

#' @rdname modelling-accessors

setMethod('proximity',signature = 'list',
function(x,idx = NULL){
object_classes <- x %>%
map_chr(class)

if (FALSE %in% (object_classes == 'RandomForest')) {
message(
str_c('All objects contained within supplied list ',
'that are not of class RandomForest will be ignored.'))
}

x <- x[object_classes == 'RandomForest']

if (length(x) > 0) {
x %>%
map(proximity,idx = idx) %>%
bind_rows()
} else {
tibble()
}

})

#' @rdname modelling-accessors

setMethod('proximity',signature = 'Analysis',
function(x,idx = NULL){
x %>%
analysisResults(element = 'modelling') %>%
proximity(idx = idx)
})


#' @rdname modelling-accessors
#' @export

setGeneric('explanatoryFeatures', function(x,...)
standardGeneric("explanatoryFeatures")
)
Expand Down

0 comments on commit 80181f5

Please sign in to comment.