SchlossLab · GregJohnsonJr · Sep 30, 2024 · Apr 24, 2024 · Apr 29, 2024 · May 17, 2024
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -12,3 +12,4 @@
 ^pkgdown$
 ^vignettes/articles$
 ^\.vscode$
+^\.lintr$
diff --git a/.lintr b/.lintr
@@ -0,0 +1,5 @@
+linters: linters_with_defaults() # see vignette("lintr")
+encoding: "UTF-8"
+exclusions: list(
+    "vignettes/articles/Using-clustur.Rmd"
+  )
diff --git a/R/Cluster.R b/R/Cluster.R
@@ -7,42 +7,47 @@
 #' @param shuffle a boolean to determine whether or
 #'  not you want to shuffle the data before you cluster
 #' @param simularity_matrix are you using a simularity matrix or distance matrix
-#' @param random_seed you can set your own random seed for consistent results, if not it will be set to 123
-#' @param ... Either your phylip file or column file path, or a sparse distance matrix
+#' @param random_seed you can set your own random
+#' seed for consistent results; if not, it will be set to 123
+#' @param ... Either your phylip file or column file path,
+#'  or a sparse distance matrix
 #' @description
-#' You must specfiy the type of matrix you are inputting to cluster your object and we support three types:
+#' You must specify the type of matrix you are inputting
+#'  to cluster your object and we support three types:
 #' the path to your phylip and column distance file, or a sparse matrix.
-#' 
+#'
 #' @examples
 #'  # Using a sparse matrix
 #'  i_values <- as.integer(1:100)
 #'  j_values <- as.integer(sample(1:100, 100, TRUE))
 #'  x_values <- as.numeric(runif(100, 0, 1))
-#'  s_matrix <- Matrix::spMatrix(nrow=max(i_values), 
-#'                               ncol=max(i_values), 
-#'                               i=i_values, 
-#'                               j=j_values, 
+#'  s_matrix <- Matrix::spMatrix(nrow=max(i_values),
+#'                               ncol=max(i_values),
+#'                               i=i_values,
+#'                               j=j_values,
 #'                               x=x_values)
-#' 
+#'
 #'  # Creating a count table using the sparse matrix
-#'  count_table_sparse <- data.frame(sequence=as.character(i_values), 
+#'  count_table_sparse <- data.frame(sequence=as.character(i_values),
 #'                                  total=rep(1,times=100))
-#' 
-#'  cluster_results <- opti_cluster(cutoff=0.2, 
+#'
+#'  cluster_results <- opti_cluster(cutoff=0.2,
 #'                                  count_table = count_table_sparse,
 #'                                  sparse_matrix=s_matrix)
-#' 
+#'
 #'  # With a column file
 #'  count_table <- read.delim(example_path("amazon1.count_table"))
-#'  amazon_data_column <- opti_cluster(column_path=example_path("96_sq_column_amazon.dist"),
+#'  amazon_data_column <- opti_cluster(column_path=
+#'                                     example_path("96_sq_column_amazon.dist"),
 #'                                     count_table = count_table, cutoff = 0.2)
 #'  # With a phylip file
 #'  count_table <- read.delim(example_path("amazon1.count_table"))
-#'  amazon_data_phylip <- opti_cluster(phylip_path=example_path("98_sq_phylip_amazon.dist"),
+#'  amazon_data_phylip <- opti_cluster(phylip_path=
+#'                                     example_path("98_sq_phylip_amazon.dist"),
 #'                                     count_table = count_table, cutoff = 0.2)
-#' 
-#' 
-#' 
+#'
+#'
+#'
 #' @return A data.frame of the cluster and cluster metrics.
 opti_cluster <- function(cutoff, count_table,
                          iterations = 100, shuffle = TRUE,
@@ -51,16 +56,15 @@ opti_cluster <- function(cutoff, count_table,
   list_params <- list(...)
   params <- names(list_params)
   cluster_dfs <- list()
-  
-  if("phylip_path" %in% params && 
-    "column_path" %in% params &&
-    "sparse_matrix" %in% params){
+
+  if ("phylip_path" %in% params &&
+        "column_path" %in% params &&
+        "sparse_matrix" %in% params) {
     stop("You cannot use all three input paramters at once.
     Use either phylip_path, column_path, or sparse_matrix.")
   }
   set.seed(random_seed)
-  if("sparse_matrix" %in% params)
-  {
+  if ("sparse_matrix" %in% params) {
     sparse_matrix <- list_params$sparse_matrix
     cluster_dfs <- MatrixToOpiMatrixCluster(
       sparse_matrix@i,
@@ -72,8 +76,7 @@ opti_cluster <- function(cutoff, count_table,
       shuffle,
       simularity_matrix
     )
-  }
-  else if("phylip_path" %in% params) {
+  } else if ("phylip_path" %in% params) {
     phylip_path <- list_params$phylip_path
     cluster_dfs <- OptiClusterPhylip(
       phylip_path,
@@ -83,8 +86,7 @@ opti_cluster <- function(cutoff, count_table,
       shuffle,
       simularity_matrix
     )
-  }
-  else if("column_path" %in% params) {
+  } else if ("column_path" %in% params) {
     column_path <- list_params$column_path
     cluster_dfs <- OptiClusterColumnDist(
       column_path,
@@ -94,30 +96,29 @@ opti_cluster <- function(cutoff, count_table,
       shuffle,
       simularity_matrix
     )
-  }
-  else {
+  } else {
     stop("The parameters should include either a sparse_matrix,
     phylip_path, column_path")
   }
-  cluster_dfs[[4]]$comma_count <- sapply(cluster_dfs[[4]]$bins, function(x){
-    ls <- gregexpr(",", x, fixed=TRUE)[[1]]
-    if(ls[[1]] == -1){
+  cluster_dfs[[4]]$comma_count <- sapply(cluster_dfs[[4]]$bins, function(x) {
+    ls <- gregexpr(",", x, fixed = TRUE)[[1]]
+    if (ls[[1]] == -1) {
       return(0)
-    }
-    else{
+    } else {
       return(length(ls))
     }
   })
-  cluster_dfs[[4]] <- cluster_dfs[[4]][order(cluster_dfs[[4]]$comma_count, decreasing = T), ]
-  cluster_dfs[[4]] <- cluster_dfs[[4]][,1:3]
+  cluster_dfs[[4]] <- cluster_dfs[[4]][order(cluster_dfs[[4]]$comma_count,
+                                             decreasing = TRUE), ]
+  cluster_dfs[[4]] <- cluster_dfs[[4]][, 1:3]
   opticluster_data <- list(
     abundance = cluster_dfs[[1]],
     cluster = cluster_dfs[[4]],
     cluster_metrics = cluster_dfs[[3]],
     other_cluster_metrics = cluster_dfs[[2]]
   )
   return(opticluster_data)
-  }
+}
 
 #' Cluster Description
 #'
@@ -129,99 +130,100 @@ opti_cluster <- function(cutoff, count_table,
 #'  furthest, nearest, average, weighted.
 #' @param count_table A table of names and the given abundance per group.
 #' @param simularity_matrix are you using a simularity matrix or distance matrix
-#' @param random_seed you can set your own random seed for consistent results, if not it will be set to 123
-#' @param ... Either your phylip file or column file path, or a sparse distance matrix
+#' @param random_seed you can set your own random seed
+#'  for consistent results; if not, it will be set to 123
+#' @param ... Either your phylip file or column file path,
+#'  or a sparse distance matrix
 #' @description
-#' You must specfiy the type of matrix you are inputting to cluster your object and we support three types:
+#' You must specify the type of matrix you are inputting
+#'  to cluster your object and we support three types:
 #' the path to your phylip and column distance file, or a sparse matrix.
 #' @return A string of the given cluster.
-#' 
+#'
 #' @examples
 #'  # Using a sparse matrix
 #'  i_values <- as.integer(1:100)
 #'  j_values <- as.integer(sample(1:100, 100, TRUE))
 #'  x_values <- as.numeric(runif(100, 0, 1))
-#'  s_matrix <- Matrix::spMatrix(nrow=max(i_values), 
-#'                               ncol=max(i_values), 
-#'                               i=i_values, 
-#'                               j=j_values, 
+#'  s_matrix <- Matrix::spMatrix(nrow=max(i_values),
+#'                               ncol=max(i_values),
+#'                               i=i_values,
+#'                               j=j_values,
 #'                               x=x_values)
-#' 
+#'
 #'  # Creating a count table using the sparse matrix
-#'  count_table_sparse <- data.frame(sequence=as.character(i_values), 
+#'  count_table_sparse <- data.frame(sequence=as.character(i_values),
 #'                                  total=rep(1,times=100))
 #'  # furthest method
-#'  cluster_results <- cluster(cutoff=0.2, count_table = count_table_sparse, 
+#'  cluster_results <- cluster(cutoff=0.2, count_table = count_table_sparse,
 #'                             sparse_matrix=s_matrix, method="furthest")
-#' 
+#'
 #'  # With a phylip file and nearest methods
 #'  count_table <- read.delim(example_path("amazon1.count_table"))
-#'  amazon_data_phylip <- cluster(phylip_path=example_path("98_sq_phylip_amazon.dist"),
-#'                                count_table = count_table, method="nearest", cutoff = 0.2)
-#' 
-#'  # With a column file and average methods 
-#'  amazon_data_column <- cluster(column_path=example_path("96_sq_column_amazon.dist"),
-#'                                count_table = count_table, method="average", cutoff = 0.2)
-#' 
+#'  amazon_data_phylip <- cluster(phylip_path=
+#'                                example_path("98_sq_phylip_amazon.dist"),
+#'               count_table = count_table, method="nearest", cutoff = 0.2)
+#'
+#'  # With a column file and average methods
+#'  amazon_data_column <- cluster(column_path=
+#'                                example_path("96_sq_column_amazon.dist"),
+#'               count_table = count_table, method="average", cutoff = 0.2)
+#'
 #'  # Weighted method
-#'  amazon_data_column <- cluster(column_path=example_path("96_sq_column_amazon.dist"),
-#'                                count_table = count_table, method="weighted", cutoff = 0.2)
-#' 
-#' 
-cluster <- function(cutoff, method,
-                    count_table, simularity_matrix = FALSE, random_seed = 123, ...) {
+#'  amazon_data_column <- cluster(column_path=
+#'                                example_path("96_sq_column_amazon.dist"),
+#'              count_table = count_table, method="weighted", cutoff = 0.2)
+#'
+#'
+cluster <- function(cutoff, method, count_table,
+                    simularity_matrix = FALSE, random_seed = 123, ...) {
   list_params <- list(...)
   params <- names(list_params)
   cluster_dfs <- list()
-  if("phylip_path" %in% params && 
-    "column_path" %in% params &&
-    "sparse_matrix" %in% params){
+  if ("phylip_path" %in% params &&
+        "column_path" %in% params &&
+        "sparse_matrix" %in% params) {
     stop("You cannot use all three input paramters at once.
     Use either phylip_path, column_path, or sparse_matrix.")
   }
   set.seed(random_seed)
-  if("sparse_matrix" %in% params)
-  {
+  if ("sparse_matrix" %in% params) {
     sparse_matrix <- list_params$sparse_matrix
     cluster_dfs <-  ClassicCluster(
       sparse_matrix@i, sparse_matrix@j,
       sparse_matrix@x, cutoff, method,
       validate_count_table(count_table),
       simularity_matrix
     )
-  }
-  else if("phylip_path" %in% params) {
+  } else if ("phylip_path" %in% params) {
     phylip_path <- list_params$phylip_path
     cluster_dfs <- ClusterWithPhylip(
       phylip_path, cutoff, method,
       validate_count_table(count_table),
       simularity_matrix
     )
-  }
-  else if("column_path" %in% params) {
+  } else if ("column_path" %in% params) {
     column_path <- list_params$column_path
     cluster_dfs <-  ClusterWithColumn(
       column_path, cutoff, method,
       validate_count_table(count_table),
       simularity_matrix
     )
-  }
-  else {
+  } else {
     stop("The parameters should include either a sparse_matrix,
     phylip_path, column_path")
   }
-
-  cluster_dfs[[2]]$comma_count <- sapply(cluster_dfs[[2]]$bins, function(x){
-    ls <- gregexpr(",", x, fixed=TRUE)[[1]]
-    if(ls[[1]] == -1){
+  cluster_dfs[[2]]$comma_count <- sapply(cluster_dfs[[2]]$bins, function(x) {
+    ls <- gregexpr(",", x, fixed = TRUE)[[1]]
+    if (ls[[1]] == -1) {
       return(0)
-    }
-    else{
+    } else {
       return(length(ls))
     }
   })
-  cluster_dfs[[2]] <- cluster_dfs[[2]][order(cluster_dfs[[2]]$comma_count, decreasing = T), ]
-  cluster_dfs[[2]] <- cluster_dfs[[2]][,1:3]
+  cluster_dfs[[2]] <- cluster_dfs[[2]][order(cluster_dfs[[2]]$comma_count,
+                                             decreasing = TRUE), ]
+  cluster_dfs[[2]] <- cluster_dfs[[2]][, 1:3]
 
   return(list(
     abundance = cluster_dfs[[1]],
@@ -243,14 +245,16 @@ validate_count_table <- function(count_table_df) {
 
 
 #' Example Path
-#' 
+#'
 #' @export
-#' This function was created as a helper function to generate file paths to our internal data. You are able to access this function if you want to follow along with the example.
+#' This function was created as a helper function to generate file paths to our
+#'  internal data. You are able to access this function if you
+#'  want to follow along with the example.
 #' @param file The data of the path you are looking to find.
 #' @examples
 #' # This will return the path to our example file
 #' example_path("98_sq_phylip_amazon.dist")
-#' 
+#'
 #' @return the path inside of the package of the file.
 example_path <- function(file = NULL) {
   path <- ""
@@ -260,4 +264,4 @@ example_path <- function(file = NULL) {
     path <- system.file("extdata", file, package = "clustur", mustWork = TRUE)
   }
   return(path)
-}
+}
diff --git a/man/validate_count_table.Rd b/man/validate_count_table.Rd
diff --git a/src/Utils.cpp b/src/Utils.cpp
@@ -9,10 +9,6 @@
 #include <sstream>
 #include <unordered_set>
 
-Utils::Utils() {
-    constexpr long long seed = 19760620;
-    mersenne_twister_engine.seed(seed);
-}
 
 void Utils::mothurRandomShuffle(std::vector<int>& randomize){
     Rcpp::IntegerVector randomValues = Rcpp::wrap(randomize);

diff --git a/src/test-matrix_adapter.cpp b/src/test-matrix_adapter.cpp
@@ -59,7 +59,7 @@ context("MatrixAdapter Test") {
      }
      test_that("Matrix Adapter can create proper square matrices from distance matrices") {
          MatrixAdapterTestFixture fixture;
-         bool result = fixture.TestDistanceMatrixToSquareMatrix(5);
+         bool result = fixture.TestDistanceMatrixToSquareMatrix(6);
          expect_true(result);
          result = fixture.TestDistanceMatrixToSquareMatrix(0);
          expect_false(result);