Skip to content
Merged

Dev #13

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
5cae88f
Added unit test and created a easy opticluster function
GregJohnsonJr Apr 24, 2024
37ec06d
Release v0.0.1 (#1)
GregJohnsonJr Apr 29, 2024
931b70b
Add cpp test (#3)
GregJohnsonJr May 17, 2024
9efe436
RMD Check is able to run successfully!
GregJohnsonJr May 31, 2024
678d5a3
Correcting the paths of my cpp files, should fix the action errors.
GregJohnsonJr May 31, 2024
2c99c18
Update to the cluster command test fixture
GregJohnsonJr May 31, 2024
8f3cbc1
Modifying the test for opticluster
GregJohnsonJr Jun 3, 2024
626e70a
Ensuring everything works with c++11
GregJohnsonJr Jun 3, 2024
5b1bdb0
Removing code issues from cluster command
GregJohnsonJr Jun 3, 2024
e35e710
Adding the build ignore
GregJohnsonJr Jun 3, 2024
11a41dd
Founds some issue where I am using c++ 17 syntax and not 11.
GregJohnsonJr Jun 5, 2024
8fcff5d
Github action fixes, needed to update syntax towards cpp 11
GregJohnsonJr Jun 6, 2024
6eb79ec
Modified the testing structure by removing the "Opticluster returns p…
GregJohnsonJr Jun 6, 2024
50c3a7c
Fix cluster unit test (#5)
GregJohnsonJr Jun 10, 2024
b717404
Printing out the metrics after you perform a cluster and added a true…
GregJohnsonJr Jun 10, 2024
77ebc1c
Release polish (#6)
GregJohnsonJr Jun 14, 2024
58a4056
Added a depends for lazy-loading and other R related issues.
GregJohnsonJr Jun 14, 2024
d958121
More cluster features (#7)
GregJohnsonJr Jul 12, 2024
12aaa2e
Merge branch 'master' into dev
GregJohnsonJr Jul 12, 2024
2682ec5
The fix for github actions.
GregJohnsonJr Jul 12, 2024
b7a77be
Change to the include file.
GregJohnsonJr Jul 12, 2024
869656e
Removing srand from Utils, going to attempt to set seeds inside of R.
GregJohnsonJr Jul 12, 2024
fc8b722
Fix for race condition issue.
GregJohnsonJr Jul 15, 2024
8603183
Fix for RCMD check warnings
GregJohnsonJr Jul 15, 2024
79fb369
The fix for the windows version of RMD Check!
GregJohnsonJr Jul 16, 2024
1a4256b
Adding dependency for time.
GregJohnsonJr Jul 16, 2024
8e0ae22
Make shared (#9)
GregJohnsonJr Aug 30, 2024
7564d48
Forgot a unit test. (#10)
GregJohnsonJr Sep 3, 2024
3bd7dea
Fix results (#11)
GregJohnsonJr Sep 10, 2024
2be797e
Removing and fixing check issues.
GregJohnsonJr Sep 10, 2024
a445421
Fix compilation warnings (#12)
GregJohnsonJr Sep 11, 2024
e7d8625
Fix for negative index value
GregJohnsonJr Sep 11, 2024
ad47beb
Cleaning up build notes.
GregJohnsonJr Sep 11, 2024
ba93c19
Merge branch 'master' into dev
GregJohnsonJr Sep 11, 2024
e6a4c9f
lintr fixes
GregJohnsonJr Sep 11, 2024
386a0c7
Fix for lintr
GregJohnsonJr Sep 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ pip-log.txt
pip-delete-this-directory.txt

# CMakeCheck list via Clion
CMakeLists.txt
src/CMakeLists.txt

# Unit test / coverage reports
htmlcov/
Expand Down Expand Up @@ -192,3 +192,4 @@ fabric.properties
# idea folder, uncomment if you don't need it
.idea
src/.DS_Store
/.vscode
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
useDynLib(Opticluster, .registration=TRUE)
importFrom(Rcpp, evalCpp)
importFrom("utils", "read.table")
export(opti_cluster)
export(cluster)
export(opti_cluster)
94 changes: 61 additions & 33 deletions R/Cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,51 +11,79 @@
#' @export
#' @param sparse_matrix A Sparse Matrix.
#' @param cutoff A cutoff value
#' @param count_table A table of names and the given abundance per group.
#' @param iterations The number of iterations
#' @param shuffle a boolean to determine whether or not you want to shuffle the data before you cluster
#' @return A data.frame of the clusters.
opti_cluster <- function(sparse_matrix, cutoff, iterations, shuffle = TRUE) {
index_one_list <- sparse_matrix@i
index_two_list <- sparse_matrix@j
value_list <- sparse_matrix@x
clustering_output_string_list <- MatrixToOpiMatrixCluster(index_one_list, index_two_list, value_list, cutoff,
iterations, shuffle)
clustering_output_string <- clustering_output_string_list[1]
clustering_metric <- clustering_output_string_list[2]
clustering_metric_2 <- clustering_output_string_list[3]
df_cluster_metrics <- (read.table(text = clustering_metric,
sep = "\t", header = TRUE))
df_other_cluster_metrics <- (read.table(text = clustering_metric_2,
sep = "\t", header = TRUE))

df_cluster <- t(read.table(text = clustering_output_string,
sep = "\t", header = TRUE))
df_cluster <- data.frame(df_cluster[-1, ])

colnames(df_cluster)[1] <- "cluster"

opticluster_data <- list(cluster = df_cluster,
cluster_metrics = df_cluster_metrics,
other_cluster_metrics = df_other_cluster_metrics)
#' @param shuffle a boolean to determine whether or
#' not you want to shuffle the data before you cluster
#' @param simularity_matrix Logical; TRUE if `sparse_matrix` holds
#' similarities, FALSE (the default) if it holds distances.
#' @return A data.frame of the cluster and cluster metrics.
opti_cluster <- function(sparse_matrix, cutoff, count_table,
                         iterations = 100, shuffle = TRUE,
                         simularity_matrix = FALSE) {
  # Run the count table through validate_count_table() before it is handed
  # to the compiled routine.
  checked_counts <- validate_count_table(count_table)

  # The compiled routine receives the matrix as its three raw slots
  # (i, j, x) followed by the clustering options.
  native_result <- MatrixToOpiMatrixCluster(
    sparse_matrix@i, sparse_matrix@j, sparse_matrix@x,
    cutoff, checked_counts, iterations, shuffle, simularity_matrix
  )

  # Re-label the positional result. Note that the positions are not in the
  # same order as the names: element 4 is the cluster, element 2 the
  # "other" metrics.
  list(
    abundance = native_result[[1]],
    cluster = native_result[[4]],
    cluster_metrics = native_result[[3]],
    other_cluster_metrics = native_result[[2]]
  )
}



#' Opticluster Description
#'
#' Detailed description of the function.
#'
#' @export
#' @param sparse_matrix A Sparse Matrix.
#' @param cutoff A cutoff value
#' @param method The type of cluster you wish to conduct. There are four different types:
#' furthest, nearest, average, weighted.
#' @param cutoff A cutoff value.
#' @param method The type of cluster you wish to conduct;
#' furthest, nearest, average, weighted.
#' @param count_table A table of names and the given abundance per group.
#' @param simularity_matrix Logical; TRUE if `sparse_matrix` holds
#' similarities, FALSE (the default) if it holds distances.
#' @return A string of the given cluster.
cluster <- function(sparse_matrix, cutoff, method)
{
cluster <- function(sparse_matrix, cutoff, method,
                    count_table, simularity_matrix = FALSE) {
  # Hand the matrix slots (i, j, x), the options and the validated count
  # table to the compiled clustering routine.
  native_result <- ClassicCluster(
    sparse_matrix@i,
    sparse_matrix@j,
    sparse_matrix@x,
    cutoff,
    method,
    validate_count_table(count_table),
    simularity_matrix
  )

  # The routine answers positionally: element 1 is exposed as the
  # abundance, element 2 as the cluster.
  list(abundance = native_result[[1]], cluster = native_result[[2]])
}

return (ClassicCluster(sparse_matrix@i, sparse_matrix@j,
sparse_matrix@x, cutoff, method))

#' Validate a count table
#'
#' Makes sure a count table has a group column. A table with more than two
#' columns is returned untouched. Otherwise the `total` column is duplicated
#' into a third column named `no_group` and the first column is converted to
#' character.
#'
#' @export
#' @param count_table_df The count table,
#' which contains all your abundance information for each sequence.
#' @return The validated count table as a data.frame.
validate_count_table <- function(count_table_df) {
  # More than two columns: group columns are already present.
  if (ncol(count_table_df) > 2) {
    return(count_table_df)
  }
  totals <- count_table_df$total
  # Fail with a clear message instead of the cryptic `names<-` length error
  # that a missing column would otherwise trigger further down.
  if (is.null(totals)) {
    stop("count_table_df must contain a 'total' column.")
  }
  count_table_df <- cbind(count_table_df, totals)
  names(count_table_df)[3] <- "no_group"
  count_table_df[[1]] <- as.character(count_table_df[[1]])
  # Return the data frame explicitly: an assignment as the last expression
  # would hand back only the assigned character vector.
  count_table_df
}
12 changes: 6 additions & 6 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

WritePhylipFile <- function(xPosition, yPosition, data, cutoff, saveLocation) {
invisible(.Call(`_Opticluster_WritePhylipFile`, xPosition, yPosition, data, cutoff, saveLocation))
# Forwards its six arguments, unchanged and in order, to the compiled routine
# `_Opticluster_WritePhylipFile` through .Call(). The call is wrapped in
# invisible(), so the result is not auto-printed.
WritePhylipFile <- function(xPosition, yPosition, data, cutoff, countTable, saveLocation) {
invisible(.Call(`_Opticluster_WritePhylipFile`, xPosition, yPosition, data, cutoff, countTable, saveLocation))
}

MatrixToOpiMatrixCluster <- function(xPosition, yPosition, data, cutoff, maxIterations = 100L, shuffle = TRUE) {
.Call(`_Opticluster_MatrixToOpiMatrixCluster`, xPosition, yPosition, data, cutoff, maxIterations, shuffle)
# Forwards its arguments, unchanged and in order, to the compiled routine
# `_Opticluster_MatrixToOpiMatrixCluster` through .Call() and returns its result.
# Defaults: maxIterations = 100L, shuffle = TRUE, isSim = FALSE.
MatrixToOpiMatrixCluster <- function(xPosition, yPosition, data, cutoff, countTable, maxIterations = 100L, shuffle = TRUE, isSim = FALSE) {
.Call(`_Opticluster_MatrixToOpiMatrixCluster`, xPosition, yPosition, data, cutoff, countTable, maxIterations, shuffle, isSim)
}

ClassicCluster <- function(xPosition, yPosition, data, cutoff, method) {
.Call(`_Opticluster_ClassicCluster`, xPosition, yPosition, data, cutoff, method)
# Forwards its arguments, unchanged and in order, to the compiled routine
# `_Opticluster_ClassicCluster` through .Call() and returns its result.
# None of the parameters has a default.
ClassicCluster <- function(xPosition, yPosition, data, cutoff, method, countTable, isSimularity) {
.Call(`_Opticluster_ClassicCluster`, xPosition, yPosition, data, cutoff, method, countTable, isSimularity)
}

11 changes: 7 additions & 4 deletions man/cluster.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 13 additions & 2 deletions man/opti_cluster.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/validate_count_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 32 additions & 0 deletions src/Adapters/CountTableAdapter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
//
// Created by gregj on 8/27/2024.
//

#ifndef COUNTTABLEADAPTER_H
#define COUNTTABLEADAPTER_H
#include <Rcpp.h>
#include <vector>
#include <string>
#include <unordered_map>
#include <algorithm>

// Adapter around a count table supplied as an Rcpp::DataFrame.
// Only declarations are visible in this header; the per-method notes below are
// inferred from names and signatures and should be confirmed against the
// implementation file.
class CountTableAdapter {
public:
CountTableAdapter() = default;
// Populates this adapter from `countTable`.
// NOTE(review): the bool result presumably reports success — confirm.
bool CreateDataFrameMap(const Rcpp::DataFrame& countTable);
// Design note from the original author: every count table is expected to have
// at least one group, and when no count table is supplied a base one is to be
// created.
// Presumably the abundance of `sampleName` within `group` — confirm.
double FindAbundanceBasedOnGroup(const std::string& group, const std::string& sampleName) const;
// Presumably the total abundance recorded for `sampleName` — confirm.
double FindTotalAbundance(const std::string& sampleName) const;
// Looks up a name by integer index.
// NOTE(review): likely indexes `sampleNames`; bounds handling is not visible here.
std::string GetNameByIndex(int) const;
// Returns a column of doubles selected by `name`.
// NOTE(review): likely read from `dataFrameMap` — confirm.
std::vector<double> GetColumnByName (const std::string& name) const;
// Returns a vector of group names by value.
// NOTE(review): likely a copy of `groups` — confirm.
std::vector<std::string> GetGroups() const;
private:
// Names stored as strings; presumably one per row of the count table — confirm.
std::vector<std::string> sampleNames;
// Maps a string key to a vector of doubles; presumably column name -> column values.
std::unordered_map<std::string, std::vector<double>> dataFrameMap;
// Group names; presumably the group columns of the count table — confirm.
std::vector<std::string> groups;
};



#endif //COUNTTABLEADAPTER_H
19 changes: 19 additions & 0 deletions src/Adapters/DataFrameAdapter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//
// Created by Gregory Johnson on 8/29/24.
//

#ifndef DATAFRAMEADAPTER_H
#define DATAFRAMEADAPTER_H
#include <Rcpp.h>
#include <unordered_map>
#include <vector>
#include <string>

// Stateless helper (only a static member is declared) for producing
// Rcpp::DataFrame objects from standard containers.
class DataFrameAdapter {
public:
// Builds an Rcpp::DataFrame from a map of string keys to string vectors.
// NOTE(review): presumably each key becomes a column name and its vector the
// column values; column ordering and length requirements are not visible
// here — confirm in the implementation.
static Rcpp::DataFrame UnorderedMapToDataFrame(const std::unordered_map<std::string, std::vector<std::string>> &map);
};



#endif //DATAFRAMEADAPTER_H
8 changes: 5 additions & 3 deletions src/Adapters/MatrixAdapter.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,17 @@
#ifndef MATRIXADAPTER_H
#define MATRIXADAPTER_H
#include <unordered_map>

#include "CountTableAdapter.h"
#include "../MothurDependencies/ReadPhylipMatrix.h"


class MatrixAdapter {
public:
MatrixAdapter(const std::vector<int> &iIndexes, const std::vector<int> &jIndexes,
const std::vector<double> &dataValues, double cutoff);
const std::vector<double> &dataValues, double cutOff, bool isSimularity, CountTableAdapter table);
~MatrixAdapter() = default;
ReadPhylipMatrix* ReadPhylipFile(const std::string& path);
ReadPhylipMatrix* ReadPhylipFile(const std::string& path) const;
SparseDistanceMatrix* CreateSparseMatrix();
ListVector* GetListVector() const {return phylipReader->getListVector();}
SparseDistanceMatrix* GetSpareDistanceMatrix() const {return phylipReader->getDMatrix();}
Expand All @@ -25,11 +27,11 @@ class MatrixAdapter {

ReadPhylipMatrix* phylipReader;
SparseDistanceMatrix* spareDistanceMatrix = nullptr;
CountTableAdapter countTable;
std::vector<int> xPosition;
std::vector<int> yPosition;
std::vector<double> data;
std::vector<std::string> matrixNames;
double cutoff;
};


Expand Down
3 changes: 3 additions & 0 deletions src/Adapters/OptimatrixAdapter.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#include <unordered_map>
#include <sstream>

#include "../RowData.h"


class OptimatrixAdapter {
///Closeness is the data representation; it is a vector of sets that tells us which values are closest to it ->
Expand All @@ -27,6 +29,7 @@ class OptimatrixAdapter {
}
OptiMatrix* ConvertToOptimatrix(const std::vector<int>
&xPosition, const std::vector<int>& yPosition, const std::vector<double>& data);
OptiMatrix* ConvertToOptimatrix(const std::vector<RowData>&, bool);
std::vector<std::set<long long>> GetCloseness() {return closeness;}
std::vector<std::string> GetNameList() {return nameList;}
std::vector<std::string> GetSingletons() {return singletons;}
Expand Down
19 changes: 10 additions & 9 deletions src/AverageLinkage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@
// Created by Gregory Johnson on 6/17/24.
//

#include <utility>

#include "MothurDependencies/AverageLinkage.h"

/* This class implements the average UPGMA, average neighbor clustering algorithm */

/***********************************************************************/

AverageLinkage::AverageLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, std::string s, float a) :
Cluster(rav, lv, dm, c, s, a)
{
saveRow = -1;
saveCol = -1;
}
/// Constructs an average-linkage clusterer.
///
/// Every argument is handed on to the Cluster base constructor; the string `s`
/// is taken by value and moved into the base to avoid a second copy.
/// The cached row/column indices and the cached bin sizes all start at -1
/// (presumably so the first updateDistance() call never matches the cache and
/// always refreshes it).
AverageLinkage::AverageLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm,
                               const float c, std::string s, const float a)
    : Cluster(rav, lv, dm, c, std::move(s), a),
      saveRow(-1),
      saveCol(-1),
      rowBin(-1),
      colBin(-1),
      totalBin(-1) {
}


/***********************************************************************/
Expand All @@ -27,14 +27,15 @@ std::string AverageLinkage::getTag() {
/// Updates the distance based on the average linkage method.
///
/// Replaces `colCell.dist` with the average of `colCell.dist` and
/// `rowCell.dist`, weighted by the bin sizes that `rabund` reports for
/// `smallCol` and `smallRow` respectively.
///
/// @param colCell Cell whose `dist` is overwritten with the weighted average.
/// @param rowCell Cell whose `dist` supplies the row-side distance (only read).
/// @return Always true.
bool AverageLinkage::updateDistance(PDistCell& colCell, PDistCell& rowCell) {
    // The bin sizes only change when the (row, col) pair being merged changes,
    // so they are cached and refreshed once per pair.
    if ((saveRow != smallRow) || (saveCol != smallCol)) {
        rowBin = rabund->get(static_cast<int>(smallRow));
        colBin = rabund->get(static_cast<int>(smallCol));
        totalBin = rowBin + colBin;
        saveRow = smallRow;
        saveCol = smallCol;
    }

    // Apply the weighted average exactly once: a second application would use
    // the already-averaged colCell.dist as its input.
    colCell.dist = (static_cast<float>(colBin) * colCell.dist + static_cast<float>(rowBin) * rowCell.dist) /
                   static_cast<float>(totalBin);

    return true;
}
Loading