# ddr_tree
## DDRTree 模块打包为 Python 包

In [None]:
%%bash
# Rebuild the Python extension from a clean state:
#   1. print the working directory so the output records where the build ran,
#   2. remove the contents of build/ and any shared objects left in this directory,
#   3. run the setup.py build_ext step, placing the built module in the source tree.
pwd
rm -rf build/*
rm -rf *.so
python setup.py build_ext --inplace

## 将 Python 的 PCA 降维打包为 C++ 库

In [13]:
%%bash
# Configure and compile the CMake project in a freshly created build/ directory.
# `set -x` echoes every command before it runs, so each step shows up in the cell output.
set -x
pwd
rm -rf build

# Chained with &&: a step only runs when the previous one succeeded.
mkdir build \
    && cd build \
    && cmake .. \
    && cmake --build .

+ pwd
+ rm -rf build


/mnt/ssd/geneplus/develop/liushen/DDRTree/test/ddrtreecpp_call_pypca


+ mkdir build
+ cd build
+ cmake ..


-- The C compiler identification is GNU 11.2.0
-- The CXX compiler identification is GNU 11.2.0
-- Check for working C compiler: /opt/miniconda/bin/x86_64-conda-linux-gnu-cc
-- Check for working C compiler: /opt/miniconda/bin/x86_64-conda-linux-gnu-cc -- works
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Detecting C compile features
-- Detecting C compile features - done
-- Check for working CXX compiler: /opt/miniconda/bin/x86_64-conda-linux-gnu-c++
-- Check for working CXX compiler: /opt/miniconda/bin/x86_64-conda-linux-gnu-c++ -- works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Found Python3: /opt/miniconda/envs/r42/bin/python3 (found version "3.12.3") found components: Interpreter Development 
-- Performing Test HAS_FLTO
-- Performing Test HAS_FLTO - Success
-- Found pybind11: /opt/miniconda/include (found version "2.13.6")
-- Configuring do

+ cmake --build .


make[1]: Entering directory '/mnt/ssd/geneplus/develop/liushen/DDRTree/test/ddrtreecpp_call_pypca/build'
make[2]: Entering directory '/mnt/ssd/geneplus/develop/liushen/DDRTree/test/ddrtreecpp_call_pypca/build'
[35m[1mScanning dependencies of target pca_modul[0m
make[2]: Leaving directory '/mnt/ssd/geneplus/develop/liushen/DDRTree/test/ddrtreecpp_call_pypca/build'
make[2]: Entering directory '/mnt/ssd/geneplus/develop/liushen/DDRTree/test/ddrtreecpp_call_pypca/build'
[ 50%] [32mBuilding CXX object CMakeFiles/pca_modul.dir/DDRTree_wrapper.cpp.o[0m


In file included from /opt/miniconda/envs/r42/lib/R/library/RcppEigen/include/Eigen/Core:205,
                 from /opt/miniconda/include/pybind11/eigen/matrix.h:28,
                 from /opt/miniconda/include/pybind11/eigen.h:12,
                 from /mnt/ssd/geneplus/develop/liushen/DDRTree/test/ddrtreecpp_call_pypca/DDRTree_wrapper.cpp:2:
   46 | typedef eigen_packet_wrapper<__m128i, 0> Packet4i;
      |                                        ^
   47 | typedef eigen_packet_wrapper<__m128i, 1> Packet16b;
      |                                        ^
   49 | template<> struct is_arithmetic<__m128>  { enum { value = true }; };
      |                                       ^
   50 | template<> struct is_arithmetic<__m128i> { enum { value = true }; };
      |                                        ^
   51 | template<> struct is_arithmetic<__m128d> { enum { value = true }; };
      |                                        ^
  222 | template<> struct unpacket_traits<Packet4f> {
     

[100%] [32m[1mLinking CXX shared module libpca_modul.so[0m
make[2]: Leaving directory '/mnt/ssd/geneplus/develop/liushen/DDRTree/test/ddrtreecpp_call_pypca/build'
[100%] Built target pca_modul
make[1]: Leaving directory '/mnt/ssd/geneplus/develop/liushen/DDRTree/test/ddrtreecpp_call_pypca/build'


## Test 1：测试 DDRTree.cpp 能否成功使用

In [None]:
# Third-party and project imports used by the test cell below.
import numpy as np
# NOTE(review): `test` is also the name of a CPython standard-library package;
# confirm this import resolves to the project's own `test/` directory.
from test.ddrtreecpp_call_pypca.DDRTree_py.utils import time_func
from loguru import logger
# The compiled `ddr_tree` extension may be missing if the build step was skipped
# or failed: print a hint for the user, then re-raise so the cell still fails.
try:
    from ddr_tree import DDRTree_reduce_dim
except ImportError:
    print("无法导入 ddr_tree 模块或 DDRTree_reduce_dim 函数，请检查模块是否安装正确及函数名是否准确。")
    raise


@time_func
def test_ddr_tree_cpp(
    n_samples=5000,
    n_features=2000,
    num_clusters=100,
    dimensions=2,
    maxiter=20,
    sigma=1e-3,
    lambda_=0.1,
    gamma=10,
    eps=1e-3,
    verbose=True,
    seed=42,
):
    """Smoke-test the compiled ``DDRTree_reduce_dim`` binding on random data.

    Random matrices are generated from a seeded NumPy RNG, passed to
    ``DDRTree_reduce_dim``, and the shapes of the returned matrices plus the
    objective values are printed. Called without arguments, the function uses
    the same sizes and hyper-parameters that were previously hard-coded.
    For a quick run, pass smaller sizes, e.g.
    ``test_ddr_tree_cpp(n_samples=100, n_features=200)``.

    Args:
        n_samples: Number of samples (N).
        n_features: Original feature dimension (D).
        num_clusters: Number of clusters (K).
        dimensions: Dimension after reduction (d).
        maxiter: Maximum number of iterations.
        sigma: Gaussian kernel parameter.
        lambda_: Regularisation parameter.
        gamma: Weight parameter.
        eps: Convergence threshold.
        verbose: Whether the binding should print detailed progress.
        seed: Seed for NumPy's global RNG, so the inputs are reproducible.

    Raises:
        AssertionError: If any generated input contains NaN or Inf values.
    """
    # Seed the legacy global RNG; the draws below depend on it.
    np.random.seed(seed)

    # Inputs are drawn in the fixed order X, Z, Y, W (dict values are evaluated
    # in source order), so a given seed always yields the same matrices.
    inputs = {
        'R_X': np.random.rand(n_features, n_samples),     # (D x N)
        'R_Z': np.random.rand(dimensions, n_samples),     # (d x N)
        'R_Y': np.random.rand(dimensions, num_clusters),  # (d x K)
        'R_W': np.random.rand(n_features, dimensions),    # (D x d)
    }

    # Sanity-check the inputs before handing them to the native code.
    for name, matrix in inputs.items():
        assert not np.any(np.isnan(matrix)), f"{name} contains NaN values"
        assert not np.any(np.isinf(matrix)), f"{name} contains Inf values"

    logger.warning("开始调用 DDRTree_reduce_dim 函数")
    result = DDRTree_reduce_dim(
        inputs['R_X'], inputs['R_Z'], inputs['R_Y'], inputs['R_W'],
        dimensions, maxiter, num_clusters,
        sigma, lambda_, gamma, eps, verbose
    )
    # The original completion message repeated the "start calling" prefix.
    logger.warning("调用 DDRTree_reduce_dim 函数结束")

    # Report the shape of every returned matrix, then the objective trace.
    print("-------------------------------------------------------------")
    for key in ('W', 'Z', 'stree', 'Y', 'X', 'Q', 'R'):
        print(f"{key} shape:", result[key].shape)
    print("Objective values:", result['objective_vals'])


# Run the smoke test when this code executes as the main module.
# The recorded run of this call took about 6.95 min.
if __name__ == "__main__":
    test_ddr_tree_cpp()




X shape: 2000 5000
Z shape: 2 5000
Y shape: 2 100
W shape: 2000 2
************************************** 
Iteration: 0
updating weights in graph
Finding MST
Refreshing B matrix
   B : (100 x 100)
   distZY : (5000 x 100)
   min_dist : (5000 x 100)
distZY_minCoeff = 
   tmp_R : (5000 x 100)
   R : (5000 x 100)
   Gamma : (100 x 100)
   X : (2000 x 5000)
   W : (2000 x 2)
   Z : (2 x 5000)
   L : (100 x 100)
Checking termination criterion
Computing tmp
... stage 1
... stage 2
Pre-computing LLT analysis
tmp is (100x100), 10000 non-zero values
Computing LLT
tmp_dense 5000x100) 
Computing Q 100x5000) 
gamma: 10
   X_in : (2000 x 5000)
   Q : (2000 x 5000)
Computing W
Computing Z
Computing Y
************************************** 
Iteration: 1
updating weights in graph
Finding MST
Refreshing B matrix
   B : (100 x 100)
   distZY : (5000 x 100)
   min_dist : (5000 x 100)
distZY_minCoeff = 
   tmp_R : (5000 x 100)
   R : (5000 x 100)
   Gamma : (100 x 100)
   X : (2000 x 5000)
   W : (2000 x 2

[32m2024-12-16 16:14:13.239[0m | [1mINFO    [0m | [36mutils[0m:[36mwrapper[0m:[36m20[0m - [1m函数 test_ddr_tree_cpp 开始时间： 2024-12-16 16:07:16，整体运行时间: 6.95 min[0m


-------------------------------------------------------------
W shape: (2000, 2)
Z shape: (2, 5000)
stree shape: (5000, 5000)
Y shape: (2, 100)
X shape: (2000, 5000)
Q shape: (2000, 5000)
R shape: (5000, 100)
Objective values: [1.82714606e+06 3.79539258e+16 7.74740201e+15 3.63943177e+15
 3.63933497e+15]
