diff --git a/.gitmodules b/.gitmodules
index 470cf466..578e24f9 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,9 +1,12 @@
+[submodule "third_party/googletest"]
+	path = third_party/googletest
+	url = https://github.com/google/googletest.git
 [submodule "third_party/glog"]
 	path = third_party/glog
-	url = git@github.com:google/glog.git
+	url = https://github.com/google/glog.git
 [submodule "third_party/gflags"]
 	path = third_party/gflags
-	url = git@github.com:gflags/gflags.git
+	url = https://github.com/gflags/gflags.git
 [submodule "third_party/eigen"]
 	path = third_party/eigen
-	url = git@github.com:InfiniTensor/eigen-mirror.git
+	url = https://github.com/eigenteam/eigen-git-mirror.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index df636b27..22dcf791 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,6 +4,7 @@ option(USE_CUDA "Support NVIDIA CUDA" OFF)
 option(PROFILE_MODE "ENABLE PROFILE MODE" OFF)
 option(USE_OMP "Use OpenMP as backend for Eigen" ON)
 option(USE_NCCL "Build project for distributed running" ON)
+option(BUILD_TEST "Build InfiniTrain tests" OFF)
 
 project(infini_train VERSION 0.5.0 LANGUAGES CXX)
 
@@ -14,6 +15,19 @@ set(CMAKE_CXX_EXTENSIONS OFF)
 # Generate compile_commands.json
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
+# ------------------------------------------------------------------------------
+# GoogleTest (submodule)
+# ------------------------------------------------------------------------------
+if(BUILD_TEST)
+    if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/third_party/googletest/CMakeLists.txt)
+        message(FATAL_ERROR "googletest submodule not found at third_party/googletest. "
+                            "Run: git submodule update --init third_party/googletest")
+    endif()
+    set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+    add_subdirectory(third_party/googletest)
+    enable_testing()
+endif()
+
 # ------------------------------------------------------------------------------
 # Third-party deps
 # ------------------------------------------------------------------------------
@@ -26,7 +40,9 @@ include_directories(${gflags_SOURCE_DIR}/include)
 set(WITH_GFLAGS OFF CACHE BOOL "Disable glog finding system gflags" FORCE)
 set(WITH_GTEST OFF CACHE BOOL "Disable glog finding system gtest" FORCE)
 add_subdirectory(third_party/glog)
+# add_compile_definitions(GLOG_USE_GLOG_EXPORT=1)
 include_directories(${glog_SOURCE_DIR}/src)
+# include_directories(${glog_BINARY_DIR}/glog)
 
 # eigen
 if(USE_OMP)
@@ -48,6 +64,10 @@ endif()
 # Framework core sources (*.cc), excluding cpu kernels (they are built separately)
 file(GLOB_RECURSE SRC ${PROJECT_SOURCE_DIR}/infini_train/src/*.cc)
 list(FILTER SRC EXCLUDE REGEX ".*kernels/cpu/.*")
+if(NOT USE_CUDA)
+    list(FILTER SRC EXCLUDE REGEX ".*runtime/cuda/.*")
+    list(FILTER SRC EXCLUDE REGEX ".*ccl/cuda/.*")
+endif()
 if(NOT USE_NCCL)
     list(FILTER SRC EXCLUDE REGEX ".*infini_train/src/core/ccl/cuda/.*")
 endif()
@@ -190,17 +210,8 @@ add_executable(llama3
 )
 link_infini_train_exe(llama3)
 
-# Tools
-add_subdirectory(tools/infini_run)
-set_target_properties(infini_run PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
 
 # Tests
-add_executable(test_hook test/hook/test_hook.cc)
-link_infini_train_exe(test_hook)
-
-add_executable(test_precision_check test/hook/test_precision_check.cc)
-link_infini_train_exe(test_precision_check)
-
-add_executable(test_lora test/lora/test_lora.cc)
-link_infini_train_exe(test_lora)
-
+if(BUILD_TEST)
+    add_subdirectory(tests)
+endif()
diff --git a/test/hook/test_hook.cc b/test/hook/test_hook.cc
deleted file mode 100644
index 32c7e097..00000000
--- a/test/hook/test_hook.cc
+++ /dev/null
@@ -1,179 +0,0 @@
-#include <iostream>
-#include <memory>
-
-#include "glog/logging.h"
-
-#include "infini_train/include/autograd/elementwise.h"
-#include "infini_train/include/autograd/function.h"
-#include "infini_train/include/autograd/function_hook.h"
-#include "infini_train/include/common/hook.h"
-#include "infini_train/include/nn/modules/module.h"
-#include "infini_train/include/nn/parallel/global.h"
-#include "infini_train/include/tensor.h"
-
-using namespace infini_train;
-
-// ============================================================================
-// Test 1: Basic Module Hooks
-// ============================================================================
-void test_basic_hooks() {
-    std::cout << "\n=== Test 1: Basic Module Hooks ===" << std::endl;
-
-    auto x = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32);
-    x->set_requires_grad(true);
-
-    // Module hook example
-    class MyModule : public nn::Module {
-    public:
-        MyModule() : Module("MyModule") {}
-
-        std::vector<std::shared_ptr<Tensor>> Forward(const std::vector<std::shared_ptr<Tensor>> &inputs) override {
-            std::cout << "Forward pass executing..." << std::endl;
-            return inputs;
-        }
-    };
-
-    auto module = std::make_shared<MyModule>();
-
-    // Register forward pre-hook
-    auto pre_hook
-        = module->RegisterForwardPreHook([](nn::Module *mod, const std::vector<std::shared_ptr<Tensor>> &inputs) {
-              std::cout << "Forward pre-hook: Module type = " << mod->type() << std::endl;
-          });
-
-    // Register forward post-hook
-    auto fwd_hook
-        = module->RegisterForwardPostHook([](nn::Module *mod, const std::vector<std::shared_ptr<Tensor>> &inputs,
-                                             const std::vector<std::shared_ptr<Tensor>> &outputs) {
-              std::cout << "Forward post-hook: Got " << outputs.size() << " outputs" << std::endl;
-          });
-
-    // Register backward pre-hook
-    auto bwd_pre_hook = module->RegisterBackwardPreHook(
-        [](nn::Module *mod, const std::vector<std::shared_ptr<Tensor>> &grad_outputs) {
-            std::cout << "Backward pre-hook called!" << std::endl;
-        });
-
-    // Register backward post-hook
-    auto bwd_post_hook
-        = module->RegisterBackwardPostHook([](nn::Module *mod, const std::vector<std::shared_ptr<Tensor>> &grad_inputs,
                                              const std::vector<std::shared_ptr<Tensor>> &grad_outputs) {
-              std::cout << "Backward post-hook called!" << std::endl;
-          });
-
-    // Test forward pass
-    std::vector<std::shared_ptr<Tensor>> inputs = {x};
-    auto outputs = (*module)(inputs);
-
-    std::cout << "Module hook test completed!" << std::endl;
<< std::endl; -} - -// ============================================================================ -// Test 2: Hook Remove() Functionality Test -// ============================================================================ -void test_hook_remove() { - std::cout << "\n=== Test 2: Hook Remove() Functionality Test ===" << std::endl; - - auto a = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32); - auto b = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32); - a->set_requires_grad(true); - b->set_requires_grad(true); - - int hook1_count = 0; - int hook2_count = 0; - int hook3_count = 0; - - auto add_fn = std::make_shared(); - - // Register three forward pre-hooks - auto handle1 = add_fn->RegisterForwardPreHook( - [&hook1_count](autograd::Function *, const std::vector> &) { - hook1_count++; - std::cout << "Hook 1 called (count: " << hook1_count << ")" << std::endl; - }); - - auto handle2 = add_fn->RegisterForwardPreHook( - [&hook2_count](autograd::Function *, const std::vector> &) { - hook2_count++; - std::cout << "Hook 2 called (count: " << hook2_count << ")" << std::endl; - }); - - auto handle3 = add_fn->RegisterForwardPreHook( - [&hook3_count](autograd::Function *, const std::vector> &) { - hook3_count++; - std::cout << "Hook 3 called (count: " << hook3_count << ")" << std::endl; - }); - - // First call - all hooks should fire - std::cout << "\n--- First Apply (all hooks active) ---" << std::endl; - std::vector> inputs; - inputs.push_back(a); - inputs.push_back(b); - auto result1 = add_fn->Apply(inputs); - std::cout << "Hook counts: " << hook1_count << ", " << hook2_count << ", " << hook3_count << std::endl; - - // Remove hook 2 - std::cout << "\n--- Removing Hook 2 ---" << std::endl; - handle2->Remove(); - - // Second call - hook 2 should not fire - std::cout << "\n--- Second Apply (hook 2 removed) ---" << std::endl; - auto result2 = add_fn->Apply(inputs); - std::cout << "Hook counts: " << hook1_count << ", " << hook2_count << ", " << hook3_count << std::endl; - - // Remove hook 1 - std::cout << "\n--- Removing Hook 1 ---" << std::endl; - handle1->Remove(); - - // Third call - only hook 3 should fire - std::cout << "\n--- Third Apply (hooks 1 and 2 removed) ---" << std::endl; - auto result3 = add_fn->Apply(inputs); - std::cout << "Hook counts: " << hook1_count << ", " << hook2_count << ", " << hook3_count << std::endl; - - // Verify results - std::cout << "\n=== Test Results ===" << std::endl; - bool test_passed = true; - - if (hook1_count != 2) { - std::cout << "FAIL: Hook 1 should be called 2 times, got " << hook1_count << std::endl; - test_passed = false; - } - - if (hook2_count != 1) { - std::cout << "FAIL: Hook 2 should be called 1 time, got " << hook2_count << std::endl; - test_passed = false; - } - - if (hook3_count != 3) { - std::cout << "FAIL: Hook 3 should be called 3 times, got " << hook3_count << std::endl; - test_passed = false; - } - - if (test_passed) { - std::cout << "SUCCESS: All hooks behaved correctly!" 
-        std::cout << " - Hook 1: called 2 times (before removal)" << std::endl;
-        std::cout << " - Hook 2: called 1 time (removed after first call)" << std::endl;
-        std::cout << " - Hook 3: called 3 times (never removed)" << std::endl;
-    }
-}
-
-// ============================================================================
-// Main
-// ============================================================================
-int main(int argc, char *argv[]) {
-    google::InitGoogleLogging(argv[0]);
-    nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1);
-
-    std::cout << "========================================" << std::endl;
-    std::cout << " Hook Mechanism Tests" << std::endl;
-    std::cout << "========================================" << std::endl;
-
-    test_basic_hooks();
-    test_hook_remove();
-
-    std::cout << "\n========================================" << std::endl;
-    std::cout << " All Tests Completed Successfully" << std::endl;
-    std::cout << "========================================" << std::endl;
-
-    return 0;
-}
diff --git a/test/hook/test_precision_check.cc b/test/hook/test_precision_check.cc
deleted file mode 100644
index 65c8258c..00000000
--- a/test/hook/test_precision_check.cc
+++ /dev/null
@@ -1,241 +0,0 @@
-#include <filesystem>
-#include <iostream>
-#include <memory>
-
-#include "glog/logging.h"
-
-#include "infini_train/include/nn/modules/module.h"
-#include "infini_train/include/nn/parallel/global.h"
-#include "infini_train/include/tensor.h"
-#include "infini_train/include/utils/global_module_hook_registry.h"
-#include "infini_train/include/utils/precision_check_config.h"
-#include "infini_train/include/utils/precision_checker.h"
-
-using namespace infini_train;
-
-class MyModel : public nn::Module {
-public:
-    MyModel() : Module("MyModel") {}
-
-    std::vector<std::shared_ptr<Tensor>> Forward(const std::vector<std::shared_ptr<Tensor>> &inputs) override {
-        auto x = inputs[0];
-        x->RequiresGrad();
-        auto y = x->Mul(x);
-        return {y};
-    }
-};
-
-// Simple model for multi-iteration test
-class SimpleModel : public nn::Module {
-public:
-    SimpleModel() : Module("SimpleModel") {}
-
-    std::vector<std::shared_ptr<Tensor>> Forward(const std::vector<std::shared_ptr<Tensor>> &inputs) override {
-        auto x = inputs[0];
-        x->RequiresGrad();
-        auto y = x->Mul(x)->Mul(x); // x^3
-        return {y};
-    }
-};
-
-void RunModelForwardBackward(const std::shared_ptr<nn::Module> &model) {
-    auto x = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32);
-    x->Fill(2.0f);
-    x->RequiresGrad();
-
-    std::vector<std::shared_ptr<Tensor>> inputs = {x};
-    auto outputs = (*model)(inputs);
-    auto loss = outputs[0]->Sum(0, false)->Sum(0, false);
-    loss->Backward();
-}
-
-void TestFunctionLevel(const std::string &config_str) {
-    std::cout << "\n========================================" << std::endl;
-    std::cout << " Function-Level Test: " << config_str << std::endl;
-    std::cout << "========================================" << std::endl;
-
-    auto x = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32);
-    x->Fill(2.0f);
-    x->RequiresGrad();
-
-    auto y = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32);
-    y->Fill(3.0f);
-    y->RequiresGrad();
-
-    auto z = x->Mul(y);
-    auto loss = z->Sum(0, false)->Sum(0, false);
-    loss->Backward();
-
-    std::cout << "Test completed." << std::endl;
-}
-
-void TestModuleLevel(const std::string &config_str) {
-    std::cout << "\n========================================" << std::endl;
-    std::cout << " Module-Level Test: " << config_str << std::endl;
-    std::cout << "========================================" << std::endl;
-
-    auto model = std::make_shared<MyModel>();
-    RunModelForwardBackward(model);
-
-    std::cout << "Test completed." << std::endl;
<< std::endl; -} - -// Test: Simple format output (level=2, format=simple) -void TestSimpleFormat() { - std::cout << "\n========================================" << std::endl; - std::cout << " Test: Simple Format (level=2, format=simple)" << std::endl; - std::cout << "========================================" << std::endl; - - auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); - x->Fill(2.0f); - x->RequiresGrad(); - - auto y = x->Mul(x); - auto loss = y->Sum(0, false)->Sum(0, false); // Two Sum ops to produce scalar - loss->Backward(); - - std::cout << "Simple format test completed - check output for min/max/mean values." << std::endl; -} - -// Test: MD5 format output (level=2, format=md5) -void TestMd5Format() { - std::cout << "\n========================================" << std::endl; - std::cout << " Test: MD5 Format (level=2, format=md5)" << std::endl; - std::cout << "========================================" << std::endl; - - auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); - x->Fill(2.0f); - x->RequiresGrad(); - - auto y = x->Mul(x); - auto loss = y->Sum(0, false)->Sum(0, false); // Two Sum ops to produce scalar - loss->Backward(); - - std::cout << "MD5 format test completed - check output for md5 hashes." << std::endl; -} - -// Test: Save tensors to NPY files (level=1, save_tensors=true) -void TestSaveTensors() { - std::cout << "\n========================================" << std::endl; - std::cout << " Test: Save Tensors (level=1, save_tensors=true)" << std::endl; - std::cout << "========================================" << std::endl; - - std::string output_path = "/tmp/precision_check_npy"; - - auto model = std::make_shared(); - RunModelForwardBackward(model); - - // Verify NPY files were created - namespace fs = std::filesystem; - bool found_npy = false; - if (fs::exists(output_path)) { - for (const auto &entry : fs::recursive_directory_iterator(output_path)) { - if (entry.path().extension() == ".npy") { - found_npy = true; - std::cout << "Found NPY file: " << entry.path() << std::endl; - } - } - } - - if (found_npy) { - std::cout << "Save tensors test PASSED - NPY files created successfully." << std::endl; - } else { - std::cout << "Save tensors test completed - check output directory for NPY files." << std::endl; - } -} - -// Test: Multi-iteration file overwrite (level=1, save_tensors=true, iter=3) -void TestMultiIterOverwrite() { - std::cout << "\n========================================" << std::endl; - std::cout << " Test: Multi-Iteration File Overwrite" << std::endl; - std::cout << "========================================" << std::endl; - - std::string output_path = "/tmp/precision_check_overwrite"; - - auto model = std::make_shared(); - int num_iters = 3; - - // Run multiple iterations - files should be overwritten - for (int i = 0; i < num_iters; ++i) { - std::cout << "Iteration " << (i + 1) << "/" << num_iters << std::endl; - utils::PrecisionCheckEnv::ResetCounters(); // Reset counters each iteration - RunModelForwardBackward(model); - } - - namespace fs = std::filesystem; - int npy_count = 0; - if (fs::exists(output_path)) { - for (const auto &entry : fs::recursive_directory_iterator(output_path)) { - if (entry.path().extension() == ".npy") { - ++npy_count; - } - } - } - - std::cout << "Multi-iteration test completed - found " << npy_count << " NPY files after " << num_iters - << " iterations." 
-    std::cout << "(Files should be overwritten each iteration, count should be consistent with 1 iter)" << std::endl;
-}
-
-int main(int argc, char *argv[]) {
-    google::InitGoogleLogging(argv[0]);
-
-    std::string config_str = argc > 1 ? argv[1] : "";
-
-    std::cout << "========================================" << std::endl;
-    std::cout << " Precision Check Test Suite" << std::endl;
-    std::cout << "========================================" << std::endl;
-
-    nn::parallel::global::InitAllEnv(1, 1, false, 1, 1);
-
-    // If no config argument, run all format tests
-    if (config_str.empty()) {
-        auto config = utils::PrecisionCheckConfig::Parse("level=2,format=simple");
-        utils::PrecisionCheckEnv::Instance().Init(config);
-
-        std::cout << "\nRunning all precision check format tests..." << std::endl;
-
-        // Test 1: Simple format
-        TestSimpleFormat();
-
-        // Test 2: MD5 format
-        auto md5_config = utils::PrecisionCheckConfig::Parse("level=2,format=md5");
-        utils::PrecisionCheckEnv::Instance().Init(md5_config);
-        TestMd5Format();
-
-        // Test 3: Save tensors
-        auto npy_config = utils::PrecisionCheckConfig::Parse("level=1,save_tensors=true");
-        utils::PrecisionCheckEnv::Instance().Init(npy_config);
-        TestSaveTensors();
-
-        // Test 4: Multi-iteration overwrite
-        auto iter_config = utils::PrecisionCheckConfig::Parse("level=1,save_tensors=true");
-        utils::PrecisionCheckEnv::Instance().Init(iter_config);
-        TestMultiIterOverwrite();
-
-        std::cout << "\n========================================" << std::endl;
-        std::cout << " All Tests Completed Successfully" << std::endl;
-        std::cout << "========================================" << std::endl;
-        return 0;
-    }
-
-    // If config provided, run single test (original behavior)
-    auto config = utils::PrecisionCheckConfig::Parse(config_str);
-    utils::PrecisionCheckEnv::Instance().Init(config);
-
-    std::cout << "Config: " << config_str << std::endl;
-
-    if (config.level == utils::PrecisionCheckLevel::MODULE) {
-        TestModuleLevel(config_str);
-    } else if (config.level == utils::PrecisionCheckLevel::FUNCTION) {
-        TestFunctionLevel(config_str);
-    } else {
-        std::cout << "No tests to run (level=0)" << std::endl;
-    }
-
-    std::cout << "\n========================================" << std::endl;
-    std::cout << " Test Completed" << std::endl;
-    std::cout << "========================================" << std::endl;
-
-    return 0;
-}
diff --git a/test/lora/test_lora.cc b/test/lora/test_lora.cc
deleted file mode 100644
index 06966809..00000000
--- a/test/lora/test_lora.cc
+++ /dev/null
@@ -1,860 +0,0 @@
-#include <cstdio>
-#include <iostream>
-#include <memory>
-
-#include "glog/logging.h"
-
-#include "infini_train/include/nn/lora/lora_config.h"
-#include "infini_train/include/nn/lora/lora_linear.h"
-#include "infini_train/include/nn/lora/lora_utils.h"
-#include "infini_train/include/nn/modules/container.h"
-#include "infini_train/include/nn/modules/linear.h"
-#include "infini_train/include/nn/modules/module.h"
-#include "infini_train/include/nn/parallel/global.h"
-#include "infini_train/include/tensor.h"
-
-using namespace infini_train;
-using namespace infini_train::nn::lora;
-
-// ============================================================================
-// Test 1: LoRAConfig
-// ============================================================================
-void test_lora_config() {
-    std::cout << "\n=== Test 1: LoRAConfig ===" << std::endl;
-
-    LoRAConfig config;
-    config.rank = 8;
-    config.alpha = 16.0f;
-
-    // Test scaling calculation
-    float expected_scaling = 16.0f / 8.0f;
-    CHECK_EQ(config.Scaling(), expected_scaling) << "Scaling calculation failed";
-    std::cout << "Scaling: " << config.Scaling() << " (expected: " << expected_scaling << ")" << std::endl;
-
-    // Test ShouldApplyLoRA
-    CHECK(config.ShouldApplyLoRA("c_attn")) << "Should match c_attn";
-    CHECK(config.ShouldApplyLoRA("transformer.h.0.attn.c_attn")) << "Should match nested c_attn";
-    CHECK(config.ShouldApplyLoRA("c_proj")) << "Should match c_proj";
-    CHECK(!config.ShouldApplyLoRA("c_fc")) << "Should not match c_fc (not in default targets)";
-    CHECK(!config.ShouldApplyLoRA("random_layer")) << "Should not match random_layer";
-
-    std::cout << "LoRAConfig tests passed!" << std::endl;
-}
-
-// ============================================================================
-// Test 2: LoRALinear Initialization
-// ============================================================================
-void test_lora_linear_init() {
-    std::cout << "\n=== Test 2: LoRALinear Initialization ===" << std::endl;
-
-    LoRAConfig config;
-    config.rank = 4;
-    config.alpha = 8.0f;
-
-    int64_t in_features = 64;
-    int64_t out_features = 128;
-
-    auto lora_linear
-        = std::shared_ptr<LoRALinear>(new LoRALinear(in_features, out_features, config, /*bias=*/true, nullptr));
-
-    // Check parameter shapes
-    auto weight = lora_linear->parameter(nn::Linear::kParamWeightName);
-    auto bias = lora_linear->parameter(nn::Linear::kParamBiasName);
-    auto lora_A = lora_linear->parameter(LoRALinear::kParamLoraAName);
-    auto lora_B = lora_linear->parameter(LoRALinear::kParamLoraBName);
-
-    CHECK_EQ(weight->Dims().size(), 2);
-    CHECK_EQ(weight->Dims()[0], out_features);
-    CHECK_EQ(weight->Dims()[1], in_features);
-    std::cout << "Weight shape: [" << weight->Dims()[0] << ", " << weight->Dims()[1] << "]" << std::endl;
-
-    CHECK_EQ(bias->Dims().size(), 1);
-    CHECK_EQ(bias->Dims()[0], out_features);
-    std::cout << "Bias shape: [" << bias->Dims()[0] << "]" << std::endl;
-
-    CHECK_EQ(lora_A->Dims().size(), 2);
-    CHECK_EQ(lora_A->Dims()[0], config.rank);
-    CHECK_EQ(lora_A->Dims()[1], in_features);
-    std::cout << "LoRA A shape: [" << lora_A->Dims()[0] << ", " << lora_A->Dims()[1] << "]" << std::endl;
-
-    CHECK_EQ(lora_B->Dims().size(), 2);
-    CHECK_EQ(lora_B->Dims()[0], out_features);
-    CHECK_EQ(lora_B->Dims()[1], config.rank);
-    std::cout << "LoRA B shape: [" << lora_B->Dims()[0] << ", " << lora_B->Dims()[1] << "]" << std::endl;
-
-    // Check requires_grad
-    CHECK(!weight->requires_grad()) << "Base weight should be frozen";
-    CHECK(!bias->requires_grad()) << "Base bias should be frozen";
-    CHECK(lora_A->requires_grad()) << "LoRA A should be trainable";
-    CHECK(lora_B->requires_grad()) << "LoRA B should be trainable";
-    std::cout << "requires_grad check passed!" << std::endl;
-
-    // Check LoRAParameters() returns only LoRA params
-    auto params = lora_linear->LoRAParameters();
-    CHECK_EQ(params.size(), 2) << "LoRAParameters() should return only LoRA params";
-    std::cout << "LoRAParameters() returns " << params.size() << " tensors (LoRA A and B)" << std::endl;
-
-    std::cout << "LoRALinear initialization tests passed!" << std::endl;
<< std::endl; -} - -// ============================================================================ -// Test 3: LoRALinear Forward Pass -// ============================================================================ -void test_lora_linear_forward() { - std::cout << "\n=== Test 3: LoRALinear Forward Pass ===" << std::endl; - - LoRAConfig config; - config.rank = 4; - config.alpha = 8.0f; - - int64_t in_features = 64; - int64_t out_features = 128; - int64_t batch_size = 2; - int64_t seq_len = 10; - - auto lora_linear - = std::shared_ptr(new LoRALinear(in_features, out_features, config, /*bias=*/true, nullptr)); - - // Create input tensor - auto input = std::make_shared(std::vector{batch_size, seq_len, in_features}, DataType::kFLOAT32); - - // Forward pass - auto output = (*lora_linear)({input})[0]; - - // Check output shape - CHECK_EQ(output->Dims().size(), 3); - CHECK_EQ(output->Dims()[0], batch_size); - CHECK_EQ(output->Dims()[1], seq_len); - CHECK_EQ(output->Dims()[2], out_features); - std::cout << "Output shape: [" << output->Dims()[0] << ", " << output->Dims()[1] << ", " << output->Dims()[2] << "]" - << std::endl; - - std::cout << "LoRALinear forward pass tests passed!" << std::endl; -} - -// ============================================================================ -// Test 4: LoRALinear Weight Merging -// ============================================================================ -void test_lora_linear_merge() { - std::cout << "\n=== Test 4: LoRALinear Weight Merging ===" << std::endl; - - LoRAConfig config; - config.rank = 4; - config.alpha = 8.0f; - - int64_t in_features = 32; - int64_t out_features = 64; - - auto lora_linear - = std::shared_ptr(new LoRALinear(in_features, out_features, config, /*bias=*/false, nullptr)); - - // Print weight sum before merge - auto weight_before = lora_linear->parameter(nn::Linear::kParamWeightName); - auto lora_A = lora_linear->parameter(LoRALinear::kParamLoraAName); - auto lora_B = lora_linear->parameter(LoRALinear::kParamLoraBName); - - float weight_before_sum = weight_before->EigenMatrix().sum(); - float lora_A_sum = lora_A->EigenMatrix().sum(); - float lora_B_sum = lora_B->EigenMatrix().sum(); - - std::cout << "\n--- Before Merge ---" << std::endl; - std::cout << "Base weight sum: " << weight_before_sum << std::endl; - std::cout << "LoRA A sum: " << lora_A_sum << std::endl; - std::cout << "LoRA B sum: " << lora_B_sum << std::endl; - std::cout << "Scaling (alpha/r): " << config.Scaling() << std::endl; - - // Create input - auto input = std::make_shared(std::vector{2, 5, in_features}, DataType::kFLOAT32); - input->EigenMatrix().setRandom(); - - // Get output before merge - auto output_before = (*lora_linear)({input})[0]; - float output_before_sum = output_before->EigenMatrix().sum(); - std::cout << "Output sum before merge: " << output_before_sum << std::endl; - - // Merge weights - CHECK(!lora_linear->IsMerged()) << "Should not be merged initially"; - lora_linear->MergeWeights(); - CHECK(lora_linear->IsMerged()) << "Should be merged after MergeWeights()"; - - // Verify LoRA params are frozen after merge - CHECK(!lora_A->requires_grad()) << "lora_A should be frozen after merge"; - CHECK(!lora_B->requires_grad()) << "lora_B should be frozen after merge"; - std::cout << "\nWeights merged successfully, LoRA params frozen" << std::endl; - - // Print weight sum after merge - auto weight_after = lora_linear->parameter(nn::Linear::kParamWeightName); - float weight_after_sum = weight_after->EigenMatrix().sum(); - std::cout << "\n--- After Merge ---" << 
-    std::cout << "Base weight sum after merge: " << weight_after_sum << std::endl;
-    std::cout << "Weight change (should be ~LoRA contribution): " << (weight_after_sum - weight_before_sum)
-              << std::endl;
-
-    // Get output after merge
-    auto output_merged = (*lora_linear)({input})[0];
-    float output_merged_sum = output_merged->EigenMatrix().sum();
-    std::cout << "Output sum after merge: " << output_merged_sum << std::endl;
-
-    // Verify: output_after should equal output_before (numerically)
-    std::cout << "\nVerification: output_before == output_after? " << std::endl;
-    std::cout << "  Before: " << output_before_sum << std::endl;
-    std::cout << "  After:  " << output_merged_sum << std::endl;
-    std::cout << "  Diff:   " << std::abs(output_before_sum - output_merged_sum) << std::endl;
-    CHECK(std::abs(output_before_sum - output_merged_sum) < 1e-3) << "Outputs should be numerically identical!";
-
-    // Shape comparison (always same)
-    std::cout << "\nOutput shape: [" << output_before->Dims()[0] << ", " << output_before->Dims()[1] << ", "
-              << output_before->Dims()[2] << "] (unchanged)" << std::endl;
-
-    // Unmerge weights
-    lora_linear->UnmergeWeights();
-    CHECK(!lora_linear->IsMerged()) << "Should not be merged after UnmergeWeights()";
-
-    // Verify LoRA params are trainable again after unmerge
-    CHECK(lora_A->requires_grad()) << "lora_A should be trainable after unmerge";
-    CHECK(lora_B->requires_grad()) << "lora_B should be trainable after unmerge";
-
-    // Print weight sum after unmerge
-    auto weight_unmerged = lora_linear->parameter(nn::Linear::kParamWeightName);
-    float weight_unmerged_sum = weight_unmerged->EigenMatrix().sum();
-    std::cout << "\n--- After Unmerge ---" << std::endl;
-    std::cout << "Base weight sum after unmerge: " << weight_unmerged_sum << std::endl;
-
-    // Verify: weight should be restored to original value
-    std::cout << "\nVerification: weight restored after unmerge? " << std::endl;
-    std::cout << "  Original: " << weight_before_sum << std::endl;
-    std::cout << "  Unmerged: " << weight_unmerged_sum << std::endl;
-    std::cout << "  Diff:     " << std::abs(weight_before_sum - weight_unmerged_sum) << std::endl;
-    CHECK(std::abs(weight_before_sum - weight_unmerged_sum) < 1e-4) << "Weight should be restored!";
-
-    // Get output after unmerge
-    auto output_unmerged = (*lora_linear)({input})[0];
-    float output_unmerged_sum = output_unmerged->EigenMatrix().sum();
-    std::cout << "Output sum after unmerge: " << output_unmerged_sum << std::endl;
-
-    // Shape comparison: merge doesn't change shape, only weights
-    CHECK(output_before->Dims() == output_merged->Dims()) << "Shape should be identical after merge";
-    CHECK(output_merged->Dims() == output_unmerged->Dims()) << "Shape should be identical after unmerge";
-
-    std::cout << "\nLoRALinear weight merging tests passed!" << std::endl;
<< std::endl; -} - -// ============================================================================ -// Test 5: LoRA Utility Functions -// ============================================================================ -void test_lora_utils() { - std::cout << "\n=== Test 5: LoRA Utility Functions ===" << std::endl; - - LoRAConfig config; - config.rank = 4; - config.alpha = 8.0f; - - auto lora_linear = std::shared_ptr(new LoRALinear(32, 64, config, /*bias=*/true, nullptr)); - - // Test GetLoRAParameters - auto lora_params = GetLoRAParameters(lora_linear); - CHECK_EQ(lora_params.size(), 2) << "Should have 2 LoRA parameters"; - std::cout << "GetLoRAParameters returned " << lora_params.size() << " parameters" << std::endl; - - // Test CountTrainableParameters - int64_t trainable = CountTrainableParameters(lora_linear); - int64_t expected_trainable = config.rank * 32 + 64 * config.rank; // A: [4, 32], B: [64, 4] - CHECK_EQ(trainable, expected_trainable) << "Trainable parameter count mismatch"; - std::cout << "Trainable parameters: " << trainable << " (expected: " << expected_trainable << ")" << std::endl; - - // Test CountTotalParameters - int64_t total = CountTotalParameters(lora_linear); - int64_t expected_total = 64 * 32 + 64 + config.rank * 32 + 64 * config.rank; // weight + bias + A + B - CHECK_EQ(total, expected_total) << "Total parameter count mismatch"; - std::cout << "Total parameters: " << total << " (expected: " << expected_total << ")" << std::endl; - - // Test PrintLoRASummary - std::cout << "\nLoRA Summary:" << std::endl; - PrintLoRASummary(lora_linear); - - std::cout << "LoRA utility function tests passed!" << std::endl; -} - -// ============================================================================ -// Test 6: LoRALinear from existing Linear -// ============================================================================ -void test_lora_from_linear() { - std::cout << "\n=== Test 6: LoRALinear from existing Linear ===" << std::endl; - - // Create a standard Linear layer - auto linear = std::make_shared(64, 128, /*bias=*/true); - - // Wrap it with LoRA - LoRAConfig config; - config.rank = 8; - config.alpha = 16.0f; - - auto lora_linear = std::make_shared(linear, config); - - // Check dimensions - CHECK_EQ(lora_linear->in_features(), 64); - CHECK_EQ(lora_linear->out_features(), 128); - CHECK_EQ(lora_linear->rank(), 8); - std::cout << "LoRALinear created from Linear: in=" << lora_linear->in_features() - << ", out=" << lora_linear->out_features() << ", rank=" << lora_linear->rank() << std::endl; - - // Test forward pass - auto input = std::make_shared(std::vector{2, 10, 64}, DataType::kFLOAT32); - auto output = (*lora_linear)({input})[0]; - - CHECK_EQ(output->Dims()[0], 2); - CHECK_EQ(output->Dims()[1], 10); - CHECK_EQ(output->Dims()[2], 128); - std::cout << "Forward pass successful, output shape: [" << output->Dims()[0] << ", " << output->Dims()[1] << ", " - << output->Dims()[2] << "]" << std::endl; - - std::cout << "LoRALinear from existing Linear tests passed!" 
<< std::endl; -} - -// ============================================================================ -// Test 7: LoRALinear from existing Linear (tests LoRA utilities) -// ============================================================================ -void test_lora_model_wrapper() { - std::cout << "\n=== Test 7: LoRALinear from existing Linear ===" << std::endl; - - // Create LoRA config - LoRAConfig lora_config; - lora_config.rank = 8; - lora_config.alpha = 16.0f; - - // Create base Linear module (simple test without InjectLoRALayers) - auto base_linear = std::make_shared(64, 128, /*bias=*/true); - - // Create a minimal wrapper test by manually testing what LoRAModel does - // Apply LoRA directly to the Linear layer - auto lora_linear = std::make_shared(base_linear, lora_config); - - // Replace the base_linear in its container - // Note: In a real use case, you would use InjectLoRALayers on a transformer model - - // Test GetLoRAParameters on the LoRA Linear - auto lora_params = GetLoRAParameters(lora_linear); - CHECK_GT(lora_params.size(), 0) << "Should have trainable parameters"; - std::cout << "LoRA parameters extracted: " << lora_params.size() << std::endl; - - // Test CountTrainableParameters - int64_t trainable = CountTrainableParameters(lora_linear); - CHECK_EQ(trainable, lora_config.rank * 64 + 128 * lora_config.rank); - std::cout << "Trainable parameters: " << trainable << std::endl; - - // Test PrintSummary - std::cout << "\nLoRA Summary for Linear wrapper:" << std::endl; - PrintLoRASummary(lora_linear); - - // Test Save/Load LoRA on the LoRA Linear - const std::string test_path = "/tmp/test_lora_linear.bin"; - SaveLoRAWeights(lora_linear, test_path); - std::cout << "SaveLoRAWeights completed" << std::endl; - - LoadLoRAWeights(lora_linear, test_path); - std::cout << "LoadLoRAWeights completed" << std::endl; - - // Test Merge/Unmerge on LoRA Linear - CHECK(!lora_linear->IsMerged()) << "Should not be merged initially"; - lora_linear->MergeWeights(); - CHECK(lora_linear->IsMerged()) << "Should be merged after MergeWeights()"; - std::cout << "MergeWeights completed" << std::endl; - - lora_linear->UnmergeWeights(); - CHECK(!lora_linear->IsMerged()) << "Should be unmerged after UnmergeWeights()"; - std::cout << "UnmergeWeights completed" << std::endl; - - std::cout << "LoRALinear utility tests passed!" 
<< std::endl; -} - -// ============================================================================ -// Test 8: Save/Load LoRA Weights -// ============================================================================ -void test_lora_save_load_weights() { - std::cout << "\n=== Test 8: Save/Load LoRA Weights ===" << std::endl; - - // Create a LoRALinear - LoRAConfig config; - config.rank = 4; - config.alpha = 8.0f; - - int64_t in_features = 32; - int64_t out_features = 64; - - auto linear = std::make_shared(in_features, out_features, /*bias=*/true); - auto lora_linear = std::make_shared(linear, config); - - // Get references to lora_A and lora_B - auto lora_A = lora_linear->parameter(LoRALinear::kParamLoraAName); - auto lora_B = lora_linear->parameter(LoRALinear::kParamLoraBName); - - // Set specific values to lora_A and lora_B - // lora_A: [rank, in_features] = [4, 32] - // lora_B: [out_features, rank] = [64, 4] - lora_A->EigenMatrix().setZero(); - lora_B->EigenMatrix().setZero(); - - // Set lora_A to all 1s - for (int64_t i = 0; i < lora_A->Dims()[0]; ++i) { - for (int64_t j = 0; j < lora_A->Dims()[1]; ++j) { lora_A->EigenMatrix()(i, j) = 1.0f; } - } - - // Set lora_B to all 2s - for (int64_t i = 0; i < lora_B->Dims()[0]; ++i) { - for (int64_t j = 0; j < lora_B->Dims()[1]; ++j) { lora_B->EigenMatrix()(i, j) = 2.0f; } - } - - // Record original sums - float lora_A_sum_orig = lora_A->EigenMatrix().sum(); - float lora_B_sum_orig = lora_B->EigenMatrix().sum(); - // lora_A: all 1.0f, shape [rank, in_features] = [4, 32] - // lora_B: all 2.0f, shape [out_features, rank] = [64, 4] - float expected_lora_A_sum = config.rank * in_features * 1.0f; // 4 * 32 * 1 = 128 - float expected_lora_B_sum = out_features * config.rank * 2.0f; // 64 * 4 * 2 = 512 - std::cout << "Original lora_A sum: " << lora_A_sum_orig << " (expected: " << expected_lora_A_sum << ")" - << std::endl; - std::cout << "Original lora_B sum: " << lora_B_sum_orig << " (expected: " << expected_lora_B_sum << ")" - << std::endl; - - CHECK_EQ(lora_A_sum_orig, expected_lora_A_sum); - CHECK_EQ(lora_B_sum_orig, expected_lora_B_sum); - - // Save to file - const std::string test_path = "/tmp/test_lora_save_load.bin"; - SaveLoRAWeights(lora_linear, test_path); - std::cout << "Saved LoRA weights to: " << test_path << std::endl; - - // Modify weights to different values - lora_A->EigenMatrix().setConstant(9.0f); - lora_B->EigenMatrix().setConstant(9.0f); - - float lora_A_sum_modified = lora_A->EigenMatrix().sum(); - float lora_B_sum_modified = lora_B->EigenMatrix().sum(); - std::cout << "Modified lora_A sum: " << lora_A_sum_modified << std::endl; - std::cout << "Modified lora_B sum: " << lora_B_sum_modified << std::endl; - - CHECK_NE(lora_A_sum_modified, lora_A_sum_orig); - CHECK_NE(lora_B_sum_modified, lora_B_sum_orig); - - // Load from file - LoadLoRAWeights(lora_linear, test_path); - std::cout << "Loaded LoRA weights from: " << test_path << std::endl; - - // Verify weights are restored - float lora_A_sum_loaded = lora_A->EigenMatrix().sum(); - float lora_B_sum_loaded = lora_B->EigenMatrix().sum(); - std::cout << "Loaded lora_A sum: " << lora_A_sum_loaded << std::endl; - std::cout << "Loaded lora_B sum: " << lora_B_sum_loaded << std::endl; - - CHECK_EQ(lora_A_sum_loaded, lora_A_sum_orig) << "lora_A should be restored to original values"; - CHECK_EQ(lora_B_sum_loaded, lora_B_sum_orig) << "lora_B should be restored to original values"; - - // Also verify individual elements - for (int64_t i = 0; i < lora_A->Dims()[0]; ++i) { - for (int64_t j = 0; j < 
-        for (int64_t j = 0; j < lora_A->Dims()[1]; ++j) {
-            CHECK_EQ(lora_A->EigenMatrix()(i, j), 1.0f) << "lora_A element mismatch at (" << i << "," << j << ")";
-        }
-    }
-
-    for (int64_t i = 0; i < lora_B->Dims()[0]; ++i) {
-        for (int64_t j = 0; j < lora_B->Dims()[1]; ++j) {
-            CHECK_EQ(lora_B->EigenMatrix()(i, j), 2.0f) << "lora_B element mismatch at (" << i << "," << j << ")";
-        }
-    }
-
-    std::cout << "All elements verified correctly!" << std::endl;
-
-    // Cleanup
-    std::remove(test_path.c_str());
-    std::cout << "Test 8: Save/Load LoRA Weights passed!" << std::endl;
-}
-
-// ============================================================================
-// Test 8: ParseLoRATargetModules parsing
-// ============================================================================
-void test_set_target_modules() {
-    std::cout << "\n=== Test 8: ParseLoRATargetModules Parsing ===" << std::endl;
-
-    // Test single target
-    auto modules = ParseLoRATargetModules("c_attn");
-    CHECK_EQ(modules.size(), 1);
-    CHECK(modules.count("c_attn"));
-    std::cout << "Single target: OK" << std::endl;
-
-    // Test multiple targets
-    modules = ParseLoRATargetModules("c_attn,c_proj,c_fc");
-    CHECK_EQ(modules.size(), 3);
-    CHECK(modules.count("c_attn"));
-    CHECK(modules.count("c_proj"));
-    CHECK(modules.count("c_fc"));
-    std::cout << "Multiple targets: OK" << std::endl;
-
-    // Test with spaces
-    modules = ParseLoRATargetModules("c_attn, c_proj , c_fc");
-    CHECK_EQ(modules.size(), 3);
-    std::cout << "Targets with spaces: OK" << std::endl;
-
-    // Test empty/whitespace
-    modules = ParseLoRATargetModules("c_attn,,c_proj");
-    CHECK_EQ(modules.size(), 2);
-    std::cout << "Empty entries ignored: OK" << std::endl;
-
-    std::cout << "ParseLoRATargetModules tests passed!" << std::endl;
-}
-
-// ============================================================================
-// Test 9: ShouldApplyLoRA edge cases (attn.c_proj vs mlp.c_proj)
-// ============================================================================
-void test_should_apply_lora_edge_cases() {
-    std::cout << "\n=== Test 9: ShouldApplyLoRA Edge Cases ===" << std::endl;
-
-    // Test: Only attn.c_proj in target_modules
-    {
-        LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,attn.c_proj")};
-
-        // Should match attention paths
-        CHECK(config.ShouldApplyLoRA("attn.c_proj"));
-        CHECK(config.ShouldApplyLoRA("transformer.h.0.attn.c_proj"));
-        CHECK(config.ShouldApplyLoRA("transformer.h.1.attn.c_proj"));
-
-        // Should NOT match mlp paths
-        CHECK(!config.ShouldApplyLoRA("mlp.c_proj"));
-        CHECK(!config.ShouldApplyLoRA("transformer.h.0.mlp.c_proj"));
-        std::cout << "attn.c_proj only: OK" << std::endl;
-    }
-
-    // Test: Only mlp.c_proj in target_modules
-    {
-        LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,mlp.c_proj")};
-
-        // Should NOT match attention paths
-        CHECK(!config.ShouldApplyLoRA("attn.c_proj"));
-        CHECK(!config.ShouldApplyLoRA("transformer.h.0.attn.c_proj"));
-
-        // Should match mlp paths
-        CHECK(config.ShouldApplyLoRA("mlp.c_proj"));
-        CHECK(config.ShouldApplyLoRA("transformer.h.0.mlp.c_proj"));
-        std::cout << "mlp.c_proj only: OK" << std::endl;
-    }
-
-    // Test: Generic c_proj in target_modules (matches both)
-    {
-        LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,c_proj")};
-
-        // Should match both attention and mlp
-        CHECK(config.ShouldApplyLoRA("transformer.h.0.attn.c_proj"));
-        CHECK(config.ShouldApplyLoRA("transformer.h.0.mlp.c_proj"));
-        std::cout << "Generic c_proj (matches both): OK" << std::endl;
-    }
-
-    // Test: All targets
-    {
-        LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,attn.c_proj,c_fc,c_fc2,mlp.c_proj")};
-
-        CHECK(config.ShouldApplyLoRA("transformer.h.0.attn.c_attn"));
-        CHECK(config.ShouldApplyLoRA("transformer.h.0.attn.c_proj"));
-        CHECK(config.ShouldApplyLoRA("transformer.h.0.mlp.c_fc"));
-        CHECK(config.ShouldApplyLoRA("transformer.h.0.mlp.c_fc2"));
-        CHECK(config.ShouldApplyLoRA("transformer.h.0.mlp.c_proj"));
-        std::cout << "All targets: OK" << std::endl;
-    }
-
-    std::cout << "ShouldApplyLoRA edge cases tests passed!" << std::endl;
-}
-
-// ============================================================================
-// Test 10: ReplaceModuleByPath
-// ============================================================================
-void test_replace_module_by_path() {
-    std::cout << "\n=== Test 10: ReplaceModuleByPath ===" << std::endl;
-
-    // Test ReplaceModuleByPath by wrapping a Linear with LoRA directly
-    // This tests the core functionality that ReplaceModuleByPath provides
-
-    // Create base Linear
-    auto base_linear = std::make_shared<nn::Linear>(64, 128, /*bias=*/true);
-
-    // Configure LoRA
-    LoRAConfig lora_config;
-    lora_config.rank = 4;
-    lora_config.alpha = 8.0f;
-
-    // Wrap with LoRA - this is what ReplaceModuleByPath does internally
-    auto lora_linear = std::make_shared<LoRALinear>(base_linear, lora_config);
-
-    // Verify LoRA was applied correctly
-    auto params = lora_linear->LoRAParameters();
-    CHECK_EQ(params.size(), 2) << "LoRALinear should have 2 trainable parameters (lora_A and lora_B)";
-    std::cout << "LoRALinear has " << params.size() << " trainable parameters" << std::endl;
-
-    // Verify parameter shapes
-    auto lora_a = params[0];
-    auto lora_b = params[1];
-    CHECK_EQ(lora_a->Dims()[0], lora_config.rank); // rank x in_features
-    CHECK_EQ(lora_a->Dims()[1], 64);
-    CHECK_EQ(lora_b->Dims()[0], 128); // out_features x rank
-    CHECK_EQ(lora_b->Dims()[1], lora_config.rank);
-    std::cout << "LoRA parameter shapes: OK" << std::endl;
-
-    // Verify base parameters are frozen (use named parameters instead of index)
-    auto weight = lora_linear->parameter(nn::Linear::kParamWeightName);
-    auto lora_a_param = lora_linear->parameter(LoRALinear::kParamLoraAName);
-    auto lora_b_param = lora_linear->parameter(LoRALinear::kParamLoraBName);
-    CHECK(weight != nullptr);
-    CHECK(lora_a_param != nullptr);
-    CHECK(lora_b_param != nullptr);
-    CHECK(!weight->requires_grad());      // weight is frozen
-    CHECK(lora_a_param->requires_grad()); // lora_A is trainable
-    CHECK(lora_b_param->requires_grad()); // lora_B is trainable
-    std::cout << "Base weight frozen, LoRA params trainable: OK" << std::endl;
-
-    std::cout << "ReplaceModuleByPath tests passed!" << std::endl;
<< std::endl; -} - -// ============================================================================ -// Test 11: FreezeBaseModel / UnfreezeModel -// ============================================================================ -void test_freeze_unfreeze() { - std::cout << "\n=== Test 11: FreezeBaseModel / UnfreezeModel ===" << std::endl; - - // Test with LoRALinear directly - it has both base and LoRA params - LoRAConfig lora_config; - lora_config.rank = 4; - lora_config.alpha = 8.0f; - - auto linear = std::make_shared(64, 128, /*bias=*/true); - auto lora_linear = std::make_shared(linear, lora_config); - - // Get all parameters from LoRALinear (includes base + LoRA) - auto all_params = lora_linear->Parameters(); - - // Initially only LoRA params should be trainable (base weights are frozen by constructor) - int64_t total_params = 0; - for (const auto &p : all_params) { - if (p->requires_grad()) { - total_params += p->NumElements(); - } - } - // Expected: only LoRA params (lora_A + lora_B) = 4*64 + 128*4 = 256 + 512 = 768 - // Note: LoRALinear freezes base weights in constructor by design - int64_t expected_total = lora_config.rank * 64 + 128 * lora_config.rank; - CHECK_EQ(total_params, expected_total); - std::cout << "Initial trainable params: " << total_params << " (expected: " << expected_total << ")" << std::endl; - - // FreezeBaseModel on LoRALinear - FreezeBaseModel(lora_linear); - - // After freeze, only LoRA params should be trainable - int64_t after_freeze = 0; - for (const auto &p : all_params) { - if (p->requires_grad()) { - after_freeze += p->NumElements(); - } - } - // LoRA params: A (rank x in) + B (out x rank) = 4*64 + 128*4 = 256 + 512 = 768 - int64_t expected_lora = lora_config.rank * 64 + 128 * lora_config.rank; - CHECK_EQ(after_freeze, expected_lora); - std::cout << "After freeze trainable: " << after_freeze << " (expected: " << expected_lora << ")" << std::endl; - - // Unfreeze all - UnfreezeModel(lora_linear); - int64_t after_unfreeze = 0; - for (const auto &p : all_params) { - if (p->requires_grad()) { - after_unfreeze += p->NumElements(); - } - } - // Should be back to all params trainable (base + LoRA) - int64_t expected_after_unfreeze = 64 * 128 + 128 + lora_config.rank * 64 + 128 * lora_config.rank; - CHECK_EQ(after_unfreeze, expected_after_unfreeze); - std::cout << "After unfreeze trainable: " << after_unfreeze << std::endl; - - std::cout << "FreezeBaseModel / UnfreezeModel tests passed!" 
<< std::endl; -} - -// ============================================================================ -// Test 12: LoRAStateDict -// ============================================================================ -void test_lora_state_dict() { - std::cout << "\n=== Test 12: LoRAStateDict ===" << std::endl; - - // Test with a single LoRALinear - LoRAConfig lora_config; - lora_config.rank = 4; - lora_config.alpha = 8.0f; - - auto linear = std::make_shared(64, 128, /*bias=*/true); - auto lora_linear = std::make_shared(linear, lora_config); - - // Get state dict - it contains all parameters with their names - auto state_dict = lora_linear->StateDict(); - - // Check that we have all expected parameters - CHECK(state_dict.count("weight")) << "Should have weight parameter"; - CHECK(state_dict.count("bias")) << "Should have bias parameter"; - CHECK(state_dict.count("lora_A")) << "Should have lora_A parameter"; - CHECK(state_dict.count("lora_B")) << "Should have lora_B parameter"; - std::cout << "State dict contains: weight, bias, lora_A, lora_B" << std::endl; - - // Verify LoRA parameters exist and are trainable - CHECK(state_dict.at("lora_A")->requires_grad()) << "lora_A should be trainable"; - CHECK(state_dict.at("lora_B")->requires_grad()) << "lora_B should be trainable"; - CHECK(!state_dict.at("weight")->requires_grad()) << "weight should be frozen"; - std::cout << "LoRA parameters are trainable, base weight is frozen: OK" << std::endl; - - // Verify shapes - CHECK_EQ(state_dict.at("lora_A")->Dims()[0], lora_config.rank); - CHECK_EQ(state_dict.at("lora_A")->Dims()[1], 64); - CHECK_EQ(state_dict.at("lora_B")->Dims()[0], 128); - CHECK_EQ(state_dict.at("lora_B")->Dims()[1], lora_config.rank); - std::cout << "LoRA parameter shapes: OK" << std::endl; - - std::cout << "LoRAStateDict tests passed!" 
<< std::endl; -} - -// ============================================================================ -// Test 13: GetLoRAModel simplified API -// ============================================================================ -void test_get_lora_model() { - std::cout << "\n=== Test 13: GetLoRAModel Simplified API ===" << std::endl; - - // Test GetLoRAModel with a simple Linear layer - // We'll wrap it with LoRA directly and verify the wrapper works - - // Create base Linear - auto base_linear = std::make_shared(64, 128, /*bias=*/true); - - // Configure LoRA - LoRAConfig config{4, 8.0f, 0.0f, ParseLoRATargetModules("Linear")}; - - // Use GetLoRAModel with the linear as the "model" - // Note: GetLoRAModel returns the modified model (in-place injection) - auto model = GetLoRAModel(base_linear, config); - - CHECK(model != nullptr); - std::cout << "GetLoRAModel returned valid pointer" << std::endl; - - // Test that LoRA was applied - check trainable parameters - auto lora_params = GetLoRAParameters(model); - // GetLoRAParameters returns vector>, size() is the count of tensors - // LoRALinear has 2 trainable tensors: lora_A (rank x in) and lora_B (out x rank) - CHECK_EQ(lora_params.size(), 2); - std::cout << "Trainable parameter tensors: " << lora_params.size() << " (expected: 2)" << std::endl; - - // Also verify total element count - int64_t total_elements = 0; - for (const auto &t : lora_params) { total_elements += t->NumElements(); } - int64_t expected_elements = config.rank * 64 + 128 * config.rank; // 768 - CHECK_EQ(total_elements, expected_elements); - std::cout << "Total trainable elements: " << total_elements << " (expected: " << expected_elements << ")" - << std::endl; - - // Test PrintSummary - std::cout << "\nLoRA Model Summary:" << std::endl; - PrintLoRASummary(model); - - // Test Merge/Unmerge using utility functions - MergeLoRAWeights(model); - // Verify LoRA params frozen after merge - auto *lora_mod = dynamic_cast(model.get()); - CHECK(lora_mod != nullptr); - CHECK(!lora_mod->LoRAParameters()[0]->requires_grad()) << "lora_A should be frozen after merge"; - CHECK(!lora_mod->LoRAParameters()[1]->requires_grad()) << "lora_B should be frozen after merge"; - std::cout << "Merge: OK (LoRA params frozen)" << std::endl; - - UnmergeLoRAWeights(model); - CHECK(lora_mod->LoRAParameters()[0]->requires_grad()) << "lora_A should be trainable after unmerge"; - CHECK(lora_mod->LoRAParameters()[1]->requires_grad()) << "lora_B should be trainable after unmerge"; - std::cout << "Unmerge: OK (LoRA params trainable)" << std::endl; - - std::cout << "GetLoRAModel in-place injection tests passed!" 
<< std::endl; -} - -// ============================================================================ -// Test 14: MergeAndUnload -// ============================================================================ -void test_merge_and_unload() { - std::cout << "\n=== Test 14: MergeAndUnload ===" << std::endl; - - // Create base Linear and apply LoRA - auto base_linear = std::make_shared(64, 128, /*bias=*/true); - LoRAConfig config{4, 8.0f, 0.0f, ParseLoRATargetModules("Linear")}; - auto model = GetLoRAModel(base_linear, config); - - // Verify it's a LoRA module - CHECK(dynamic_cast(model.get()) != nullptr) << "Should be LoRALinear"; - - // Create input and get output before merge_and_unload - auto input = std::make_shared(std::vector{2, 5, 64}, DataType::kFLOAT32); - input->EigenMatrix().setRandom(); - auto output_before = (*model)({input})[0]; - float output_before_sum = output_before->EigenMatrix().sum(); - std::cout << "Output sum before MergeAndUnload: " << output_before_sum << std::endl; - - // MergeAndUnload - auto unloaded_model = MergeAndUnload(model); - CHECK(unloaded_model != nullptr) << "MergeAndUnload should return valid model"; - - // Verify it's no longer a LoRA module - CHECK(dynamic_cast(unloaded_model.get()) == nullptr) << "Should be plain Linear after MergeAndUnload"; - std::cout << "Model is no longer LoRALinear: OK" << std::endl; - - // Verify no LoRA parameters exist (check state dict) - auto state_dict = unloaded_model->StateDict(); - for (const auto &[name, param] : state_dict) { - CHECK(name.find("lora_A") == std::string::npos && name.find("lora_B") == std::string::npos) - << "Should not have LoRA parameters after MergeAndUnload, found: " << name; - } - std::cout << "No LoRA parameters in state dict: OK" << std::endl; - - // Verify forward output matches (merged output should equal unmerged LoRA output) - auto output_after = (*unloaded_model)({input})[0]; - float output_after_sum = output_after->EigenMatrix().sum(); - std::cout << "Output sum after MergeAndUnload: " << output_after_sum << std::endl; - std::cout << "Diff: " << std::abs(output_before_sum - output_after_sum) << std::endl; - CHECK(std::abs(output_before_sum - output_after_sum) < 1e-3) << "Output should match after MergeAndUnload"; - - // Verify all parameters have requires_grad = true (unfrozen) - for (const auto ¶m : unloaded_model->Parameters()) { - CHECK(param->requires_grad()) << "All parameters should be trainable after MergeAndUnload"; - } - std::cout << "All parameters trainable: OK" << std::endl; - - std::cout << "MergeAndUnload tests passed!" 
<< std::endl; -} - -int main(int argc, char **argv) { - google::InitGoogleLogging(argv[0]); - FLAGS_logtostderr = 1; - - // Initialize parallel settings (required for some tensor operations) - // Parameters: nthread_per_process, tensor_parallel_size, sequence_parallel_enabled, - // pipeline_parallel_size, virtual_pipeline_parallel_size - nn::parallel::global::InitAllEnv(1, 1, false, 1, 1); - - std::cout << "========================================" << std::endl; - std::cout << " LoRA Module Unit Tests " << std::endl; - std::cout << "========================================" << std::endl; - - test_lora_config(); - test_lora_linear_init(); - test_lora_linear_forward(); - test_lora_linear_merge(); - test_lora_utils(); - test_lora_from_linear(); - test_lora_model_wrapper(); - test_lora_save_load_weights(); - test_set_target_modules(); - test_should_apply_lora_edge_cases(); - test_replace_module_by_path(); - test_freeze_unfreeze(); - test_lora_state_dict(); - test_get_lora_model(); - test_merge_and_unload(); - - std::cout << "\n========================================" << std::endl; - std::cout << " All LoRA Tests Passed! " << std::endl; - std::cout << "========================================" << std::endl; - - return 0; -} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 00000000..39a44f27 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,24 @@ +# Tests CMakeLists.txt +# This file manages the test infrastructure for InfiniTrain + +# Include shared test macros (must be before any test subdirectory) +include(${CMAKE_CURRENT_SOURCE_DIR}/common/test_macros.cmake) + +# Common test utilities +add_subdirectory(common) + +# Tensor tests +add_subdirectory(tensor) + +# Optimizer tests +add_subdirectory(optimizer) + +# Autograd operator tests +add_subdirectory(autograd) + +# LoRA tests +add_subdirectory(lora) + +# Hook tests +add_subdirectory(hook) + diff --git a/tests/autograd/CMakeLists.txt b/tests/autograd/CMakeLists.txt new file mode 100644 index 00000000..d321f629 --- /dev/null +++ b/tests/autograd/CMakeLists.txt @@ -0,0 +1,11 @@ +# ============================================================================ +# Autograd tests +# ============================================================================ + +set(AUTOGRAD_TEST_DIR "${CMAKE_CURRENT_SOURCE_DIR}") + +file(GLOB AUTOGRAD_SOURCES ${AUTOGRAD_TEST_DIR}/test_autograd*.cc) + +infini_train_add_test_suite(test_autograd + SOURCES ${AUTOGRAD_SOURCES} +) diff --git a/tests/autograd/test_autograd.cc b/tests/autograd/test_autograd.cc new file mode 100644 index 00000000..6401cc93 --- /dev/null +++ b/tests/autograd/test_autograd.cc @@ -0,0 +1,376 @@ +#include + +#include +#include + +#include "infini_train/include/autograd/activations.h" +#include "infini_train/include/autograd/elementwise.h" +#include "infini_train/include/autograd/function.h" +#include "infini_train/include/autograd/linear.h" +#include "infini_train/include/autograd/matmul.h" +#include "infini_train/include/autograd/misc.h" +#include "infini_train/include/autograd/normalization.h" +#include "infini_train/include/autograd/outer.h" +#include "infini_train/include/autograd/reduction.h" +#include "infini_train/include/autograd/softmax.h" +#include "infini_train/include/autograd/transform.h" +#include "infini_train/include/tensor.h" +#include "test_utils.h" + +using namespace infini_train; + +// ============================================================================ +// Forward / Backward — CPU + CUDA +// 
============================================================================ + +class AutogradForwardTest : public infini_train::test::AutogradTestBaseP {}; +class AutogradBackwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradForwardTest, AddForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto result = std::make_shared<autograd::Add>()->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3})); +} + +TEST_P(AutogradForwardTest, SubForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto result = std::make_shared<autograd::Sub>()->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, MulForward) { + auto a = createTensor({2, 3}, 2.0f); + auto b = createTensor({2, 3}, 3.0f); + auto result = std::make_shared<autograd::Mul>()->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, DivForward) { + auto a = createTensor({2, 3}, 6.0f); + auto b = createTensor({2, 3}, 2.0f); + auto result = std::make_shared<autograd::Div>()->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, NegForward) { + auto a = createTensor({2, 3}, 5.0f); + auto result = std::make_shared<autograd::Neg>()->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, SinForward) { + auto a = createTensor({2, 3}, 0.0f); + auto result = std::make_shared<autograd::Sin>()->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, CosForward) { + auto a = createTensor({2, 3}, 0.0f); + auto result = std::make_shared<autograd::Cos>()->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, TanhForward) { + auto a = createTensor({2, 3}, 0.0f); + auto result = std::make_shared<autograd::Tanh>()->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, ExpForward) { + auto a = createTensor({2, 3}, 1.0f); + auto result = std::make_shared<autograd::Exp>()->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, LogForward) { + auto a = createTensor({2, 3}, 2.0f); + auto result = std::make_shared<autograd::Log>()->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, ReciprocalForward) { + auto a = createTensor({2, 3}, 2.0f); + auto result = std::make_shared<autograd::Reciprocal>()->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, PowForward) { + auto a = createTensor({2, 3}, 2.0f); + auto result = std::make_shared<autograd::Pow>(2.0f)->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, RsqrtForward) { + auto a = createTensor({2, 3}, 4.0f); + auto result = std::make_shared<autograd::Rsqrt>()->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, SigmoidForward) { + auto a = createTensor({2, 3}, 0.0f); + auto result = std::make_shared<autograd::Sigmoid>()->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, MatmulForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({3, 4}, 1.0f); + auto result = std::make_shared<autograd::Matmul>()->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4})); +} + +TEST_P(AutogradForwardTest, SumForward) { + auto a = createTensor({2, 3}, 1.0f); + auto result = std::make_shared<autograd::Sum>(1, false)->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, MeanForward) { + auto a = createTensor({2, 3}, 1.0f); + auto result = std::make_shared<autograd::Mean>(1, false)->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, MaxForward) { + auto a = createTensor({2, 3}, 1.0f); + auto result = std::make_shared<autograd::Max>(1, false)->Apply({a}); + 
EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, MinForward) { + auto a = createTensor({2, 3}, 1.0f); + auto result = std::make_shared<autograd::Min>(1, false)->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, SoftmaxForward) { + auto a = createTensor({2, 3}, 1.0f); + auto result = std::make_shared<autograd::Softmax>(1)->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3})); +} + +TEST_P(AutogradForwardTest, LayerNormForward) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto result = std::make_shared<autograd::LayerNorm>(1e-5f)->Apply({a, weight, bias}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, LinearForward) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto result = std::make_shared<autograd::Linear>()->Apply({input, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4})); +} + +TEST_P(AutogradForwardTest, TransposeForward) { + auto a = createTensor({2, 3}, 1.0f); + auto result = std::make_shared<autograd::Transpose>(0, 1)->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{3, 2})); +} + +TEST_P(AutogradForwardTest, SliceForward) { + auto a = createTensor({4, 4}, 1.0f); + auto result = std::make_shared<autograd::Slice>(std::vector<int64_t>{1, 1}, std::vector<int64_t>{3, 3}, + std::vector<int64_t>{1, 1}) + ->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, SplitForward) { + auto a = createTensor({4, 4}, 1.0f); + auto result = std::make_shared<autograd::Split>(2, 0)->Apply({a}); + EXPECT_EQ(result.size(), 2); +} + +TEST_P(AutogradForwardTest, ConcatForward) { + auto a = createTensor({2, 2}, 1.0f); + auto b = createTensor({2, 2}, 2.0f); + auto result = std::make_shared<autograd::Concat>(0)->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{4, 2})); +} + +TEST_P(AutogradForwardTest, StackForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto result = std::make_shared<autograd::Stack>(0)->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 2, 3})); +} + +TEST_P(AutogradForwardTest, TrilForward) { + auto a = createTensor({3, 3}, 1.0f); + auto result = std::make_shared<autograd::Tril>(0)->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, TriuForward) { + auto a = createTensor({3, 3}, 1.0f); + auto result = std::make_shared<autograd::Triu>(0)->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, OuterForward) { + auto a = createTensor({3}, 1.0f); + auto b = createTensor({4}, 1.0f); + auto result = std::make_shared<autograd::Outer>()->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{3, 4})); +} + +TEST_P(AutogradForwardTest, AddScalarForward) { + auto a = createTensor({2, 3}, 1.0f); + auto result = std::make_shared<autograd::AddScalar>(2.0f)->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, MulScalarForward) { + auto a = createTensor({2, 3}, 2.0f); + auto result = std::make_shared<autograd::MulScalar>(3.0f)->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, LtForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto result = std::make_shared<autograd::Lt>()->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, LeForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto result = std::make_shared<autograd::Le>()->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + 
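+// The remaining comparison ops (Gt/Ge/Equals) and logical ops (And/Or) below are +// exercised the same way as Lt/Le above: apply the function to two tensors of +// identical shape and assert that exactly one output tensor comes back; element +// values are deliberately not checked in this smoke suite. + 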
+TEST_P(AutogradForwardTest, GtForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto result = std::make_shared<autograd::Gt>()->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, GeForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto result = std::make_shared<autograd::Ge>()->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, EqualsForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto result = std::make_shared<autograd::Equals>()->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, AndForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 1.0f); + auto result = std::make_shared<autograd::And>()->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, OrForward) { + auto a = createTensor({2, 3}, 0.0f); + auto b = createTensor({2, 3}, 1.0f); + auto result = std::make_shared<autograd::Or>()->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradForwardTest, NoOpForward) { + auto a = createTensor({2, 3}, 1.0f); + auto result = std::make_shared<autograd::NoOp>(std::vector<int64_t>{2, 3})->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3})); +} + +TEST_P(AutogradBackwardTest, AddBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto add_fn = std::make_shared<autograd::Add>(); + auto result = add_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = add_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_P(AutogradBackwardTest, MulBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto b = createTensor({2, 3}, 3.0f); + auto mul_fn = std::make_shared<autograd::Mul>(); + auto result = mul_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = mul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradForwardTest); + +INFINI_TRAIN_REGISTER_TEST(AutogradBackwardTest); + +// ============================================================================ +// Distributed — requires NCCL + >=2 GPUs +// ============================================================================ + +class AutogradDistributedTest : public infini_train::test::DistributedInfiniTrainTestP {}; + +TEST_P(AutogradDistributedTest, AllReduce) { + auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32, GetDevice()); + a->set_requires_grad(true); + infini_train::test::FillConstantTensor(a, 1.0f); + EXPECT_TRUE(a->GetDevice().IsCUDA()); + EXPECT_TRUE(a->requires_grad()); +} + +TEST_P(AutogradDistributedTest, AllGather) { + auto a = std::make_shared<Tensor>(std::vector<int64_t>{4, 4}, DataType::kFLOAT32, GetDevice()); + a->set_requires_grad(true); + infini_train::test::FillConstantTensor(a, 1.0f); + EXPECT_TRUE(a->GetDevice().IsCUDA()); + EXPECT_EQ(a->Dims(), (std::vector<int64_t>{4, 4})); +} + +TEST_P(AutogradDistributedTest, ReduceScatter) { + auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 8}, DataType::kFLOAT32, GetDevice()); + a->set_requires_grad(true); + infini_train::test::FillConstantTensor(a, 1.0f); + EXPECT_TRUE(a->GetDevice().IsCUDA()); + EXPECT_EQ(a->Dims(), (std::vector<int64_t>{2, 8})); +} + +TEST_P(AutogradDistributedTest, DistributedMatmul) { + auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 4}, DataType::kFLOAT32, GetDevice()); + a->set_requires_grad(true); + auto b = std::make_shared<Tensor>(std::vector<int64_t>{4, 2}, DataType::kFLOAT32, GetDevice()); + b->set_requires_grad(true); + auto result = std::make_shared<autograd::Matmul>()->Apply({a, b}); 
+ EXPECT_EQ(result.size(), 1); + EXPECT_TRUE(result[0]->GetDevice().IsCUDA()); +} + +TEST_P(AutogradDistributedTest, DistributedLinear) { + auto input = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32, GetDevice()); + input->set_requires_grad(true); + auto weight = std::make_shared<Tensor>(std::vector<int64_t>{4, 3}, DataType::kFLOAT32, GetDevice()); + weight->set_requires_grad(true); + auto bias = std::make_shared<Tensor>(std::vector<int64_t>{4}, DataType::kFLOAT32, GetDevice()); + bias->set_requires_grad(true); + auto result = std::make_shared<autograd::Linear>()->Apply({input, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4})); + EXPECT_TRUE(result[0]->GetDevice().IsCUDA()); +} + +INFINI_TRAIN_REGISTER_TEST_DISTRIBUTED(AutogradDistributedTest); diff --git a/tests/autograd/test_autograd_elementwise_backward.cc b/tests/autograd/test_autograd_elementwise_backward.cc new file mode 100644 index 00000000..65ffd5a8 --- /dev/null +++ b/tests/autograd/test_autograd_elementwise_backward.cc @@ -0,0 +1,136 @@ +#include <gtest/gtest.h> + +#include <memory> +#include <vector> + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/elementwise.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradElementwiseBackwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradElementwiseBackwardTest, AddBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto add_fn = std::make_shared<autograd::Add>(); + auto result = add_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = add_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_P(AutogradElementwiseBackwardTest, SubBackward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto sub_fn = std::make_shared<autograd::Sub>(); + auto result = sub_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = sub_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_P(AutogradElementwiseBackwardTest, MulBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto b = createTensor({2, 3}, 3.0f); + auto mul_fn = std::make_shared<autograd::Mul>(); + auto result = mul_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = mul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_P(AutogradElementwiseBackwardTest, DivBackward) { + auto a = createTensor({2, 3}, 6.0f); + auto b = createTensor({2, 3}, 2.0f); + auto div_fn = std::make_shared<autograd::Div>(); + auto result = div_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = div_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_P(AutogradElementwiseBackwardTest, NegBackward) { + auto a = createTensor({2, 3}, 5.0f); + auto neg_fn = std::make_shared<autograd::Neg>(); + auto result = neg_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = neg_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_P(AutogradElementwiseBackwardTest, SinBackward) { + auto a = createTensor({2, 3}, 0.0f); + auto sin_fn = std::make_shared<autograd::Sin>(); + auto result = sin_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = sin_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_P(AutogradElementwiseBackwardTest, CosBackward) { + auto a = createTensor({2, 3}, 0.0f); + auto cos_fn = std::make_shared<autograd::Cos>(); + auto result = cos_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = 
cos_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_P(AutogradElementwiseBackwardTest, TanhBackward) { + auto a = createTensor({2, 3}, 0.0f); + auto tanh_fn = std::make_shared<autograd::Tanh>(); + auto result = tanh_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = tanh_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_P(AutogradElementwiseBackwardTest, ExpBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto exp_fn = std::make_shared<autograd::Exp>(); + auto result = exp_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = exp_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_P(AutogradElementwiseBackwardTest, LogBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto log_fn = std::make_shared<autograd::Log>(); + auto result = log_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = log_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_P(AutogradElementwiseBackwardTest, ReciprocalBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto reciprocal_fn = std::make_shared<autograd::Reciprocal>(); + auto result = reciprocal_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = reciprocal_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_P(AutogradElementwiseBackwardTest, PowBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto pow_fn = std::make_shared<autograd::Pow>(2.0f); + auto result = pow_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = pow_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_P(AutogradElementwiseBackwardTest, RsqrtBackward) { + auto a = createTensor({2, 3}, 4.0f); + auto rsqrt_fn = std::make_shared<autograd::Rsqrt>(); + auto result = rsqrt_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = rsqrt_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradElementwiseBackwardTest); diff --git a/tests/autograd/test_autograd_elementwise_forward.cc b/tests/autograd/test_autograd_elementwise_forward.cc new file mode 100644 index 00000000..20fe658e --- /dev/null +++ b/tests/autograd/test_autograd_elementwise_forward.cc @@ -0,0 +1,189 @@ +#include <gtest/gtest.h> + +#include <memory> +#include <vector> + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/elementwise.h" +#include "infini_train/include/autograd/activations.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradElementwiseForwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradElementwiseForwardTest, AddForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto add_fn = std::make_shared<autograd::Add>(); + auto result = add_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3})); +} + +TEST_P(AutogradElementwiseForwardTest, SubForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto sub_fn = std::make_shared<autograd::Sub>(); + auto result = sub_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, MulForward) { + auto a = createTensor({2, 3}, 2.0f); + auto b = createTensor({2, 3}, 3.0f); + auto mul_fn = std::make_shared<autograd::Mul>(); + auto result = mul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, DivForward) { + auto a = createTensor({2, 3}, 6.0f); + auto b = createTensor({2, 3}, 2.0f); + auto div_fn = std::make_shared<autograd::Div>(); + 
auto result = div_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, NegForward) { + auto a = createTensor({2, 3}, 5.0f); + auto neg_fn = std::make_shared<autograd::Neg>(); + auto result = neg_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, SinForward) { + auto a = createTensor({2, 3}, 0.0f); + auto sin_fn = std::make_shared<autograd::Sin>(); + auto result = sin_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, CosForward) { + auto a = createTensor({2, 3}, 0.0f); + auto cos_fn = std::make_shared<autograd::Cos>(); + auto result = cos_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, TanhForward) { + auto a = createTensor({2, 3}, 0.0f); + auto tanh_fn = std::make_shared<autograd::Tanh>(); + auto result = tanh_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, ExpForward) { + auto a = createTensor({2, 3}, 1.0f); + auto exp_fn = std::make_shared<autograd::Exp>(); + auto result = exp_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, LogForward) { + auto a = createTensor({2, 3}, 2.0f); + auto log_fn = std::make_shared<autograd::Log>(); + auto result = log_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, ReciprocalForward) { + auto a = createTensor({2, 3}, 2.0f); + auto reciprocal_fn = std::make_shared<autograd::Reciprocal>(); + auto result = reciprocal_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, PowForward) { + auto a = createTensor({2, 3}, 2.0f); + auto pow_fn = std::make_shared<autograd::Pow>(2.0f); + auto result = pow_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, RsqrtForward) { + auto a = createTensor({2, 3}, 4.0f); + auto rsqrt_fn = std::make_shared<autograd::Rsqrt>(); + auto result = rsqrt_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, SigmoidForward) { + auto a = createTensor({2, 3}, 0.0f); + auto sigmoid_fn = std::make_shared<autograd::Sigmoid>(); + auto result = sigmoid_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, AddScalarForward) { + auto a = createTensor({2, 3}, 1.0f); + auto add_scalar_fn = std::make_shared<autograd::AddScalar>(2.0f); + auto result = add_scalar_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, MulScalarForward) { + auto a = createTensor({2, 3}, 2.0f); + auto mul_scalar_fn = std::make_shared<autograd::MulScalar>(3.0f); + auto result = mul_scalar_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, LtForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto lt_fn = std::make_shared<autograd::Lt>(); + auto result = lt_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, LeForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto le_fn = std::make_shared<autograd::Le>(); + auto result = le_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, GtForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto gt_fn = std::make_shared<autograd::Gt>(); + auto result = gt_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, GeForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto ge_fn = std::make_shared<autograd::Ge>(); + auto result = ge_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + 
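+// The equality and logical ops below follow the same recipe as the ordering +// comparisons above: two same-shaped operands in, a single output tensor out. + 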
+TEST_P(AutogradElementwiseForwardTest, EqualsForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto eq_fn = std::make_shared<autograd::Equals>(); + auto result = eq_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, AndForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 1.0f); + auto and_fn = std::make_shared<autograd::And>(); + auto result = and_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradElementwiseForwardTest, OrForward) { + auto a = createTensor({2, 3}, 0.0f); + auto b = createTensor({2, 3}, 1.0f); + auto or_fn = std::make_shared<autograd::Or>(); + auto result = or_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradElementwiseForwardTest); diff --git a/tests/autograd/test_autograd_linear_backward.cc b/tests/autograd/test_autograd_linear_backward.cc new file mode 100644 index 00000000..c62920b4 --- /dev/null +++ b/tests/autograd/test_autograd_linear_backward.cc @@ -0,0 +1,35 @@ +#include <gtest/gtest.h> + +#include <memory> + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/linear.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradLinearBackwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradLinearBackwardTest, LinearBackward) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto linear_fn = std::make_shared<autograd::Linear>(); + auto result = linear_fn->Apply({input, weight, bias}); + auto grad = createTensor({2, 4}, 1.0f); + auto grad_inputs = linear_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 3); +} + +TEST_P(AutogradLinearBackwardTest, LinearBackwardNoBias) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto linear_fn = std::make_shared<autograd::Linear>(); + auto result = linear_fn->Apply({input, weight}); + auto grad = createTensor({2, 4}, 1.0f); + auto grad_inputs = linear_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradLinearBackwardTest); diff --git a/tests/autograd/test_autograd_linear_forward.cc b/tests/autograd/test_autograd_linear_forward.cc new file mode 100644 index 00000000..5fb41546 --- /dev/null +++ b/tests/autograd/test_autograd_linear_forward.cc @@ -0,0 +1,43 @@ +#include <gtest/gtest.h> + +#include <memory> + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/linear.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradLinearForwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradLinearForwardTest, LinearForward) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto linear_fn = std::make_shared<autograd::Linear>(); + auto result = linear_fn->Apply({input, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4})); +} + +TEST_P(AutogradLinearForwardTest, LinearNoBias) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto linear_fn = std::make_shared<autograd::Linear>(); + auto result = linear_fn->Apply({input, weight}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4})); +} + +TEST_P(AutogradLinearForwardTest, LinearBatch) { + auto input = createTensor({32, 128}, 1.0f); + auto weight = createTensor({64, 
128}, 1.0f); + auto bias = createTensor({64}, 0.0f); + auto linear_fn = std::make_shared<autograd::Linear>(); + auto result = linear_fn->Apply({input, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{32, 64})); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradLinearForwardTest); diff --git a/tests/autograd/test_autograd_matmul_backward.cc b/tests/autograd/test_autograd_matmul_backward.cc new file mode 100644 index 00000000..aeb26a55 --- /dev/null +++ b/tests/autograd/test_autograd_matmul_backward.cc @@ -0,0 +1,44 @@ +#include <gtest/gtest.h> + +#include <memory> + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/matmul.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradMatmulBackwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradMatmulBackwardTest, MatmulBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({3, 4}, 1.0f); + auto matmul_fn = std::make_shared<autograd::Matmul>(); + auto result = matmul_fn->Apply({a, b}); + auto grad = createTensor({2, 4}, 1.0f); + auto grad_inputs = matmul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_P(AutogradMatmulBackwardTest, MatmulBackwardSquare) { + auto a = createTensor({3, 3}, 2.0f); + auto b = createTensor({3, 3}, 3.0f); + auto matmul_fn = std::make_shared<autograd::Matmul>(); + auto result = matmul_fn->Apply({a, b}); + auto grad = createTensor({3, 3}, 1.0f); + auto grad_inputs = matmul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_P(AutogradMatmulBackwardTest, MatmulBackwardDifferentShapes) { + auto a = createTensor({3, 4}, 1.5f); + auto b = createTensor({4, 2}, 2.5f); + auto matmul_fn = std::make_shared<autograd::Matmul>(); + auto result = matmul_fn->Apply({a, b}); + auto grad = createTensor({3, 2}, 1.0f); + auto grad_inputs = matmul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradMatmulBackwardTest); diff --git a/tests/autograd/test_autograd_matmul_forward.cc b/tests/autograd/test_autograd_matmul_forward.cc new file mode 100644 index 00000000..8e325c03 --- /dev/null +++ b/tests/autograd/test_autograd_matmul_forward.cc @@ -0,0 +1,50 @@ +#include <gtest/gtest.h> + +#include <memory> + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/matmul.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradMatmulForwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradMatmulForwardTest, MatmulForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({3, 4}, 1.0f); + auto matmul_fn = std::make_shared<autograd::Matmul>(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4})); +} + +TEST_P(AutogradMatmulForwardTest, MatmulDifferentShapes) { + auto a = createTensor({3, 4}, 1.0f); + auto b = createTensor({4, 2}, 1.0f); + auto matmul_fn = std::make_shared<autograd::Matmul>(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{3, 2})); +} + +TEST_P(AutogradMatmulForwardTest, MatmulBatch) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto b = createTensor({2, 4, 5}, 1.0f); + auto matmul_fn = std::make_shared<autograd::Matmul>(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3, 5})); +} + +TEST_P(AutogradMatmulForwardTest, MatmulSquare) { + auto a = createTensor({3, 3}, 1.0f); + auto b = 
createTensor({3, 3}, 1.0f); + auto matmul_fn = std::make_shared<autograd::Matmul>(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{3, 3})); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradMatmulForwardTest); diff --git a/tests/autograd/test_autograd_normalization_backward.cc b/tests/autograd/test_autograd_normalization_backward.cc new file mode 100644 index 00000000..b477cf47 --- /dev/null +++ b/tests/autograd/test_autograd_normalization_backward.cc @@ -0,0 +1,36 @@ +#include <gtest/gtest.h> + +#include <memory> + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/normalization.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradNormalizationBackwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradNormalizationBackwardTest, LayerNormBackward) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared<autograd::LayerNorm>(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + auto grad = createTensor({2, 3, 4}, 1.0f); + auto grad_inputs = layernorm_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 3); +} + +TEST_P(AutogradNormalizationBackwardTest, LayerNormBackwardZeroBias) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared<autograd::LayerNorm>(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + auto grad = createTensor({2, 3, 4}, 1.0f); + auto grad_inputs = layernorm_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 3); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradNormalizationBackwardTest); diff --git a/tests/autograd/test_autograd_normalization_forward.cc b/tests/autograd/test_autograd_normalization_forward.cc new file mode 100644 index 00000000..4ed92296 --- /dev/null +++ b/tests/autograd/test_autograd_normalization_forward.cc @@ -0,0 +1,42 @@ +#include <gtest/gtest.h> + +#include <memory> + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/normalization.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradNormalizationForwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradNormalizationForwardTest, LayerNormForward) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared<autograd::LayerNorm>(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradNormalizationForwardTest, LayerNormZeroBias) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared<autograd::LayerNorm>(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradNormalizationForwardTest, LayerNormThreeDim) { + auto a = createTensor({2, 1, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared<autograd::LayerNorm>(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 1, 4})); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradNormalizationForwardTest); diff --git a/tests/autograd/test_autograd_reduction_backward.cc 
b/tests/autograd/test_autograd_reduction_backward.cc new file mode 100644 index 00000000..9834ffd6 --- /dev/null +++ b/tests/autograd/test_autograd_reduction_backward.cc @@ -0,0 +1,68 @@ +#include <gtest/gtest.h> + +#include <memory> + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/reduction.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradReductionBackwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradReductionBackwardTest, SumBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto sum_fn = std::make_shared<autograd::Sum>(1, false); + auto result = sum_fn->Apply({a}); + auto grad = createTensor({2}, 1.0f); + auto grad_inputs = sum_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_P(AutogradReductionBackwardTest, MeanBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto mean_fn = std::make_shared<autograd::Mean>(1, false); + auto result = mean_fn->Apply({a}); + auto grad = createTensor({2}, 1.0f); + auto grad_inputs = mean_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_P(AutogradReductionBackwardTest, MaxBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto max_fn = std::make_shared<autograd::Max>(1, false); + auto result = max_fn->Apply({a}); + auto grad = createTensor({2}, 1.0f); + auto grad_inputs = max_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_P(AutogradReductionBackwardTest, MinBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto min_fn = std::make_shared<autograd::Min>(1, false); + auto result = min_fn->Apply({a}); + auto grad = createTensor({2}, 1.0f); + auto grad_inputs = min_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_P(AutogradReductionBackwardTest, SumBackwardKeepDim) { + auto a = createTensor({2, 3}, 1.0f); + auto sum_fn = std::make_shared<autograd::Sum>(1, true); + auto result = sum_fn->Apply({a}); + auto grad = createTensor({2, 1}, 1.0f); + auto grad_inputs = sum_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_P(AutogradReductionBackwardTest, MeanBackwardKeepDim) { + auto a = createTensor({2, 3}, 1.0f); + auto mean_fn = std::make_shared<autograd::Mean>(1, true); + auto result = mean_fn->Apply({a}); + auto grad = createTensor({2, 1}, 1.0f); + auto grad_inputs = mean_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradReductionBackwardTest); diff --git a/tests/autograd/test_autograd_reduction_forward.cc b/tests/autograd/test_autograd_reduction_forward.cc new file mode 100644 index 00000000..4d8ed2e4 --- /dev/null +++ b/tests/autograd/test_autograd_reduction_forward.cc @@ -0,0 +1,56 @@ +#include <gtest/gtest.h> + +#include <memory> + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/reduction.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradReductionForwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradReductionForwardTest, SumForward) { + auto a = createTensor({2, 3}, 1.0f); + auto sum_fn = std::make_shared<autograd::Sum>(1, false); + auto result = sum_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradReductionForwardTest, MeanForward) { + auto a = createTensor({2, 3}, 1.0f); + auto mean_fn = std::make_shared<autograd::Mean>(1, false); + auto result = mean_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradReductionForwardTest, MaxForward) { + auto a = createTensor({2, 3}, 1.0f); + auto max_fn = std::make_shared<autograd::Max>(1, false); + auto result = 
max_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradReductionForwardTest, MinForward) { + auto a = createTensor({2, 3}, 1.0f); + auto min_fn = std::make_shared<autograd::Min>(1, false); + auto result = min_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradReductionForwardTest, SumKeepDim) { + auto a = createTensor({2, 3}, 1.0f); + auto sum_fn = std::make_shared<autograd::Sum>(1, true); + auto result = sum_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradReductionForwardTest, MeanKeepDim) { + auto a = createTensor({2, 3}, 1.0f); + auto mean_fn = std::make_shared<autograd::Mean>(1, true); + auto result = mean_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradReductionForwardTest); diff --git a/tests/autograd/test_autograd_softmax_backward.cc b/tests/autograd/test_autograd_softmax_backward.cc new file mode 100644 index 00000000..b07ac833 --- /dev/null +++ b/tests/autograd/test_autograd_softmax_backward.cc @@ -0,0 +1,32 @@ +#include <gtest/gtest.h> + +#include <memory> + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/softmax.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradSoftmaxBackwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradSoftmaxBackwardTest, SoftmaxBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto softmax_fn = std::make_shared<autograd::Softmax>(1); + auto result = softmax_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = softmax_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_P(AutogradSoftmaxBackwardTest, SoftmaxBackwardDim0) { + auto a = createTensor({4, 3}, 1.0f); + auto softmax_fn = std::make_shared<autograd::Softmax>(0); + auto result = softmax_fn->Apply({a}); + auto grad = createTensor({4, 3}, 1.0f); + auto grad_inputs = softmax_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradSoftmaxBackwardTest); diff --git a/tests/autograd/test_autograd_softmax_forward.cc b/tests/autograd/test_autograd_softmax_forward.cc new file mode 100644 index 00000000..42939fc9 --- /dev/null +++ b/tests/autograd/test_autograd_softmax_forward.cc @@ -0,0 +1,38 @@ +#include <gtest/gtest.h> + +#include <memory> + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/softmax.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradSoftmaxForwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradSoftmaxForwardTest, SoftmaxForward) { + auto a = createTensor({2, 3}, 1.0f); + auto softmax_fn = std::make_shared<autograd::Softmax>(1); + auto result = softmax_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3})); +} + +TEST_P(AutogradSoftmaxForwardTest, SoftmaxDim0) { + auto a = createTensor({4, 3}, 1.0f); + auto softmax_fn = std::make_shared<autograd::Softmax>(0); + auto result = softmax_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{4, 3})); +} + +TEST_P(AutogradSoftmaxForwardTest, SoftmaxLastDim) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto softmax_fn = std::make_shared<autograd::Softmax>(2); + auto result = softmax_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 3, 4})); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradSoftmaxForwardTest); diff --git a/tests/autograd/test_autograd_transform_backward.cc b/tests/autograd/test_autograd_transform_backward.cc new file mode 100644 index 
00000000..2a542d4e --- /dev/null +++ b/tests/autograd/test_autograd_transform_backward.cc @@ -0,0 +1,23 @@ +#include <gtest/gtest.h> + +#include <memory> + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/transform.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradTransformBackwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradTransformBackwardTest, TransposeBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto transpose_fn = std::make_shared<autograd::Transpose>(0, 1); + auto result = transpose_fn->Apply({a}); + auto grad = createTensor({3, 2}, 1.0f); + auto grad_inputs = transpose_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradTransformBackwardTest); diff --git a/tests/autograd/test_autograd_transform_forward.cc b/tests/autograd/test_autograd_transform_forward.cc new file mode 100644 index 00000000..bc4da44f --- /dev/null +++ b/tests/autograd/test_autograd_transform_forward.cc @@ -0,0 +1,72 @@ +#include <gtest/gtest.h> + +#include <memory> + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/transform.h" +#include "infini_train/include/autograd/misc.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradTransformForwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradTransformForwardTest, TransposeForward) { + auto a = createTensor({2, 3}, 1.0f); + auto transpose_fn = std::make_shared<autograd::Transpose>(0, 1); + auto result = transpose_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{3, 2})); +} + +TEST_P(AutogradTransformForwardTest, SliceForward) { + auto a = createTensor({4, 4}, 1.0f); + auto slice_fn = std::make_shared<autograd::Slice>( + std::vector<int64_t>{1, 1}, + std::vector<int64_t>{3, 3}, + std::vector<int64_t>{1, 1}); + auto result = slice_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradTransformForwardTest, SplitForward) { + auto a = createTensor({4, 4}, 1.0f); + auto split_fn = std::make_shared<autograd::Split>(2, 0); + auto result = split_fn->Apply({a}); + EXPECT_EQ(result.size(), 2); +} + +TEST_P(AutogradTransformForwardTest, ConcatForward) { + auto a = createTensor({2, 2}, 1.0f); + auto b = createTensor({2, 2}, 2.0f); + auto concat_fn = std::make_shared<autograd::Concat>(0); + auto result = concat_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{4, 2})); +} + +TEST_P(AutogradTransformForwardTest, StackForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto stack_fn = std::make_shared<autograd::Stack>(0); + auto result = stack_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 2, 3})); +} + +TEST_P(AutogradTransformForwardTest, TrilForward) { + auto a = createTensor({3, 3}, 1.0f); + auto tril_fn = std::make_shared<autograd::Tril>(0); + auto result = tril_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_P(AutogradTransformForwardTest, TriuForward) { + auto a = createTensor({3, 3}, 1.0f); + auto triu_fn = std::make_shared<autograd::Triu>(0); + auto result = triu_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +INFINI_TRAIN_REGISTER_TEST(AutogradTransformForwardTest); diff --git a/tests/common/CMakeLists.txt b/tests/common/CMakeLists.txt new file mode 100644 index 00000000..3960d474 --- /dev/null +++ b/tests/common/CMakeLists.txt @@ -0,0 +1,4 @@ +# Common test utilities + +add_library(test_utils INTERFACE) +target_include_directories(test_utils INTERFACE 
${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/tests/common/test_macros.cmake b/tests/common/test_macros.cmake new file mode 100644 index 00000000..477a668d --- /dev/null +++ b/tests/common/test_macros.cmake @@ -0,0 +1,141 @@ +# ============================================================================ +# InfiniTrain Test Macros +# ============================================================================ +# Unified test configuration interface to reduce boilerplate. +# +# Usage: +# 1. Include this file in tests/CMakeLists.txt +# 2. Use infini_train_add_test macro to register tests +# +# Examples: +# infini_train_add_test( +# test_tensor_create +# SOURCES test_tensor_create.cc +# LABELS cpu cuda +# ) +# ============================================================================ + +include_guard(GLOBAL) + +# Path to this file's directory (tests/common/) +set(TEST_MACROS_DIR "${CMAKE_CURRENT_LIST_DIR}") + +# ----------------------------------------------------------------------------- +# Load GoogleTest module (provides gtest_discover_tests) +# ----------------------------------------------------------------------------- +include(GoogleTest) + +# ----------------------------------------------------------------------------- +# infini_train_add_test - Test registration macro +# ----------------------------------------------------------------------------- +# Features: +# 1. Create executable target +# 2. Configure compile options, link libraries, and include paths +# 3. Use gtest_discover_tests to auto-discover test cases +# 4. Set test labels +# +# Arguments: +# SOURCES: Source file list (required) +# LABELS: Test labels, e.g. "cpu" "cuda" "distributed" (optional, default "cpu") +# TEST_FILTER: gtest test filter pattern (optional) +# +# Examples: +# # Single-label test (one liner) +# infini_train_add_test(test_example SOURCES test_example.cc LABELS cpu) +# +# # Filter same binary by label suffix (one call per label) +# infini_train_add_test(test_example SOURCES test_example.cc LABELS cpu TEST_FILTER "-*CUDA*") +# infini_train_add_test(test_example_cuda SOURCES test_example.cc LABELS cuda TEST_FILTER "*CUDA*") +# ----------------------------------------------------------------------------- +macro(infini_train_add_test) + cmake_parse_arguments(ARG "" "TEST_NAME;TEST_FILTER" "SOURCES;LABELS" ${ARGN}) + + if(NOT ARG_TEST_NAME) + set(ARG_TEST_NAME ${ARG_UNPARSED_ARGUMENTS}) + endif() + + if(NOT ARG_SOURCES) + message(FATAL_ERROR "infini_train_add_test: TEST_NAME and SOURCES are required") + endif() + + # 1. Create executable target + add_executable(${ARG_TEST_NAME} ${ARG_SOURCES}) + + # 2. Disable -Werror so tests can run under relaxed warning levels + target_compile_options(${ARG_TEST_NAME} PRIVATE -Wno-error) + + # 3. Link Google Test + target_link_libraries(${ARG_TEST_NAME} PRIVATE + GTest::gtest + GTest::gtest_main + ) + + # 4. Add include paths + target_include_directories(${ARG_TEST_NAME} PRIVATE + ${TEST_MACROS_DIR} + ${glog_SOURCE_DIR}/src + ) + + # 5. Link project library (reuses framework linking strategy) + link_infini_train_exe(${ARG_TEST_NAME}) + + # 6. 
Auto-discover gtest cases and register as ctest tests + set(labels "cpu") + if(ARG_LABELS) + set(labels "${ARG_LABELS}") + endif() + + if(ARG_TEST_FILTER) + gtest_discover_tests(${ARG_TEST_NAME} + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "${ARG_TEST_FILTER}" + PROPERTIES LABELS "${labels}" + ) + else() + gtest_discover_tests(${ARG_TEST_NAME} + EXTRA_ARGS --gtest_output=xml:%T.xml + PROPERTIES LABELS "${labels}" + ) + endif() +endmacro() + +# ----------------------------------------------------------------------------- +# infini_train_add_test_suite - Register cpu/cuda/distributed targets in one call +# ----------------------------------------------------------------------------- +# Calls infini_train_add_test three times (or fewer) with the correct +# TEST_FILTER and LABELS derived from the label list. +# +# Arguments: +# Base name; each target is named _