2 changes: 1 addition & 1 deletion include/infinicore/context/context.hpp
@@ -11,7 +11,7 @@
 namespace infinicore {

 namespace context {
-void setDevice(Device device);
+void setDevice(Device device, bool force_cpu = false);
 Device getDevice();
 size_t getDeviceCount(Device::Type type);

4 changes: 4 additions & 0 deletions include/infinicore/device.hpp
@@ -39,6 +39,10 @@ class Device {

     bool operator!=(const Device &other) const;

+    inline static Device cpu() {
+        return Device(Type::CPU, 0);
+    }
+
 private:
     Type type_;

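The new factory is a named shorthand for the default CPU device. A minimal usage sketch (the isHostResident helper is hypothetical; only Device::cpu() and the comparison operators come from this header):

    #include "infinicore/device.hpp"

    using infinicore::Device;

    // Hypothetical call site: the named factory replaces spelling out
    // Device(Device::Type::CPU, 0) at every use.
    bool isHostResident(const Device &d) {
        return d == Device::cpu();
    }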
13 changes: 12 additions & 1 deletion include/infinicore/nn/parameter.hpp
@@ -9,8 +9,19 @@ class Parameter : public Tensor {

     Parameter(const Shape &shape,
               const DataType &dtype,
-              const Device &device);
+              const Device &device,
+              Size tp_dim = 0,
+              Size tp_rank = 0,
+              Size tp_size = 1);
+
     void load_blob(const void *data);

+    void load(const Tensor &tensor);
+
+protected:
+    // Tensor parallel configs
+    Size tp_dim_;  // dimension partitioned
+    Size tp_rank_; // rank of this partition among tp group
+    Size tp_size_; // total number of partitions
 };
 } // namespace infinicore::nn
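Read together with the tests later in this PR, the intended semantics appear to be that a tensor-parallel Parameter stores only this rank's 1/tp_size slice along dimension tp_dim, and load() narrows a full-size source tensor down to that slice. A rough sketch of the shape arithmetic, assuming even divisibility (the partitionedShape helper is illustrative, not the PR's actual implementation):

    #include <cstddef>
    #include <vector>

    using Shape = std::vector<std::size_t>;

    // Illustrative only: how a tensor-parallel Parameter presumably derives
    // its local shape from the full shape, matching the tests in this PR.
    Shape partitionedShape(Shape full, std::size_t tp_dim, std::size_t tp_size) {
        // e.g. full = {8, 4}, tp_dim = 0, tp_size = 2  ->  {4, 4}
        //      full = {8, 4}, tp_dim = 1, tp_size = 2  ->  {8, 2}
        full[tp_dim] /= tp_size; // assumes full[tp_dim] % tp_size == 0
        return full;
    }

The matching source offset for rank r is r * (full[tp_dim] / tp_size), which is what the tests below reproduce with Tensor::narrow().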
4 changes: 2 additions & 2 deletions python/infinicore/context.py
@@ -23,13 +23,13 @@ def get_device_count(device_type):
     return _infinicore.get_device_count(infinicore.device(device_type)._underlying.type)


-def set_device(device):
+def set_device(device, force_cpu=False):
     """Set the current active device.

     Args:
         device: The device to set as active
     """
-    _infinicore.set_device(device._underlying)
+    _infinicore.set_device(device._underlying, force_cpu)


def sync_stream():
3 changes: 0 additions & 3 deletions src/infinicore-test/memory_test.cc
@@ -709,9 +709,6 @@ TestResult PerformanceTest::testMemoryCopyPerformance() {
         return false;
     }

-    // Initialize source data
-    std::memset(src_memory->data(), 0xAB, data_size);
-
     auto start = std::chrono::high_resolution_clock::now();

     // Perform memory copies

Review comment from the author on the deleted lines: "No numerical check follows this point; the write only caused a segfault at the set-device address, so it was removed."
205 changes: 195 additions & 10 deletions src/infinicore-test/test_nn_module.cc
@@ -3,6 +3,20 @@

 namespace infinicore::test {

+// Helper function to format shape for logging
+inline std::string formatShape(const std::vector<size_t> &shape) {
+    std::ostringstream oss;
+    oss << "[";
+    for (size_t i = 0; i < shape.size(); ++i) {
+        if (i > 0) {
+            oss << ", ";
+        }
+        oss << shape[i];
+    }
+    oss << "]";
+    return oss.str();
+}
+
 // Test 1: Basic module operations (creation, parameters, state_dict, load_state_dict)
 TestResult NNModuleTest::testBasicModuleCreation() {
     return measureTime("BasicModuleOperations", [this]() {
@@ -115,6 +129,174 @@ TestResult NNModuleTest::testBasicModuleCreation() {
     });
 }

+TestResult NNModuleTest::testTensorParallelParameters() {
+    return measureTime("TensorParallelParameters", [this]() {
+        try {
+            spdlog::info("==========================================");
+            spdlog::info("Testing Tensor Parallel Parameters");
+            spdlog::info("==========================================");
+
+            auto device = infinicore::context::getDevice();
+
+            spdlog::info("Test Tensor Parallel Parameter");
+            // Case 1: Partition along dimension 0 (row-wise partitioning)
+            infinicore::nn::Parameter param_dim0({8, 4}, infinicore::DataType::F32, device, 0, 0, 2);
+            if (param_dim0->shape() != std::vector<size_t>({4, 4})) {
+                spdlog::error("TP dim0: Expected shape [4, 4], got {}", formatShape(param_dim0->shape()));
+                return false;
+            }
+            spdlog::info("✓ TP dim0 parameter created with correct partitioned shape");
+            // Case 2: Partition along dimension 1 (column-wise partitioning)
+            infinicore::nn::Parameter param_dim1({8, 4}, infinicore::DataType::F32, device, 1, 0, 2);
+            if (param_dim1->shape() != std::vector<size_t>({8, 2})) {
+                spdlog::error("TP dim1: Expected shape [8, 2], got {}", formatShape(param_dim1->shape()));
+                return false;
+            }
+            spdlog::info("✓ TP dim1 parameter created with correct partitioned shape");
+            spdlog::info("✓ Parameter creation with tensor parallelism passed");
+
+            spdlog::info("Test Tensor Parallel Linear Module");
+            auto w_data = std::vector<float>(32 * 64);
+            auto b_data = std::vector<float>(32);
+            for (size_t i = 0; i < 32; ++i) {
+                for (size_t j = 0; j < 64; ++j) {
+                    w_data[i * 64 + j] = static_cast<float>(j);
+                }
+                b_data[i] = static_cast<float>(i);
+            }
+            {
+                spdlog::info("Test tp_size=4 tp_dim=0");
+                Size tp_size = 4;
+                Size tp_dim = 0;
+                std::vector<std::unique_ptr<MockLinearModule>> tp_modules;
+
+                for (Size tp_rank = 0; tp_rank < tp_size; ++tp_rank) {
+                    auto module = std::make_unique<MockLinearModule>(64, 32, device, tp_dim, tp_rank, tp_size);
+                    tp_modules.push_back(std::move(module));
+                }
+
+                // Verify each partition has the correct shape
+                for (size_t i = 0; i < tp_modules.size(); ++i) {
+                    const auto &weight = tp_modules[i]->get_weight();
+                    const auto &bias = tp_modules[i]->get_bias();
+
+                    // Weight should be partitioned along the output dimension (dim 0)
+                    if (weight->shape() != std::vector<size_t>({8, 64})) { // 32/4 = 8
+                        spdlog::error("TP rank {}: Weight shape mismatch. Expected [8, 64], got {}",
+                                      i, formatShape(weight->shape()));
+                        return false;
+                    }
+
+                    // Bias should be partitioned along the output dimension
+                    if (bias->shape() != std::vector<size_t>({8})) { // 32/4 = 8
+                        spdlog::error("TP rank {}: Bias shape mismatch. Expected [8], got {}",
+                                      i, formatShape(bias->shape()));
+                        return false;
+                    }
+
+                    spdlog::debug("TP rank {}: weight shape {}, bias shape {}",
+                                  i, formatShape(weight->shape()), formatShape(bias->shape()));
+
+                    tp_modules[i]->load_parameter_from_blob("weight", w_data.data());
+                    tp_modules[i]->load_parameter_from_blob("bias", b_data.data());
+
+                    auto weight_loaded = infinicore::Tensor::from_blob(
+                                             w_data.data(),
+                                             {32, 64},
+                                             infinicore::DataType::F32,
+                                             infinicore::Device::cpu())
+                                             ->narrow({{0, i * 8, 8}}) // Narrow to get this rank's partition
+                                             ->to(device);
+                    auto bias_loaded = infinicore::Tensor::from_blob(
+                                           b_data.data(),
+                                           {32},
+                                           infinicore::DataType::F32,
+                                           infinicore::Device::cpu())
+                                           ->narrow({{0, i * 8, 8}}) // Narrow to get this rank's partition
+                                           ->to(device);
+
+                    if (!tensorsAllClose(tp_modules[i]->get_weight(), weight_loaded, 1e-6, 1e-6)) {
+                        spdlog::error("TP rank {}: Weight values do not match after load_parameter_from_blob", i);
+                        return false;
+                    }
+
+                    if (!tensorsAllClose(tp_modules[i]->get_bias(), bias_loaded, 1e-6, 1e-6)) {
+                        spdlog::error("TP rank {}: Bias values do not match after load_parameter_from_blob", i);
+                        return false;
+                    }
+                }
+            }
+
+            {
+                spdlog::info("Test tp_size=4 tp_dim=1");
+                Size tp_size = 4;
+                Size tp_dim = 1;
+                std::vector<std::unique_ptr<MockLinearModule>> tp_modules;
+
+                for (Size tp_rank = 0; tp_rank < tp_size; ++tp_rank) {
+                    auto module = std::make_unique<MockLinearModule>(64, 32, device, tp_dim, tp_rank, tp_size);
+                    tp_modules.push_back(std::move(module));
+                }
+
+                // Verify each partition has the correct shape
+                for (size_t i = 0; i < tp_modules.size(); ++i) {
+                    const auto &weight = tp_modules[i]->get_weight();
+                    const auto &bias = tp_modules[i]->get_bias();
+
+                    // Weight should be partitioned along the input dimension (dim 1)
+                    if (weight->shape() != std::vector<size_t>({32, 16})) { // 64/4 = 16
+                        spdlog::error("TP rank {}: Weight shape mismatch. Expected [32, 16], got {}",
+                                      i, formatShape(weight->shape()));
+                        return false;
+                    }
+
+                    // Bias is not partitioned when tp_dim == 1
+                    if (bias->shape() != std::vector<size_t>({32})) {
+                        spdlog::error("TP rank {}: Bias shape mismatch. Expected [32], got {}",
+                                      i, formatShape(bias->shape()));
+                        return false;
+                    }
+
+                    spdlog::debug("TP rank {}: weight shape {}, bias shape {}",
+                                  i, formatShape(weight->shape()), formatShape(bias->shape()));
+
+                    tp_modules[i]->load_parameter_from_blob("weight", w_data.data());
+                    tp_modules[i]->load_parameter_from_blob("bias", b_data.data());
+
+                    auto weight_loaded = infinicore::Tensor::from_blob(
+                                             w_data.data(),
+                                             {32, 64},
+                                             infinicore::DataType::F32,
+                                             infinicore::Device::cpu())
+                                             ->narrow({{1, i * 16, 16}}) // Narrow to get this rank's partition
+                                             ->to(device);
+                    auto bias_loaded = infinicore::Tensor::from_blob(
+                                           b_data.data(),
+                                           {32},
+                                           infinicore::DataType::F32,
+                                           infinicore::Device::cpu())
+                                           ->to(device); // Full bias: not partitioned when tp_dim == 1
+                    if (!tensorsAllClose(tp_modules[i]->get_weight(), weight_loaded, 1e-6, 1e-6)) {
+                        spdlog::error("TP rank {}: Weight values do not match after load_parameter_from_blob", i);
+                        return false;
+                    }
+                    if (!tensorsAllClose(tp_modules[i]->get_bias(), bias_loaded, 1e-6, 1e-6)) {
+                        spdlog::error("TP rank {}: Bias values do not match after load_parameter_from_blob", i);
+                        return false;
+                    }
+                }
+            }
+
+            spdlog::info("=== All Tensor Parallel Parameter Tests Passed ===");
+            return true;
+
+        } catch (const std::exception &e) {
+            spdlog::error("Exception in testTensorParallelParameters: {}", e.what());
+            return false;
+        }
+    });
+}
+
 // Test 2: Advanced load state dict functionality (hierarchical modules)
 TestResult NNModuleTest::testLoadStateDict() {
     return measureTime("AdvancedLoadStateDict", [this]() {
@@ -384,6 +566,8 @@ TestResult NNModuleTest::testParameterLoading() {
                 return false;
             }

+            MockLinearModule module_row_parallel(3, 2, infinicore::Device(), 0, 1, 2);
+
             spdlog::info("Parameter loading test passed");
             return true;
         } catch (const std::exception &e) {
@@ -1708,16 +1892,17 @@ TestResult NNModuleTest::run() {
               << "InfiniCore nn::Module Test Suite\n"
               << "==============================================" << std::endl;

-    results.push_back(testBasicModuleCreation()); // Merged: creation + parameters + state_dict + load
-    results.push_back(testLoadStateDict());       // Advanced: hierarchical modules
-    results.push_back(testModuleHierarchy());     // Demonstrates hierarchical construction
-    results.push_back(testParameterLoading());    // Blob loading
-    results.push_back(testModuleLinear());        // Linear module comprehensive test
-    results.push_back(testModuleEmbedding());     // Embedding module test
-    results.push_back(testModuleRMSNorm());       // RMSNorm module test
-    results.push_back(testModuleRoPE());          // RoPE module test
-    results.push_back(testDtypeAssertion());      // Dtype assertion test
-    results.push_back(testTinyLlamaConstruction()); // Comprehensive: TinyLlama model test
+    results.push_back(testBasicModuleCreation());      // Merged: creation + parameters + state_dict + load
+    results.push_back(testTensorParallelParameters()); // Tensor-parallel parameters
+    results.push_back(testLoadStateDict());            // Advanced: hierarchical modules
+    results.push_back(testModuleHierarchy());          // Demonstrates hierarchical construction
+    results.push_back(testParameterLoading());         // Blob loading
+    results.push_back(testModuleLinear());             // Linear module comprehensive test
+    results.push_back(testModuleEmbedding());          // Embedding module test
+    results.push_back(testModuleRMSNorm());            // RMSNorm module test
+    results.push_back(testModuleRoPE());               // RoPE module test
+    results.push_back(testDtypeAssertion());           // Dtype assertion test
+    results.push_back(testTinyLlamaConstruction());    // Comprehensive: TinyLlama model test

     // Check if all tests passed
     bool all_passed = true;
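For reference, the arithmetic the new test checks: a {32, 64} weight split four ways along dim 0 gives each rank rows [8i, 8i + 8); split along dim 1, each rank gets columns [16i, 16i + 16) plus the full 32-element bias. A compact sketch of how the expected slice bounds are derived (the Slice struct and tpSlice helper are local names introduced here; they mirror the test's narrow({{tp_dim, offset, chunk}}) arguments):

    #include <cstddef>

    struct Slice {
        std::size_t offset; // start index along the partitioned dimension
        std::size_t chunk;  // extent owned by this rank
    };

    // Per-rank slice of a dimension of extent `extent`, split `tp_size` ways.
    Slice tpSlice(std::size_t extent, std::size_t tp_rank, std::size_t tp_size) {
        std::size_t chunk = extent / tp_size; // 32/4 = 8 (dim 0), 64/4 = 16 (dim 1)
        return {tp_rank * chunk, chunk};
    }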
41 changes: 27 additions & 14 deletions src/infinicore-test/test_nn_module.h
@@ -26,17 +26,25 @@ class MockLinearModule : public infinicore::nn::Module {
     INFINICORE_NN_PARAMETER(weight);
     INFINICORE_NN_PARAMETER(bias);

-    MockLinearModule(int input_size, int output_size, const infinicore::Device &device)
-        : input_size_(input_size), output_size_(output_size), device_(device) {
+    MockLinearModule(int input_size, int output_size, const infinicore::Device &device,
+                     Size tp_dim = 0, Size tp_rank = 0, Size tp_size = 1)
+        : input_size_(input_size), output_size_(output_size), device_(device),
+          tp_dim_(tp_dim), tp_rank_(tp_rank), tp_size_(tp_size) {
         // Initialize parameters using macros
         INFINICORE_NN_PARAMETER_INIT(weight,
                                      ({static_cast<size_t>(output_size), static_cast<size_t>(input_size)},
                                       infinicore::DataType::F32,
-                                      device));
+                                      device,
+                                      tp_dim_,
+                                      tp_rank_,
+                                      tp_size_));
         INFINICORE_NN_PARAMETER_INIT(bias,
                                      ({static_cast<size_t>(output_size)},
                                       infinicore::DataType::F32,
-                                      device));
+                                      device,
+                                      0,
+                                      tp_dim == 0 ? tp_rank_ : 0,
+                                      tp_dim == 0 ? tp_size_ : 1));
     }

     // Simple forward pass (conceptual - would need actual matrix operations)
@@ -68,6 +76,10 @@
     int input_size_;
     int output_size_;
     infinicore::Device device_;
+
+    Size tp_dim_;
+    Size tp_rank_;
+    Size tp_size_;
 };

 class NNModuleTest : public TestFramework {
@@ -76,16 +88,17 @@ class NNModuleTest : public TestFramework {
     std::string getName() const override { return "NNModuleTest"; }

 private:
-    TestResult testBasicModuleCreation(); // Merged: creation, parameters, state_dict, load_state_dict
-    TestResult testLoadStateDict();       // Advanced: hierarchical modules
-    TestResult testModuleHierarchy();     // Demonstrates proper hierarchical construction pattern
-    TestResult testParameterLoading();    // Test blob parameter loading
-    TestResult testModuleLinear();        // Comprehensive Linear module test
-    TestResult testModuleEmbedding();     // Embedding module test
-    TestResult testModuleRMSNorm();       // RMSNorm module test
-    TestResult testModuleRoPE();          // RoPE module test
-    TestResult testDtypeAssertion();      // Test dtype assertions when loading parameters
-    TestResult testTinyLlamaConstruction(); // Comprehensive: construction + weight loading + validation
+    TestResult testBasicModuleCreation();       // Merged: creation, parameters, state_dict, load_state_dict
+    TestResult testTensorParallelParameters();  // Module with tensor parallel parameters
+    TestResult testLoadStateDict();             // Advanced: hierarchical modules
+    TestResult testModuleHierarchy();           // Demonstrates proper hierarchical construction pattern
+    TestResult testParameterLoading();          // Test blob parameter loading
+    TestResult testModuleLinear();              // Comprehensive Linear module test
+    TestResult testModuleEmbedding();           // Embedding module test
+    TestResult testModuleRMSNorm();             // RMSNorm module test
+    TestResult testModuleRoPE();                // RoPE module test
+    TestResult testDtypeAssertion();            // Test dtype assertions when loading parameters
+    TestResult testTinyLlamaConstruction();     // Comprehensive: construction + weight loading + validation
 };

 } // namespace infinicore::test
10 changes: 7 additions & 3 deletions src/infinicore/context/context_impl.cc
@@ -33,11 +33,15 @@ Runtime *ContextImpl::getCpuRuntime() {
     return runtime_table_[int(Device::Type::CPU)][0].get();
 }

-void ContextImpl::setDevice(Device device) {
+void ContextImpl::setDevice(Device device, bool force_cpu) {
     if (device == getCurrentRuntime()->device()) {
         // Do nothing if the device is already set.
         return;
     }
+    if (device == Device(Device::Type::CPU, 0) && !force_cpu) {
+        // If not forced, there is no need to switch to the CPU device runtime.
+        return;
+    }

     if (runtime_table_[int(device.getType())][device.getIndex()] == nullptr) {
         // Lazy initialization of runtime if never set before.
@@ -83,8 +87,8 @@ ContextImpl::ContextImpl() {

 namespace context {

-void setDevice(Device device) {
-    ContextImpl::singleton().setDevice(device);
+void setDevice(Device device, bool force_cpu) {
+    ContextImpl::singleton().setDevice(device, force_cpu);
 }

 Device getDevice() {
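Net effect of the change: a request to switch to a device that is already current, or to the CPU without force_cpu, is a no-op, so CPU-side work no longer displaces the active accelerator runtime. A hedged usage sketch (it sticks to the CPU device type, since no other Type enumerators appear in this diff):

    #include "infinicore/context/context.hpp"
    #include "infinicore/device.hpp"

    using namespace infinicore;

    void sketch() {
        // No-op: without force_cpu, a CPU target never switches the runtime.
        context::setDevice(Device::cpu());

        // Actually switches the active runtime to the CPU.
        context::setDevice(Device::cpu(), /*force_cpu=*/true);
    }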