From f72193d40675050aedecc8f8f3fedf3d03902f61 Mon Sep 17 00:00:00 2001 From: zhuyue Date: Mon, 13 Oct 2025 15:14:07 +0800 Subject: [PATCH 1/3] Add debug function in InfiniCore tensor. --- include/infinicore/tensor.hpp | 4 + python/infinicore/tensor.py | 11 + src/infinicore/pybind11/tensor.hpp | 4 +- src/infinicore/tensor/debug.cc | 376 +++++++++++++++++++++++++++ test/infinicore/op/debug.py | 398 +++++++++++++++++++++++++++++ 5 files changed, 792 insertions(+), 1 deletion(-) create mode 100644 src/infinicore/tensor/debug.cc create mode 100644 test/infinicore/op/debug.py diff --git a/include/infinicore/tensor.hpp b/include/infinicore/tensor.hpp index b3570e9fe..828034c94 100644 --- a/include/infinicore/tensor.hpp +++ b/include/infinicore/tensor.hpp @@ -121,6 +121,10 @@ class TensorImpl : public std::enable_shared_from_this { std::string info() const; + void debug(const std::string &filename) const; + + void debug() const; + /// /// Data Transfer APIs /// diff --git a/python/infinicore/tensor.py b/python/infinicore/tensor.py index 5095768c5..2df6df681 100644 --- a/python/infinicore/tensor.py +++ b/python/infinicore/tensor.py @@ -75,6 +75,17 @@ def permute(self, dims): def view(self, shape): return Tensor(self._underlying.view(shape)) + def debug(self, filename=None): + """Print tensor data or save to file for debugging + + Args: + filename: Optional filename to save raw binary data. If None, prints to stdout. + """ + if filename is None: + self._underlying.debug() + else: + self._underlying.debug(filename) + def empty(size, *, dtype=None, device=None, pin_memory=False): return Tensor( diff --git a/src/infinicore/pybind11/tensor.hpp b/src/infinicore/pybind11/tensor.hpp index 66fa06678..b7e50d561 100644 --- a/src/infinicore/pybind11/tensor.hpp +++ b/src/infinicore/pybind11/tensor.hpp @@ -17,7 +17,7 @@ inline void bind(py::module &m) { .def_property_readonly("dtype", [](const Tensor &tensor) { return tensor->dtype(); }) .def_property_readonly("device", [](const Tensor &tensor) { return tensor->device(); }) - .def("data_ptr", [](const Tensor &tensor) { return tensor->data(); }) + .def("data_ptr", [](const Tensor &tensor) { return reinterpret_cast(tensor->data()); }) .def("size", [](const Tensor &tensor, std::size_t dim) { return tensor->size(dim); }) .def("stride", [](const Tensor &tensor, std::size_t dim) { return tensor->stride(dim); }) .def("numel", [](const Tensor &tensor) { return tensor->numel(); }) @@ -25,6 +25,8 @@ inline void bind(py::module &m) { .def("is_contiguous", [](const Tensor &tensor) { return tensor->is_contiguous(); }) .def("is_pinned", [](const Tensor &tensor) { return tensor->is_pinned(); }) .def("info", [](const Tensor &tensor) { return tensor->info(); }) + .def("debug", [](const Tensor &tensor) { return tensor->debug(); }) + .def("debug", [](const Tensor &tensor, const std::string &filename) { return tensor->debug(filename); }) .def("copy_", [](Tensor &tensor, const Tensor &other) { tensor->copy_from(other); }) .def("to", [](const Tensor &tensor, const Device &device) { return tensor->to(device); }) diff --git a/src/infinicore/tensor/debug.cc b/src/infinicore/tensor/debug.cc new file mode 100644 index 000000000..6a93cf628 --- /dev/null +++ b/src/infinicore/tensor/debug.cc @@ -0,0 +1,376 @@ +#include "infinicore/context/context.hpp" +#include "infinicore/dtype.hpp" +#include "infinicore/tensor.hpp" + +#include +#include +#include +#include + +namespace infinicore { + +inline float f16_to_f32(uint16_t h) { + uint32_t sign = (h & 0x8000) << 16; + int32_t exponent = (h >> 10) & 0x1F; + uint32_t mantissa = h & 0x3FF; + + uint32_t f32; + if (exponent == 31) { + if (mantissa != 0) { + f32 = sign | 0x7F800000 | (mantissa << 13); + } else { + f32 = sign | 0x7F800000; + } + } else if (exponent == 0) { + if (mantissa == 0) { + f32 = sign; + } else { + exponent = -14; + while ((mantissa & 0x400) == 0) { + mantissa <<= 1; + exponent--; + } + mantissa &= 0x3FF; + f32 = sign | ((exponent + 127) << 23) | (mantissa << 13); + } + } else { + f32 = sign | ((exponent + 127 - 15) << 23) | (mantissa << 13); + } + + float result; + std::memcpy(&result, &f32, sizeof(result)); + return result; +} + +inline float bf16_to_f32(uint16_t val) { + uint32_t bits32 = static_cast(val) << 16; + float out; + std::memcpy(&out, &bits32, sizeof(out)); + return out; +} + +// Template function for printing data recursively +template +void print_data(const T *data, const Shape &shape, const Strides &strides, size_t dim) { + if (dim == shape.size() - 1) { + for (size_t i = 0; i < shape[dim]; i++) { + std::cout << data[i * strides[dim]] << " "; + } + std::cout << std::endl; + } else if (dim < shape.size() - 1) { + for (size_t i = 0; i < shape[dim]; i++) { + print_data(data + i * strides[dim], shape, strides, dim + 1); + } + } +} + +// Specialization for F16 (uint16_t) +template <> +void print_data(const uint16_t *data, const Shape &shape, const Strides &strides, size_t dim) { + if (dim == shape.size() - 1) { + for (size_t i = 0; i < shape[dim]; i++) { + std::cout << f16_to_f32(data[i * strides[dim]]) << " "; + } + std::cout << std::endl; + } else if (dim < shape.size() - 1) { + for (size_t i = 0; i < shape[dim]; i++) { + print_data(data + i * strides[dim], shape, strides, dim + 1); + } + } +} + +// Function for printing BF16 data +void print_data_bf16(const uint16_t *data, const Shape &shape, const Strides &strides, size_t dim) { + if (dim == shape.size() - 1) { + for (size_t i = 0; i < shape[dim]; i++) { + std::cout << bf16_to_f32(data[i * strides[dim]]) << " "; + } + std::cout << std::endl; + } else if (dim < shape.size() - 1) { + for (size_t i = 0; i < shape[dim]; i++) { + print_data_bf16(data + i * strides[dim], shape, strides, dim + 1); + } + } +} + +void TensorImpl::debug(const std::string &filename) const { + // Synchronize device if needed + context::syncDevice(); + + std::cout << info() << std::endl; + + const std::byte *cpu_data = nullptr; + std::byte *allocated_memory = nullptr; + + // Copy data to CPU if not already on CPU + if (this->device().getType() != Device::Type::CPU) { + size_t mem_size = this->numel() * dsize(this->dtype()); + allocated_memory = new std::byte[mem_size]; + context::memcpyD2H(allocated_memory, this->data(), mem_size); + cpu_data = allocated_memory; + } else { + cpu_data = this->data(); + } + + // If filename is provided, save to file + if (!filename.empty()) { + // Determine file format based on extension + bool is_text_format = false; + size_t dot_pos = filename.find_last_of('.'); + if (dot_pos != std::string::npos) { + std::string ext = filename.substr(dot_pos); + is_text_format = (ext == ".txt"); + } + + if (is_text_format) { + // Save as text format + std::ofstream outFile(filename); + if (!outFile) { + std::cerr << "Error opening file for writing: " << filename << "\n"; + if (allocated_memory) { + delete[] allocated_memory; + } + return; + } + + // Write header with tensor information + outFile << "# Tensor Debug Output\n"; + outFile << "# Shape: ["; + for (size_t i = 0; i < this->shape().size(); ++i) { + outFile << this->shape()[i]; + if (i < this->shape().size() - 1) { + outFile << ", "; + } + } + outFile << "]\n"; + outFile << "# Strides: ["; + for (size_t i = 0; i < this->strides().size(); ++i) { + outFile << this->strides()[i]; + if (i < this->strides().size() - 1) { + outFile << ", "; + } + } + outFile << "]\n"; + outFile << "# Dtype: " << toString(this->dtype()) << "\n"; + outFile << "# Contiguous: " << (this->is_contiguous() ? "Yes" : "No") << "\n"; + outFile << "# Elements: " << this->numel() << "\n"; + outFile << "#\n"; + + // Helper function to write data recursively + std::function write_data; + + switch (this->dtype()) { + case DataType::F16: + write_data = [&write_data](const std::byte *data, const Shape &shape, const Strides &strides, size_t dim, std::ofstream &out) { + const uint16_t *ptr = reinterpret_cast(data); + if (dim == shape.size() - 1) { + for (size_t i = 0; i < shape[dim]; i++) { + out << f16_to_f32(ptr[i * strides[dim]]); + if (i < shape[dim] - 1) { + out << " "; + } + } + out << "\n"; + } else { + for (size_t i = 0; i < shape[dim]; i++) { + write_data(data + i * strides[dim] * sizeof(uint16_t), shape, strides, dim + 1, out); + } + } + }; + break; + case DataType::F32: + write_data = [&write_data](const std::byte *data, const Shape &shape, const Strides &strides, size_t dim, std::ofstream &out) { + const float *ptr = reinterpret_cast(data); + if (dim == shape.size() - 1) { + for (size_t i = 0; i < shape[dim]; i++) { + out << ptr[i * strides[dim]]; + if (i < shape[dim] - 1) { + out << " "; + } + } + out << "\n"; + } else { + for (size_t i = 0; i < shape[dim]; i++) { + write_data(data + i * strides[dim] * sizeof(float), shape, strides, dim + 1, out); + } + } + }; + break; + case DataType::F64: + write_data = [&write_data](const std::byte *data, const Shape &shape, const Strides &strides, size_t dim, std::ofstream &out) { + const double *ptr = reinterpret_cast(data); + if (dim == shape.size() - 1) { + for (size_t i = 0; i < shape[dim]; i++) { + out << ptr[i * strides[dim]]; + if (i < shape[dim] - 1) { + out << " "; + } + } + out << "\n"; + } else { + for (size_t i = 0; i < shape[dim]; i++) { + write_data(data + i * strides[dim] * sizeof(double), shape, strides, dim + 1, out); + } + } + }; + break; + case DataType::I32: + write_data = [&write_data](const std::byte *data, const Shape &shape, const Strides &strides, size_t dim, std::ofstream &out) { + const int32_t *ptr = reinterpret_cast(data); + if (dim == shape.size() - 1) { + for (size_t i = 0; i < shape[dim]; i++) { + out << ptr[i * strides[dim]]; + if (i < shape[dim] - 1) { + out << " "; + } + } + out << "\n"; + } else { + for (size_t i = 0; i < shape[dim]; i++) { + write_data(data + i * strides[dim] * sizeof(int32_t), shape, strides, dim + 1, out); + } + } + }; + break; + case DataType::I64: + write_data = [&write_data](const std::byte *data, const Shape &shape, const Strides &strides, size_t dim, std::ofstream &out) { + const int64_t *ptr = reinterpret_cast(data); + if (dim == shape.size() - 1) { + for (size_t i = 0; i < shape[dim]; i++) { + out << ptr[i * strides[dim]]; + if (i < shape[dim] - 1) { + out << " "; + } + } + out << "\n"; + } else { + for (size_t i = 0; i < shape[dim]; i++) { + write_data(data + i * strides[dim] * sizeof(int64_t), shape, strides, dim + 1, out); + } + } + }; + break; + case DataType::BF16: + write_data = [&write_data](const std::byte *data, const Shape &shape, const Strides &strides, size_t dim, std::ofstream &out) { + const uint16_t *ptr = reinterpret_cast(data); + if (dim == shape.size() - 1) { + for (size_t i = 0; i < shape[dim]; i++) { + out << bf16_to_f32(ptr[i * strides[dim]]); + if (i < shape[dim] - 1) { + out << " "; + } + } + out << "\n"; + } else { + for (size_t i = 0; i < shape[dim]; i++) { + write_data(data + i * strides[dim] * sizeof(uint16_t), shape, strides, dim + 1, out); + } + } + }; + break; + default: + outFile << "# Unsupported data type for text output\n"; + outFile.close(); + if (allocated_memory) { + delete[] allocated_memory; + } + return; + } + + // Write the actual data + write_data(cpu_data, this->shape(), this->strides(), 0, outFile); + + outFile.close(); + std::cout << "Data written to text file: " << filename << "\n"; + } else { + // Save as binary format (default) + std::ofstream outFile(filename, std::ios::binary); + if (!outFile) { + std::cerr << "Error opening file for writing: " << filename << "\n"; + if (allocated_memory) { + delete[] allocated_memory; + } + return; + } + size_t mem_size = this->numel() * dsize(this->dtype()); + outFile.write(reinterpret_cast(cpu_data), mem_size); + outFile.close(); + std::cout << "Data written to binary file: " << filename << "\n"; + } + + if (allocated_memory) { + delete[] allocated_memory; + } + return; + } + + // Print data based on dtype + switch (this->dtype()) { + case DataType::F16: + print_data(reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::F32: + print_data(reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::F64: + print_data(reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::U64: + print_data(reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::I64: + print_data(reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::U32: + print_data(reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::I32: + print_data(reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::U16: + print_data(reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::I16: + print_data(reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::U8: + print_data(reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::I8: + print_data(reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::BF16: + print_data_bf16(reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::BOOL: + print_data(reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + default: + std::cout << "Unsupported data type for debug" << std::endl; + break; + } + + // Clean up allocated memory + if (allocated_memory) { + delete[] allocated_memory; + } +} + +void TensorImpl::debug() const { + this->debug(""); +} + +} // namespace infinicore diff --git a/test/infinicore/op/debug.py b/test/infinicore/op/debug.py new file mode 100644 index 000000000..5db66bc44 --- /dev/null +++ b/test/infinicore/op/debug.py @@ -0,0 +1,398 @@ +#!/usr/bin/env python3 +""" +Tensor Debug 功能测试脚本 + +简单测试 debug 功能是否正常工作 +""" + +import torch +import infinicore +import sys +import os +import numpy as np + + +def test_basic_debug(): + """测试基本的 debug 打印功能""" + print("\n" + "=" * 80) + print("测试 1: 基本 debug 打印") + print("=" * 80) + + device = infinicore.device("cpu", 0) + + # 测试 float32 + print("\n--- Float32 张量 (2x3) ---") + torch_tensor = torch.tensor([[1.0, 2.0, 3.0], + [4.0, 5.0, 6.0]], dtype=torch.float32) + infini_tensor = infinicore.from_blob( + torch_tensor.data_ptr(), + list(torch_tensor.shape), + dtype=infinicore.float32, + device=device + ) + infini_tensor.debug() + print("✓ Float32 打印成功") + + # 测试 int32 + print("\n--- Int32 张量 (2x2) ---") + torch_i32 = torch.tensor([[1, 2], [3, 4]], dtype=torch.int32) + infini_i32 = infinicore.from_blob( + torch_i32.data_ptr(), + list(torch_i32.shape), + dtype=infinicore.int32, + device=device + ) + infini_i32.debug() + print("✓ Int32 打印成功") + + +def test_save_to_file(): + """测试保存到文件""" + print("\n" + "=" * 80) + print("测试 2: 保存张量到文件") + print("=" * 80) + + device = infinicore.device("cpu", 0) + + # 创建张量 + torch_tensor = torch.arange(1, 13, dtype=torch.float32).reshape(3, 4) + print("\n原始张量:") + print(torch_tensor) + + infini_tensor = infinicore.from_blob( + torch_tensor.data_ptr(), + list(torch_tensor.shape), + dtype=infinicore.float32, + device=device + ) + + # 保存到文件 + filename = "/tmp/tensor_debug_test.bin" + print(f"\n保存到: {filename}") + infini_tensor.debug(filename) + + # 验证文件 + if os.path.exists(filename): + file_size = os.path.getsize(filename) + expected_size = 12 * 4 # 12 个 float32 + assert file_size == expected_size, f"文件大小不匹配: {file_size} vs {expected_size}" + + # 读取验证 + loaded = np.fromfile(filename, dtype=np.float32).reshape(3, 4) + print("\n从文件读取:") + print(loaded) + + os.remove(filename) + print("✓ 文件保存和读取成功") + else: + raise RuntimeError("文件未创建") + + +def test_multidimensional(): + """测试多维张量""" + print("\n" + "=" * 80) + print("测试 3: 多维张量") + print("=" * 80) + + device = infinicore.device("cpu", 0) + + # 3D 张量 + print("\n--- 3D 张量 (2x2x3) ---") + torch_3d = torch.arange(1, 13, dtype=torch.float32).reshape(2, 2, 3) + print("PyTorch 张量:") + print(torch_3d) + + infini_3d = infinicore.from_blob( + torch_3d.data_ptr(), + list(torch_3d.shape), + dtype=infinicore.float32, + device=device + ) + + print("\nInfiniCore debug 输出:") + infini_3d.debug() + print("✓ 3D 张量打印成功") + + +def test_infinicore_created(): + """测试 InfiniCore 创建的张量""" + print("\n" + "=" * 80) + print("测试 4: InfiniCore 创建的张量") + print("=" * 80) + + device = infinicore.device("cpu", 0) + + # ones 张量 + print("\n--- ones 张量 (2x3) ---") + ones_tensor = infinicore.ones([2, 3], dtype=infinicore.float32, device=device) + ones_tensor.debug() + print("✓ ones 张量打印成功") + + # zeros 张量 + print("\n--- zeros 张量 (3x2) ---") + zeros_tensor = infinicore.zeros([3, 2], dtype=infinicore.float32, device=device) + zeros_tensor.debug() + print("✓ zeros 张量打印成功") + + +def test_different_dtypes(): + """测试不同数据类型""" + print("\n" + "=" * 80) + print("测试 5: 不同数据类型") + print("=" * 80) + + device = infinicore.device("cpu", 0) + + dtypes = [ + (infinicore.float32, torch.float32, "Float32"), + (infinicore.int32, torch.int32, "Int32"), + (infinicore.int64, torch.int64, "Int64"), + ] + + for infini_dtype, torch_dtype, name in dtypes: + print(f"\n--- {name} ---") + torch_tensor = torch.arange(1, 7, dtype=torch_dtype).reshape(2, 3) + infini_tensor = infinicore.from_blob( + torch_tensor.data_ptr(), + list(torch_tensor.shape), + dtype=infini_dtype, + device=device + ) + infini_tensor.debug() + print(f"✓ {name} 测试通过") + + +def test_text_format(): + """测试文本格式保存""" + print("\n" + "=" * 80) + print("测试 6: 文本格式保存 (.txt)") + print("=" * 80) + + device = infinicore.device("cpu", 0) + + # 创建张量 + torch_tensor = torch.arange(1, 13, dtype=torch.float32).reshape(3, 4) + print("\n原始张量:") + print(torch_tensor) + + infini_tensor = infinicore.from_blob( + torch_tensor.data_ptr(), + list(torch_tensor.shape), + dtype=infinicore.float32, + device=device + ) + + # 保存为文本文件 + txt_filename = "/tmp/tensor_debug_test.txt" + print(f"\n保存为文本格式: {txt_filename}") + infini_tensor.debug(txt_filename) + + # 验证文本文件 + if os.path.exists(txt_filename): + print("\n文本文件内容:") + with open(txt_filename, 'r') as f: + content = f.read() + print(content) + + # 1. 验证元数据 + assert "# Tensor Debug Output" in content, "文本文件缺少标题" + assert "# Shape: [3, 4]" in content, "文本文件缺少形状信息" + assert "# Dtype: F32" in content, "文本文件缺少类型信息" + print("✓ 元数据验证通过") + + # 2. 提取并验证数值数据 + lines = content.split('\n') + data_lines = [line.strip() for line in lines + if line.strip() and not line.startswith('#')] + + print(f"\n提取到 {len(data_lines)} 行数据") + + # 解析数值 + loaded_data = [] + for i, line in enumerate(data_lines): + row = [float(x) for x in line.split()] + loaded_data.append(row) + print(f" 第 {i+1} 行: {row}") + + # 转换为 numpy 数组 + loaded_array = np.array(loaded_data, dtype=np.float32) + + # 3. 与原始数据对比 + expected = torch_tensor.numpy() + assert loaded_array.shape == expected.shape, \ + f"形状不匹配: {loaded_array.shape} vs {expected.shape}" + assert np.allclose(loaded_array, expected), \ + f"数值不匹配:\n加载的数据:\n{loaded_array}\n期望的数据:\n{expected}" + + print("✓ 数值验证通过") + + os.remove(txt_filename) + print("✓ 文本格式保存测试通过") + else: + raise RuntimeError("文本文件未创建") + + +def test_binary_format(): + """测试二进制格式保存""" + print("\n" + "=" * 80) + print("测试 7: 二进制格式保存 (.bin)") + print("=" * 80) + + device = infinicore.device("cpu", 0) + + # 创建张量 + torch_tensor = torch.arange(1, 13, dtype=torch.float32).reshape(3, 4) + print("\n原始张量:") + print(torch_tensor) + + infini_tensor = infinicore.from_blob( + torch_tensor.data_ptr(), + list(torch_tensor.shape), + dtype=infinicore.float32, + device=device + ) + + # 保存为二进制文件 + bin_filename = "/tmp/tensor_debug_test.bin" + print(f"\n保存为二进制格式: {bin_filename}") + infini_tensor.debug(bin_filename) + + # 验证二进制文件 + if os.path.exists(bin_filename): + file_size = os.path.getsize(bin_filename) + expected_size = 12 * 4 # 12 个 float32 + assert file_size == expected_size, \ + f"二进制文件大小不匹配: {file_size} vs {expected_size}" + + # 读取并验证数据 + loaded = np.fromfile(bin_filename, dtype=np.float32).reshape(3, 4) + print("\n从二进制文件读取:") + print(loaded) + + # 验证数据正确性 + assert np.allclose(loaded, torch_tensor.numpy()), "数据不匹配" + + os.remove(bin_filename) + print("✓ 二进制格式保存测试通过") + else: + raise RuntimeError("二进制文件未创建") + + +def test_format_comparison(): + """对比不同格式""" + print("\n" + "=" * 80) + print("测试 8: 对比不同格式") + print("=" * 80) + + device = infinicore.device("cpu", 0) + + # 创建小张量用于对比 + torch_tensor = torch.tensor([[1.5, 2.5], [3.5, 4.5]], dtype=torch.float32) + print("\n原始张量:") + print(torch_tensor) + + infini_tensor = infinicore.from_blob( + torch_tensor.data_ptr(), + list(torch_tensor.shape), + dtype=infinicore.float32, + device=device + ) + + bin_file = "/tmp/compare_test.bin" + txt_file = "/tmp/compare_test.txt" + + # 保存两种格式 + print("\n保存两种格式...") + infini_tensor.debug(bin_file) + infini_tensor.debug(txt_file) + + # 对比文件大小 + bin_size = os.path.getsize(bin_file) + txt_size = os.path.getsize(txt_file) + + print(f"\n文件大小对比:") + print(f" 二进制文件: {bin_size} 字节") + print(f" 文本文件: {txt_size} 字节") + print(f" 文本/二进制比: {txt_size/bin_size:.2f}x") + + # ===== 验证二进制文件 ===== + print("\n验证二进制文件:") + bin_data = np.fromfile(bin_file, dtype=np.float32).reshape(2, 2) + print(f" 读取的数据:\n{bin_data}") + assert np.allclose(bin_data, torch_tensor.numpy()), "二进制数据不匹配" + print(" ✓ 二进制文件数值正确") + + # ===== 验证文本文件 ===== + print("\n验证文本文件:") + with open(txt_file, 'r') as f: + txt_content = f.read() + + # 1. 元数据验证 + assert "# Tensor Debug Output" in txt_content, "缺少标题" + assert "# Shape: [2, 2]" in txt_content, "缺少形状信息" + assert "# Dtype: F32" in txt_content, "缺少类型信息" + print(" ✓ 元数据正确") + + # 2. 数值验证 + lines = txt_content.split('\n') + data_lines = [line.strip() for line in lines + if line.strip() and not line.startswith('#')] + + txt_data = [] + for line in data_lines: + row = [float(x) for x in line.split()] + txt_data.append(row) + + txt_array = np.array(txt_data, dtype=np.float32) + print(f" 读取的数据:\n{txt_array}") + + assert txt_array.shape == torch_tensor.shape, \ + f"文本文件形状不匹配: {txt_array.shape} vs {torch_tensor.shape}" + assert np.allclose(txt_array, torch_tensor.numpy()), \ + f"文本文件数值不匹配" + print(" ✓ 文本文件数值正确") + + # ===== 对比两种格式的数据一致性 ===== + print("\n验证两种格式数据一致性:") + assert np.allclose(bin_data, txt_array), \ + "二进制和文本文件的数据不一致!" + print(" ✓ 两种格式数据完全一致") + + # 清理 + os.remove(bin_file) + os.remove(txt_file) + + print("\n✓ 格式对比测试通过") + + +def main(): + """主测试函数""" + print("\n" + "=" * 80) + print("InfiniCore Tensor Debug 功能测试") + print("=" * 80) + + try: + test_basic_debug() + test_save_to_file() + test_multidimensional() + test_infinicore_created() + test_different_dtypes() + test_text_format() + test_binary_format() + test_format_comparison() + + print("\n" + "=" * 80) + print("✅ 所有测试通过!") + print("=" * 80) + return 0 + + except Exception as e: + print(f"\n❌ 测试失败: {e}") + import traceback + traceback.print_exc() + return 1 + + +if __name__ == "__main__": + sys.exit(main()) + From c7054b7acfdabee4bee52bc2d6a7c81be7078d93 Mon Sep 17 00:00:00 2001 From: zhuyue Date: Tue, 14 Oct 2025 09:59:43 +0800 Subject: [PATCH 2/3] refactor test scripts and remove txt write add large-scale and non-contiguous tensor I/O tests --- src/infinicore/tensor/debug.cc | 274 +++++------- test/infinicore/op/debug.py | 768 ++++++++++++++++++--------------- 2 files changed, 513 insertions(+), 529 deletions(-) diff --git a/src/infinicore/tensor/debug.cc b/src/infinicore/tensor/debug.cc index 6a93cf628..8cf31d592 100644 --- a/src/infinicore/tensor/debug.cc +++ b/src/infinicore/tensor/debug.cc @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include namespace infinicore { @@ -93,6 +95,22 @@ void print_data_bf16(const uint16_t *data, const Shape &shape, const Strides &st } } +// Template function for writing data recursively to binary file (handles non-contiguous tensors) +template +void write_binary_data(std::ofstream &out, const T *data, const Shape &shape, const Strides &strides, size_t dim) { + if (dim == shape.size() - 1) { + // Write the innermost dimension + for (size_t i = 0; i < shape[dim]; i++) { + out.write(reinterpret_cast(&data[i * strides[dim]]), sizeof(T)); + } + } else { + // Recursively process higher dimensions + for (size_t i = 0; i < shape[dim]; i++) { + write_binary_data(out, data + i * strides[dim], shape, strides, dim + 1); + } + } +} + void TensorImpl::debug(const std::string &filename) const { // Synchronize device if needed context::syncDevice(); @@ -100,207 +118,112 @@ void TensorImpl::debug(const std::string &filename) const { std::cout << info() << std::endl; const std::byte *cpu_data = nullptr; - std::byte *allocated_memory = nullptr; + std::unique_ptr allocated_memory; // RAII: 自动管理内存 // Copy data to CPU if not already on CPU if (this->device().getType() != Device::Type::CPU) { - size_t mem_size = this->numel() * dsize(this->dtype()); - allocated_memory = new std::byte[mem_size]; - context::memcpyD2H(allocated_memory, this->data(), mem_size); - cpu_data = allocated_memory; + size_t numel = this->numel(); + size_t element_size = dsize(this->dtype()); + + // 检查乘法溢出 + if (numel > 0 && element_size > std::numeric_limits::max() / numel) { + std::cerr << "Error: Memory size calculation overflow for tensor with " + << numel << " elements of size " << element_size << "\n"; + return; + } + + size_t mem_size = numel * element_size; + allocated_memory = std::make_unique(mem_size); + context::memcpyD2H(allocated_memory.get(), this->data(), mem_size); + cpu_data = allocated_memory.get(); } else { cpu_data = this->data(); } - // If filename is provided, save to file + // If filename is provided, save to binary file if (!filename.empty()) { - // Determine file format based on extension - bool is_text_format = false; - size_t dot_pos = filename.find_last_of('.'); - if (dot_pos != std::string::npos) { - std::string ext = filename.substr(dot_pos); - is_text_format = (ext == ".txt"); + std::ofstream outFile(filename, std::ios::binary); + if (!outFile) { + std::cerr << "Error opening file for writing: " << filename << "\n"; + return; // allocated_memory 会自动释放(RAII) } - if (is_text_format) { - // Save as text format - std::ofstream outFile(filename); - if (!outFile) { - std::cerr << "Error opening file for writing: " << filename << "\n"; - if (allocated_memory) { - delete[] allocated_memory; - } - return; - } - - // Write header with tensor information - outFile << "# Tensor Debug Output\n"; - outFile << "# Shape: ["; - for (size_t i = 0; i < this->shape().size(); ++i) { - outFile << this->shape()[i]; - if (i < this->shape().size() - 1) { - outFile << ", "; - } - } - outFile << "]\n"; - outFile << "# Strides: ["; - for (size_t i = 0; i < this->strides().size(); ++i) { - outFile << this->strides()[i]; - if (i < this->strides().size() - 1) { - outFile << ", "; - } - } - outFile << "]\n"; - outFile << "# Dtype: " << toString(this->dtype()) << "\n"; - outFile << "# Contiguous: " << (this->is_contiguous() ? "Yes" : "No") << "\n"; - outFile << "# Elements: " << this->numel() << "\n"; - outFile << "#\n"; - - // Helper function to write data recursively - std::function write_data; - + // Check if tensor is contiguous - for optimization + if (this->is_contiguous()) { + // Fast path: contiguous tensor, write in one go + size_t mem_size = this->numel() * dsize(this->dtype()); + outFile.write(reinterpret_cast(cpu_data), mem_size); + } else { + // Slow path: non-contiguous tensor, write element by element using strides switch (this->dtype()) { case DataType::F16: - write_data = [&write_data](const std::byte *data, const Shape &shape, const Strides &strides, size_t dim, std::ofstream &out) { - const uint16_t *ptr = reinterpret_cast(data); - if (dim == shape.size() - 1) { - for (size_t i = 0; i < shape[dim]; i++) { - out << f16_to_f32(ptr[i * strides[dim]]); - if (i < shape[dim] - 1) { - out << " "; - } - } - out << "\n"; - } else { - for (size_t i = 0; i < shape[dim]; i++) { - write_data(data + i * strides[dim] * sizeof(uint16_t), shape, strides, dim + 1, out); - } - } - }; + case DataType::BF16: + write_binary_data(outFile, reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); break; case DataType::F32: - write_data = [&write_data](const std::byte *data, const Shape &shape, const Strides &strides, size_t dim, std::ofstream &out) { - const float *ptr = reinterpret_cast(data); - if (dim == shape.size() - 1) { - for (size_t i = 0; i < shape[dim]; i++) { - out << ptr[i * strides[dim]]; - if (i < shape[dim] - 1) { - out << " "; - } - } - out << "\n"; - } else { - for (size_t i = 0; i < shape[dim]; i++) { - write_data(data + i * strides[dim] * sizeof(float), shape, strides, dim + 1, out); - } - } - }; + write_binary_data(outFile, reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); break; case DataType::F64: - write_data = [&write_data](const std::byte *data, const Shape &shape, const Strides &strides, size_t dim, std::ofstream &out) { - const double *ptr = reinterpret_cast(data); - if (dim == shape.size() - 1) { - for (size_t i = 0; i < shape[dim]; i++) { - out << ptr[i * strides[dim]]; - if (i < shape[dim] - 1) { - out << " "; - } - } - out << "\n"; - } else { - for (size_t i = 0; i < shape[dim]; i++) { - write_data(data + i * strides[dim] * sizeof(double), shape, strides, dim + 1, out); - } - } - }; + write_binary_data(outFile, reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); break; - case DataType::I32: - write_data = [&write_data](const std::byte *data, const Shape &shape, const Strides &strides, size_t dim, std::ofstream &out) { - const int32_t *ptr = reinterpret_cast(data); - if (dim == shape.size() - 1) { - for (size_t i = 0; i < shape[dim]; i++) { - out << ptr[i * strides[dim]]; - if (i < shape[dim] - 1) { - out << " "; - } - } - out << "\n"; - } else { - for (size_t i = 0; i < shape[dim]; i++) { - write_data(data + i * strides[dim] * sizeof(int32_t), shape, strides, dim + 1, out); - } - } - }; + case DataType::U64: + write_binary_data(outFile, reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); break; case DataType::I64: - write_data = [&write_data](const std::byte *data, const Shape &shape, const Strides &strides, size_t dim, std::ofstream &out) { - const int64_t *ptr = reinterpret_cast(data); - if (dim == shape.size() - 1) { - for (size_t i = 0; i < shape[dim]; i++) { - out << ptr[i * strides[dim]]; - if (i < shape[dim] - 1) { - out << " "; - } - } - out << "\n"; - } else { - for (size_t i = 0; i < shape[dim]; i++) { - write_data(data + i * strides[dim] * sizeof(int64_t), shape, strides, dim + 1, out); - } - } - }; + write_binary_data(outFile, reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); break; - case DataType::BF16: - write_data = [&write_data](const std::byte *data, const Shape &shape, const Strides &strides, size_t dim, std::ofstream &out) { - const uint16_t *ptr = reinterpret_cast(data); - if (dim == shape.size() - 1) { - for (size_t i = 0; i < shape[dim]; i++) { - out << bf16_to_f32(ptr[i * strides[dim]]); - if (i < shape[dim] - 1) { - out << " "; - } - } - out << "\n"; - } else { - for (size_t i = 0; i < shape[dim]; i++) { - write_data(data + i * strides[dim] * sizeof(uint16_t), shape, strides, dim + 1, out); - } - } - }; + case DataType::U32: + write_binary_data(outFile, reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::I32: + write_binary_data(outFile, reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::U16: + write_binary_data(outFile, reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::I16: + write_binary_data(outFile, reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::U8: + write_binary_data(outFile, reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::I8: + write_binary_data(outFile, reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); + break; + case DataType::BOOL: + // 布尔类型特殊处理:转换为 uint8_t 以保证跨平台一致性 + write_binary_data(outFile, reinterpret_cast(cpu_data), + this->shape(), this->strides(), 0); break; default: - outFile << "# Unsupported data type for text output\n"; - outFile.close(); - if (allocated_memory) { - delete[] allocated_memory; - } + std::cerr << "Unsupported data type for binary output\n"; return; } + } - // Write the actual data - write_data(cpu_data, this->shape(), this->strides(), 0, outFile); - - outFile.close(); - std::cout << "Data written to text file: " << filename << "\n"; - } else { - // Save as binary format (default) - std::ofstream outFile(filename, std::ios::binary); - if (!outFile) { - std::cerr << "Error opening file for writing: " << filename << "\n"; - if (allocated_memory) { - delete[] allocated_memory; - } - return; - } - size_t mem_size = this->numel() * dsize(this->dtype()); - outFile.write(reinterpret_cast(cpu_data), mem_size); - outFile.close(); - std::cout << "Data written to binary file: " << filename << "\n"; + // 显式关闭文件并检查是否成功 + outFile.close(); + if (!outFile) { + std::cerr << "Error: Failed to write data to file: " << filename << "\n"; + return; } - if (allocated_memory) { - delete[] allocated_memory; + std::cout << "Data written to binary file: " << filename; + if (!this->is_contiguous()) { + std::cout << " (non-contiguous tensor, wrote " << this->numel() << " elements)"; } + std::cout << "\n"; return; } @@ -362,11 +285,6 @@ void TensorImpl::debug(const std::string &filename) const { std::cout << "Unsupported data type for debug" << std::endl; break; } - - // Clean up allocated memory - if (allocated_memory) { - delete[] allocated_memory; - } } void TensorImpl::debug() const { diff --git a/test/infinicore/op/debug.py b/test/infinicore/op/debug.py index 5db66bc44..e65db29d8 100644 --- a/test/infinicore/op/debug.py +++ b/test/infinicore/op/debug.py @@ -2,7 +2,7 @@ """ Tensor Debug 功能测试脚本 -简单测试 debug 功能是否正常工作 +测试 debug 功能在不同设备和数据类型下的正确性 """ import torch @@ -10,389 +10,455 @@ import sys import os import numpy as np +import time +# Framework path +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) -def test_basic_debug(): - """测试基本的 debug 打印功能""" - print("\n" + "=" * 80) - print("测试 1: 基本 debug 打印") - print("=" * 80) - - device = infinicore.device("cpu", 0) - - # 测试 float32 - print("\n--- Float32 张量 (2x3) ---") - torch_tensor = torch.tensor([[1.0, 2.0, 3.0], - [4.0, 5.0, 6.0]], dtype=torch.float32) - infini_tensor = infinicore.from_blob( - torch_tensor.data_ptr(), - list(torch_tensor.shape), - dtype=infinicore.float32, - device=device - ) - infini_tensor.debug() - print("✓ Float32 打印成功") - - # 测试 int32 - print("\n--- Int32 张量 (2x2) ---") - torch_i32 = torch.tensor([[1, 2], [3, 4]], dtype=torch.int32) - infini_i32 = infinicore.from_blob( - torch_i32.data_ptr(), - list(torch_i32.shape), - dtype=infinicore.int32, - device=device - ) - infini_i32.debug() - print("✓ Int32 打印成功") +from framework import ( + TestConfig, + TestRunner, + TestCase, + create_infinicore_tensor, + get_args, + get_test_devices, + to_torch_dtype, + InfiniDeviceNames, + torch_device_map, +) +# ============================================================================== +# Test Setup +# ============================================================================== -def test_save_to_file(): - """测试保存到文件""" - print("\n" + "=" * 80) - print("测试 2: 保存张量到文件") - print("=" * 80) - - device = infinicore.device("cpu", 0) - - # 创建张量 - torch_tensor = torch.arange(1, 13, dtype=torch.float32).reshape(3, 4) - print("\n原始张量:") - print(torch_tensor) - - infini_tensor = infinicore.from_blob( - torch_tensor.data_ptr(), - list(torch_tensor.shape), - dtype=infinicore.float32, - device=device - ) - - # 保存到文件 - filename = "/tmp/tensor_debug_test.bin" - print(f"\n保存到: {filename}") - infini_tensor.debug(filename) - - # 验证文件 - if os.path.exists(filename): - file_size = os.path.getsize(filename) - expected_size = 12 * 4 # 12 个 float32 - assert file_size == expected_size, f"文件大小不匹配: {file_size} vs {expected_size}" - - # 读取验证 - loaded = np.fromfile(filename, dtype=np.float32).reshape(3, 4) - print("\n从文件读取:") - print(loaded) - - os.remove(filename) - print("✓ 文件保存和读取成功") - else: - raise RuntimeError("文件未创建") +# Test cases - 定义不同的测试场景 +_TEST_CASES = [ + TestCase("basic_print", (2, 3)), # 基本打印 + TestCase("binary_save", (3, 4)), # 二进制保存 + TestCase("multidimensional", (2, 2, 3)), # 多维张量 +] +# 非连续内存布局测试用例 (is_contiguous=False) +_NON_CONTIGUOUS_TEST_CASES = [ + TestCase("non_contiguous", (3, 4)), # 测试 transpose 等导致的非连续内存布局 +] -def test_multidimensional(): - """测试多维张量""" - print("\n" + "=" * 80) - print("测试 3: 多维张量") - print("=" * 80) - - device = infinicore.device("cpu", 0) - - # 3D 张量 - print("\n--- 3D 张量 (2x2x3) ---") - torch_3d = torch.arange(1, 13, dtype=torch.float32).reshape(2, 2, 3) - print("PyTorch 张量:") - print(torch_3d) - - infini_3d = infinicore.from_blob( - torch_3d.data_ptr(), - list(torch_3d.shape), - dtype=infinicore.float32, - device=device - ) - - print("\nInfiniCore debug 输出:") - infini_3d.debug() - print("✓ 3D 张量打印成功") +# 大规模性能测试用例 - 一千万个数据 +_LARGE_SCALE_TEST_CASES = [ + TestCase("large_scale_binary", (10000000,)), # 1D: 一千万个元素 +] +# Data types - 包含所有需要测试的数据类型 +_TENSOR_DTYPES = [ + infinicore.float32, + infinicore.float16, + infinicore.bfloat16, +] -def test_infinicore_created(): - """测试 InfiniCore 创建的张量""" - print("\n" + "=" * 80) - print("测试 4: InfiniCore 创建的张量") - print("=" * 80) - - device = infinicore.device("cpu", 0) - - # ones 张量 - print("\n--- ones 张量 (2x3) ---") - ones_tensor = infinicore.ones([2, 3], dtype=infinicore.float32, device=device) - ones_tensor.debug() - print("✓ ones 张量打印成功") - - # zeros 张量 - print("\n--- zeros 张量 (3x2) ---") - zeros_tensor = infinicore.zeros([3, 2], dtype=infinicore.float32, device=device) - zeros_tensor.debug() - print("✓ zeros 张量打印成功") +# Tolerance map - 用于数值验证时的容差 +_TOLERANCE_MAP = { + infinicore.float16: {"atol": 0, "rtol": 1e-3}, + infinicore.float32: {"atol": 0, "rtol": 1e-5}, + infinicore.bfloat16: {"atol": 0, "rtol": 1e-2}, + infinicore.int32: {"atol": 0, "rtol": 0}, + infinicore.int64: {"atol": 0, "rtol": 0}, +} +# ============================================================================== +# Helper Functions +# ============================================================================== -def test_different_dtypes(): - """测试不同数据类型""" - print("\n" + "=" * 80) - print("测试 5: 不同数据类型") - print("=" * 80) - - device = infinicore.device("cpu", 0) - - dtypes = [ - (infinicore.float32, torch.float32, "Float32"), - (infinicore.int32, torch.int32, "Int32"), - (infinicore.int64, torch.int64, "Int64"), - ] - - for infini_dtype, torch_dtype, name in dtypes: - print(f"\n--- {name} ---") - torch_tensor = torch.arange(1, 7, dtype=torch_dtype).reshape(2, 3) - infini_tensor = infinicore.from_blob( - torch_tensor.data_ptr(), - list(torch_tensor.shape), - dtype=infini_dtype, - device=device - ) - infini_tensor.debug() - print(f"✓ {name} 测试通过") +def load_binary_with_torch(filename, dtype, shape): + """使用 torch.frombuffer 读取二进制文件""" + torch_dtype = to_torch_dtype(dtype) + with open(filename, 'rb') as f: + data = f.read() + return torch.frombuffer(data, dtype=torch_dtype).reshape(shape) -def test_text_format(): - """测试文本格式保存""" - print("\n" + "=" * 80) - print("测试 6: 文本格式保存 (.txt)") - print("=" * 80) - - device = infinicore.device("cpu", 0) - - # 创建张量 - torch_tensor = torch.arange(1, 13, dtype=torch.float32).reshape(3, 4) - print("\n原始张量:") - print(torch_tensor) - - infini_tensor = infinicore.from_blob( - torch_tensor.data_ptr(), - list(torch_tensor.shape), - dtype=infinicore.float32, - device=device - ) +# ============================================================================== +# Test Methods +# ============================================================================== + +def test_basic_print(device, test_case, dtype, config): + """测试基本的 debug 打印功能""" + test_name, shape = test_case.args - # 保存为文本文件 - txt_filename = "/tmp/tensor_debug_test.txt" - print(f"\n保存为文本格式: {txt_filename}") - infini_tensor.debug(txt_filename) - - # 验证文本文件 - if os.path.exists(txt_filename): - print("\n文本文件内容:") - with open(txt_filename, 'r') as f: - content = f.read() - print(content) - - # 1. 验证元数据 - assert "# Tensor Debug Output" in content, "文本文件缺少标题" - assert "# Shape: [3, 4]" in content, "文本文件缺少形状信息" - assert "# Dtype: F32" in content, "文本文件缺少类型信息" - print("✓ 元数据验证通过") - - # 2. 提取并验证数值数据 - lines = content.split('\n') - data_lines = [line.strip() for line in lines - if line.strip() and not line.startswith('#')] - - print(f"\n提取到 {len(data_lines)} 行数据") - - # 解析数值 - loaded_data = [] - for i, line in enumerate(data_lines): - row = [float(x) for x in line.split()] - loaded_data.append(row) - print(f" 第 {i+1} 行: {row}") - - # 转换为 numpy 数组 - loaded_array = np.array(loaded_data, dtype=np.float32) - - # 3. 与原始数据对比 - expected = torch_tensor.numpy() - assert loaded_array.shape == expected.shape, \ - f"形状不匹配: {loaded_array.shape} vs {expected.shape}" - assert np.allclose(loaded_array, expected), \ - f"数值不匹配:\n加载的数据:\n{loaded_array}\n期望的数据:\n{expected}" - - print("✓ 数值验证通过") - - os.remove(txt_filename) - print("✓ 文本格式保存测试通过") - else: - raise RuntimeError("文本文件未创建") + print(f"Testing Basic Print on {InfiniDeviceNames[device]} with " + f"shape:{shape}, dtype:{dtype}") + + device_str = torch_device_map[device] + torch_dtype = to_torch_dtype(dtype) + + # 创建测试张量 + torch_tensor = torch.arange(1, int(np.prod(shape)) + 1, + dtype=torch_dtype, device=device_str).reshape(shape) + + infini_tensor = create_infinicore_tensor(torch_tensor, device_str) + + # 测试 debug 打印(不保存文件) + infini_tensor.debug() + + print(f"✓ Basic print test passed") -def test_binary_format(): +def test_binary_save(device, test_case, dtype, config): """测试二进制格式保存""" - print("\n" + "=" * 80) - print("测试 7: 二进制格式保存 (.bin)") - print("=" * 80) - - device = infinicore.device("cpu", 0) - - # 创建张量 - torch_tensor = torch.arange(1, 13, dtype=torch.float32).reshape(3, 4) - print("\n原始张量:") - print(torch_tensor) - - infini_tensor = infinicore.from_blob( - torch_tensor.data_ptr(), - list(torch_tensor.shape), - dtype=infinicore.float32, - device=device - ) + test_name, shape = test_case.args + + print(f"Testing Binary Save on {InfiniDeviceNames[device]} with " + f"shape:{shape}, dtype:{dtype}") + + device_str = torch_device_map[device] + torch_dtype = to_torch_dtype(dtype) + + # 创建测试张量 + torch_tensor = torch.arange(1, int(np.prod(shape)) + 1, + dtype=torch_dtype, device=device_str).reshape(shape) + + infini_tensor = create_infinicore_tensor(torch_tensor, device_str) # 保存为二进制文件 - bin_filename = "/tmp/tensor_debug_test.bin" - print(f"\n保存为二进制格式: {bin_filename}") - infini_tensor.debug(bin_filename) + bin_file = f"/tmp/debug_test_{device}_{dtype}_binary.bin" + infini_tensor.debug(bin_file) - # 验证二进制文件 - if os.path.exists(bin_filename): - file_size = os.path.getsize(bin_filename) - expected_size = 12 * 4 # 12 个 float32 - assert file_size == expected_size, \ - f"二进制文件大小不匹配: {file_size} vs {expected_size}" - - # 读取并验证数据 - loaded = np.fromfile(bin_filename, dtype=np.float32).reshape(3, 4) - print("\n从二进制文件读取:") - print(loaded) - - # 验证数据正确性 - assert np.allclose(loaded, torch_tensor.numpy()), "数据不匹配" - - os.remove(bin_filename) - print("✓ 二进制格式保存测试通过") - else: - raise RuntimeError("二进制文件未创建") + # 验证文件存在 + assert os.path.exists(bin_file), f"Binary file not created: {bin_file}" + + # 验证文件大小 + expected_size = int(np.prod(shape)) * torch_tensor.element_size() + actual_size = os.path.getsize(bin_file) + assert actual_size == expected_size, \ + f"Binary file size mismatch: {actual_size} vs {expected_size}" + + # 使用 torch.frombuffer 读取并验证 + loaded_tensor = load_binary_with_torch(bin_file, dtype, shape) + + # 将两个张量都移到 CPU 进行比较 + torch_tensor_cpu = torch_tensor.cpu() + loaded_tensor_cpu = loaded_tensor.cpu() + + tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-5}) + assert torch.allclose(loaded_tensor_cpu, torch_tensor_cpu, + atol=tolerance["atol"], rtol=tolerance["rtol"]), \ + f"Binary data mismatch" + + # 清理 + os.remove(bin_file) + print(f"✓ Binary save test passed") + + +def test_multidimensional(device, test_case, dtype, config): + """测试多维张量""" + test_name, shape = test_case.args + + print(f"Testing Multidimensional on {InfiniDeviceNames[device]} with " + f"shape:{shape}, dtype:{dtype}") + + device_str = torch_device_map[device] + torch_dtype = to_torch_dtype(dtype) + + # 创建多维张量 + torch_tensor = torch.arange(1, int(np.prod(shape)) + 1, + dtype=torch_dtype, device=device_str).reshape(shape) + + infini_tensor = create_infinicore_tensor(torch_tensor, device_str) + + # 测试打印 + infini_tensor.debug() + + # 测试保存和读取 + bin_file = f"/tmp/debug_test_multidim_{device}_{dtype}.bin" + infini_tensor.debug(bin_file) + + assert os.path.exists(bin_file), "Multidimensional binary file not created" + + # 验证 + loaded_tensor = load_binary_with_torch(bin_file, dtype, shape) + torch_tensor_cpu = torch_tensor.cpu() + loaded_tensor_cpu = loaded_tensor.cpu() + + tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-5}) + assert torch.allclose(loaded_tensor_cpu, torch_tensor_cpu, + atol=tolerance["atol"], rtol=tolerance["rtol"]), \ + f"Multidimensional data mismatch" + + # 清理 + os.remove(bin_file) + print(f"✓ Multidimensional test passed") -def test_format_comparison(): - """对比不同格式""" - print("\n" + "=" * 80) - print("测试 8: 对比不同格式") - print("=" * 80) - - device = infinicore.device("cpu", 0) - - # 创建小张量用于对比 - torch_tensor = torch.tensor([[1.5, 2.5], [3.5, 4.5]], dtype=torch.float32) - print("\n原始张量:") - print(torch_tensor) - - infini_tensor = infinicore.from_blob( - torch_tensor.data_ptr(), - list(torch_tensor.shape), - dtype=infinicore.float32, - device=device +def test_non_contiguous_stride(device, test_case, dtype, config): + """测试非连续内存布局的情况(is_contiguous=False,例如 transpose 后的张量)""" + test_name, shape = test_case.args + + print(f"\n{'='*70}") + print(f"Testing Non-Contiguous Memory Layout on {InfiniDeviceNames[device]}") + print(f" Shape: {shape}, Dtype: {dtype}") + print(f"{'='*70}") + + device_str = torch_device_map[device] + torch_dtype = to_torch_dtype(dtype) + + # 创建连续张量 + print(f"\nStep 1: Creating contiguous tensor...") + torch_tensor_orig = torch.arange(1, int(np.prod(shape)) + 1, + dtype=torch_dtype, device=device_str).reshape(shape) + print(f" Original shape: {torch_tensor_orig.shape}") + print(f" Original stride: {torch_tensor_orig.stride()}") + print(f" Is contiguous: {torch_tensor_orig.is_contiguous()}") + print(f" Data:\n{torch_tensor_orig}") + + # 进行 transpose 操作,创建非连续张量 + print(f"\nStep 2: Transposing to create non-contiguous tensor...") + torch_tensor_t = torch_tensor_orig.t() # transpose + print(f" Transposed shape: {torch_tensor_t.shape}") + print(f" Transposed stride: {torch_tensor_t.stride()}") + print(f" Is contiguous: {torch_tensor_t.is_contiguous()}") + print(f" Data:\n{torch_tensor_t}") + + # 创建 InfiniCore 张量(非连续) + # 注意:from_blob 不支持 strides,所以我们使用 permute 创建非连续张量 + # permute([1, 0]) 相当于 transpose,会创建非连续的内存布局 + infini_tensor_orig = create_infinicore_tensor(torch_tensor_orig, device_str) + infini_tensor_t = infini_tensor_orig.as_strided( + list(torch_tensor_t.shape), + list(torch_tensor_t.stride()) ) + + print(f"\nStep 3: InfiniCore tensor after permute:") + print(f" Shape: {infini_tensor_t.shape}") + print(f" Stride: {infini_tensor_t.stride()}") + print(f" Is contiguous: {infini_tensor_t.is_contiguous()}") + + # ===== 测试二进制格式 ===== + print(f"\n{'='*70}") + print(f"Testing Binary Format (.bin) with Non-Contiguous Memory Layout") + print(f"{'='*70}") + print(f"Note: Binary format now SUPPORTS non-contiguous memory layout!") + print(f" It automatically detects and handles stride correctly.") + + bin_file = f"/tmp/debug_non_contiguous_{device}_{dtype}.bin" + infini_tensor_t.debug(bin_file) + + # 验证二进制文件 + assert os.path.exists(bin_file), f"Binary file not created: {bin_file}" + + # 检查文件大小 + actual_size = os.path.getsize(bin_file) + expected_size = int(np.prod(torch_tensor_t.shape)) * torch_tensor_t.element_size() + + print(f"\nFile size check:") + print(f" Expected: {expected_size} bytes ({int(np.prod(torch_tensor_t.shape))} elements)") + print(f" Actual: {actual_size} bytes") + + assert actual_size == expected_size, \ + f"File size mismatch: {actual_size} vs {expected_size}" + print(f" ✓ File size is correct") + + # 读取并验证数据 + loaded_tensor = load_binary_with_torch(bin_file, dtype, torch_tensor_t.shape) + torch_tensor_cpu = torch_tensor_t.cpu() + loaded_tensor_cpu = loaded_tensor.cpu() + + tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-5}) - bin_file = "/tmp/compare_test.bin" - txt_file = "/tmp/compare_test.txt" + print(f"\nData verification:") + print(f" Expected (first 2 rows):\n{torch_tensor_cpu[:2]}") + print(f" Got (first 2 rows):\n{loaded_tensor_cpu[:2]}") - # 保存两种格式 - print("\n保存两种格式...") + assert torch.allclose(loaded_tensor_cpu, torch_tensor_cpu, + atol=tolerance["atol"], rtol=tolerance["rtol"]), \ + f"Data verification failed: loaded data doesn't match expected" + + print(f"\n✓ Binary format: Data matches perfectly!") + print(f" Binary format correctly handles non-contiguous memory layout using stride") + + # 清理 + os.remove(bin_file) + + print(f"\n{'='*70}") + print(f"Non-Contiguous Memory Layout Test Summary:") + print(f" ✅ Binary format (.bin): NOW supports non-contiguous memory!") + print(f" Performance: Contiguous tensors use fast path, non-contiguous use stride-based writing") + print(f"{'='*70}\n") + + +def test_large_scale_binary_performance(device, test_case, dtype, config): + """测试大规模数据二进制保存性能(一千万个数据)""" + test_name, shape = test_case.args + + num_elements = int(np.prod(shape)) + element_size_bytes = { + infinicore.float32: 4, + infinicore.float16: 2, + infinicore.bfloat16: 2, + infinicore.int32: 4, + infinicore.int64: 8, + } + + total_size_mb = (num_elements * element_size_bytes.get(dtype, 4)) / (1024 * 1024) + + print(f"\n{'='*70}") + print(f"Performance Test: Large Scale Binary Save") + print(f" Device: {InfiniDeviceNames[device]}") + print(f" Shape: {shape}") + print(f" Elements: {num_elements:,}") + print(f" Dtype: {dtype}") + print(f" Expected file size: {total_size_mb:.2f} MB") + print(f"{'='*70}") + + device_str = torch_device_map[device] + torch_dtype = to_torch_dtype(dtype) + + # 创建大规模张量 + print(f"Creating tensor with {num_elements:,} elements...") + create_start = time.time() + torch_tensor = torch.randn(shape, dtype=torch_dtype, device=device_str) + create_time = time.time() - create_start + print(f" Tensor creation time: {create_time:.4f} seconds") + + infini_tensor = create_infinicore_tensor(torch_tensor, device_str) + + # 测试保存性能 + bin_file = f"/tmp/debug_large_scale_{device}_{dtype}.bin" + + print(f"\n{'='*70}") + print(f"[1/2] Writing Binary File") + print(f"{'='*70}") + print(f"File: {bin_file}") + save_start = time.time() infini_tensor.debug(bin_file) - infini_tensor.debug(txt_file) - - # 对比文件大小 - bin_size = os.path.getsize(bin_file) - txt_size = os.path.getsize(txt_file) - - print(f"\n文件大小对比:") - print(f" 二进制文件: {bin_size} 字节") - print(f" 文本文件: {txt_size} 字节") - print(f" 文本/二进制比: {txt_size/bin_size:.2f}x") - - # ===== 验证二进制文件 ===== - print("\n验证二进制文件:") - bin_data = np.fromfile(bin_file, dtype=np.float32).reshape(2, 2) - print(f" 读取的数据:\n{bin_data}") - assert np.allclose(bin_data, torch_tensor.numpy()), "二进制数据不匹配" - print(" ✓ 二进制文件数值正确") - - # ===== 验证文本文件 ===== - print("\n验证文本文件:") - with open(txt_file, 'r') as f: - txt_content = f.read() - - # 1. 元数据验证 - assert "# Tensor Debug Output" in txt_content, "缺少标题" - assert "# Shape: [2, 2]" in txt_content, "缺少形状信息" - assert "# Dtype: F32" in txt_content, "缺少类型信息" - print(" ✓ 元数据正确") - - # 2. 数值验证 - lines = txt_content.split('\n') - data_lines = [line.strip() for line in lines - if line.strip() and not line.startswith('#')] - - txt_data = [] - for line in data_lines: - row = [float(x) for x in line.split()] - txt_data.append(row) - - txt_array = np.array(txt_data, dtype=np.float32) - print(f" 读取的数据:\n{txt_array}") - - assert txt_array.shape == torch_tensor.shape, \ - f"文本文件形状不匹配: {txt_array.shape} vs {torch_tensor.shape}" - assert np.allclose(txt_array, torch_tensor.numpy()), \ - f"文本文件数值不匹配" - print(" ✓ 文本文件数值正确") - - # ===== 对比两种格式的数据一致性 ===== - print("\n验证两种格式数据一致性:") - assert np.allclose(bin_data, txt_array), \ - "二进制和文本文件的数据不一致!" - print(" ✓ 两种格式数据完全一致") + save_time = time.time() - save_start + + # 验证文件存在 + assert os.path.exists(bin_file), f"Binary file not created: {bin_file}" + + # 获取实际文件大小 + actual_size = os.path.getsize(bin_file) + actual_size_mb = actual_size / (1024 * 1024) + + # 计算写入吞吐量 + write_throughput_mbps = actual_size_mb / save_time if save_time > 0 else 0 + + # 打印写入性能结果 + print(f"\n✓ Write Performance:") + print(f" File size: {actual_size_mb:.2f} MB ({actual_size:,} bytes)") + print(f" Write time: {save_time:.4f} seconds") + print(f" Write throughput: {write_throughput_mbps:.2f} MB/s") + print(f" Elements written/sec: {num_elements/save_time:,.0f}") + + # 测试读取性能 + print(f"\n{'='*70}") + print(f"[2/2] Reading Binary File (for verification)") + print(f"{'='*70}") + read_start = time.time() + loaded_tensor = load_binary_with_torch(bin_file, dtype, shape) + read_time = time.time() - read_start + read_throughput_mbps = actual_size_mb / read_time if read_time > 0 else 0 + + print(f"\n✓ Read Performance:") + print(f" Read time: {read_time:.4f} seconds") + print(f" Read throughput: {read_throughput_mbps:.2f} MB/s") + print(f" Elements read/sec: {num_elements/read_time:,.0f}") + + # 简单验证前几个元素(不做完整验证以节省时间) + torch_tensor_cpu = torch_tensor.cpu() + loaded_tensor_cpu = loaded_tensor.cpu() + + sample_size = min(1000, num_elements) + tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-5}) + assert torch.allclose(loaded_tensor_cpu.flatten()[:sample_size], + torch_tensor_cpu.flatten()[:sample_size], + atol=tolerance["atol"], rtol=tolerance["rtol"]), \ + f"Data verification failed (sampled first {sample_size} elements)" + + print(f" Data verification: ✓ (sampled first {sample_size} elements)") + + # 打印性能总结 + print(f"\n{'='*70}") + print(f"Performance Summary") + print(f"{'='*70}") + print(f" Elements: {num_elements:,}") + print(f" File size: {actual_size_mb:.2f} MB") + print(f" Write time: {save_time:.4f} sec → {write_throughput_mbps:.2f} MB/s") + print(f" Read time: {read_time:.4f} sec → {read_throughput_mbps:.2f} MB/s") + print(f" Speed ratio (Read/Write): {read_throughput_mbps/write_throughput_mbps:.2f}x") + print(f"{'='*70}") # 清理 os.remove(bin_file) - os.remove(txt_file) + print(f"\n✓ Large scale performance test passed\n") - print("\n✓ 格式对比测试通过") +# ============================================================================== +# Main Execution Function +# ============================================================================== def main(): - """主测试函数""" - print("\n" + "=" * 80) - print("InfiniCore Tensor Debug 功能测试") - print("=" * 80) - - try: - test_basic_debug() - test_save_to_file() - test_multidimensional() - test_infinicore_created() - test_different_dtypes() - test_text_format() - test_binary_format() - test_format_comparison() - - print("\n" + "=" * 80) - print("✅ 所有测试通过!") - print("=" * 80) - return 0 + args = get_args() + + # 创建测试配置 + config = TestConfig( + tensor_dtypes=_TENSOR_DTYPES, + tolerance_map=_TOLERANCE_MAP, + debug=args.debug, + bench=False, # debug 测试不需要性能测试 + ) + + # 获取测试设备 + devices = get_test_devices(args) + + print("Starting debug tests...") + + all_passed = True + + # 为每种测试类型运行测试 + test_funcs = [ + ("Basic Print", test_basic_print, [_TEST_CASES[0]]), + ("Binary Save", test_binary_save, [_TEST_CASES[1]]), + ("Multidimensional", test_multidimensional, [_TEST_CASES[2]]), + ] + + for test_name, test_func, test_cases in test_funcs: + print(f"\n{'='*60}") + print(f"Testing {test_name}") + print(f"{'='*60}") - except Exception as e: - print(f"\n❌ 测试失败: {e}") - import traceback - traceback.print_exc() - return 1 + runner = TestRunner(test_cases, config) + passed = runner.run_tests(devices, test_func) + all_passed = all_passed and passed + + # 运行非连续内存布局测试 + print(f"\n{'='*60}") + print(f"Testing Non-Contiguous Memory Layout (is_contiguous=False)") + print(f"{'='*60}") + + non_contiguous_runner = TestRunner(_NON_CONTIGUOUS_TEST_CASES, config) + non_contiguous_passed = non_contiguous_runner.run_tests(devices, test_non_contiguous_stride) + all_passed = all_passed and non_contiguous_passed + + # 运行大规模性能测试 + print(f"\n{'='*60}") + print(f"Testing Large Scale Performance (10M elements)") + print(f"{'='*60}") + + large_scale_runner = TestRunner(_LARGE_SCALE_TEST_CASES, config) + large_scale_passed = large_scale_runner.run_tests(devices, test_large_scale_binary_performance) + all_passed = all_passed and large_scale_passed + + # 打印总结 + print(f"\n{'='*60}") + print("Test Summary") + print(f"{'='*60}") + + if all_passed: + print("\033[92m✅ All debug tests passed!\033[0m") + else: + print("\033[91m❌ Some tests failed!\033[0m") + + sys.exit(0 if all_passed else 1) if __name__ == "__main__": - sys.exit(main()) - + main() From 6c0bd2c615910eb662114112183e15d639aa4746 Mon Sep 17 00:00:00 2001 From: zhuyue Date: Fri, 17 Oct 2025 15:37:34 +0800 Subject: [PATCH 3/3] Move debug.py out of the op operator test folder. --- test/infinicore/{op => }/debug.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/infinicore/{op => }/debug.py (100%) diff --git a/test/infinicore/op/debug.py b/test/infinicore/debug.py similarity index 100% rename from test/infinicore/op/debug.py rename to test/infinicore/debug.py