4 changes: 4 additions & 0 deletions include/infinicore/tensor.hpp
@@ -121,6 +121,10 @@ class TensorImpl : public std::enable_shared_from_this<TensorImpl> {

std::string info() const;

void debug(const std::string &filename) const;

void debug() const;

///
/// Data Transfer APIs
///
11 changes: 11 additions & 0 deletions python/infinicore/tensor.py
@@ -75,6 +75,17 @@ def permute(self, dims):
def view(self, shape):
return Tensor(self._underlying.view(shape))

def debug(self, filename=None):
"""Print tensor data or save to file for debugging

Args:
filename: Optional filename to save raw binary data. If None, prints to stdout.
"""
if filename is None:
self._underlying.debug()
else:
self._underlying.debug(filename)


def empty(size, *, dtype=None, device=None, pin_memory=False):
return Tensor(
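A quick usage sketch of the new Python API. The construction below is illustrative: empty() comes from this module, but the dtype spelling is an assumption, and any call that yields a Tensor works the same way.

import infinicore

t = infinicore.empty((2, 3), dtype=infinicore.float32)  # dtype name assumed for illustration
t.debug()            # prints info() plus the element values to stdout
t.debug("t.bin")     # writes the raw element bytes to t.bin instead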
4 changes: 3 additions & 1 deletion src/infinicore/pybind11/tensor.hpp
@@ -17,14 +17,16 @@ inline void bind(py::module &m) {
.def_property_readonly("dtype", [](const Tensor &tensor) { return tensor->dtype(); })
.def_property_readonly("device", [](const Tensor &tensor) { return tensor->device(); })

.def("data_ptr", [](const Tensor &tensor) { return tensor->data(); })
.def("data_ptr", [](const Tensor &tensor) { return reinterpret_cast<uintptr_t>(tensor->data()); })
.def("size", [](const Tensor &tensor, std::size_t dim) { return tensor->size(dim); })
.def("stride", [](const Tensor &tensor, std::size_t dim) { return tensor->stride(dim); })
.def("numel", [](const Tensor &tensor) { return tensor->numel(); })

.def("is_contiguous", [](const Tensor &tensor) { return tensor->is_contiguous(); })
.def("is_pinned", [](const Tensor &tensor) { return tensor->is_pinned(); })
.def("info", [](const Tensor &tensor) { return tensor->info(); })
.def("debug", [](const Tensor &tensor) { return tensor->debug(); })
.def("debug", [](const Tensor &tensor, const std::string &filename) { return tensor->debug(filename); })

.def("copy_", [](Tensor &tensor, const Tensor &other) { tensor->copy_from(other); })
.def("to", [](const Tensor &tensor, const Device &device) { return tensor->to(device); })
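Because data_ptr now returns the address as a uintptr_t integer instead of a raw pointer that pybind11 cannot convert, the value can be consumed directly from Python. A minimal sketch, assuming a contiguous float32 tensor t resident on the CPU; wrapping the address with ctypes/numpy is illustrative, not part of this change.

import ctypes
import numpy as np

addr = t.data_ptr()                                     # plain Python int holding the address
buf = (ctypes.c_float * t.numel()).from_address(addr)   # valid only for CPU float32 data
view = np.ctypeslib.as_array(buf)                       # zero-copy view; keep t alive while using it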
294 changes: 294 additions & 0 deletions src/infinicore/tensor/debug.cc
@@ -0,0 +1,294 @@
#include "infinicore/context/context.hpp"
#include "infinicore/dtype.hpp"
#include "infinicore/tensor.hpp"

#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <sstream>

namespace infinicore {

inline float f16_to_f32(uint16_t h) {
uint32_t sign = (h & 0x8000) << 16;
int32_t exponent = (h >> 10) & 0x1F;
uint32_t mantissa = h & 0x3FF;

uint32_t f32;
if (exponent == 31) {
if (mantissa != 0) {
f32 = sign | 0x7F800000 | (mantissa << 13);
} else {
f32 = sign | 0x7F800000;
}
} else if (exponent == 0) {
if (mantissa == 0) {
f32 = sign;
} else {
exponent = -14;
while ((mantissa & 0x400) == 0) {
mantissa <<= 1;
exponent--;
}
mantissa &= 0x3FF;
f32 = sign | ((exponent + 127) << 23) | (mantissa << 13);
}
} else {
f32 = sign | ((exponent + 127 - 15) << 23) | (mantissa << 13);
}

float result;
std::memcpy(&result, &f32, sizeof(result));
return result;
}

inline float bf16_to_f32(uint16_t val) {
uint32_t bits32 = static_cast<uint32_t>(val) << 16;
float out;
std::memcpy(&out, &bits32, sizeof(out));
return out;
}

// Template function for printing data recursively
template <typename T>
void print_data(const T *data, const Shape &shape, const Strides &strides, size_t dim) {
if (dim == shape.size() - 1) {
for (size_t i = 0; i < shape[dim]; i++) {
std::cout << data[i * strides[dim]] << " ";
}
std::cout << std::endl;
} else if (dim < shape.size() - 1) {
for (size_t i = 0; i < shape[dim]; i++) {
print_data(data + i * strides[dim], shape, strides, dim + 1);
}
}
}

// Specialization for F16 (uint16_t)
template <>
void print_data<uint16_t>(const uint16_t *data, const Shape &shape, const Strides &strides, size_t dim) {
if (dim == shape.size() - 1) {
for (size_t i = 0; i < shape[dim]; i++) {
std::cout << f16_to_f32(data[i * strides[dim]]) << " ";
}
std::cout << std::endl;
} else if (dim < shape.size() - 1) {
for (size_t i = 0; i < shape[dim]; i++) {
print_data(data + i * strides[dim], shape, strides, dim + 1);
}
}
}

// Function for printing BF16 data
void print_data_bf16(const uint16_t *data, const Shape &shape, const Strides &strides, size_t dim) {
if (dim == shape.size() - 1) {
for (size_t i = 0; i < shape[dim]; i++) {
std::cout << bf16_to_f32(data[i * strides[dim]]) << " ";
}
std::cout << std::endl;
} else if (dim < shape.size() - 1) {
for (size_t i = 0; i < shape[dim]; i++) {
print_data_bf16(data + i * strides[dim], shape, strides, dim + 1);
}
}
}

// Template function for recursively writing data to a binary file (handles non-contiguous tensors via strides)
template <typename T>
void write_binary_data(std::ofstream &out, const T *data, const Shape &shape, const Strides &strides, size_t dim) {
if (dim == shape.size() - 1) {
// Write the innermost dimension
for (size_t i = 0; i < shape[dim]; i++) {
out.write(reinterpret_cast<const char *>(&data[i * strides[dim]]), sizeof(T));
}
} else {
// Recursively process higher dimensions
for (size_t i = 0; i < shape[dim]; i++) {
write_binary_data(out, data + i * strides[dim], shape, strides, dim + 1);
}
}
}

void TensorImpl::debug(const std::string &filename) const {
// Synchronize device if needed
context::syncDevice();

std::cout << info() << std::endl;

const std::byte *cpu_data = nullptr;
std::unique_ptr<std::byte[]> allocated_memory; // RAII: released automatically

// Copy data to CPU if not already on CPU
if (this->device().getType() != Device::Type::CPU) {
size_t numel = this->numel();
size_t element_size = dsize(this->dtype());

// Check that numel * element_size does not overflow size_t
if (numel > 0 && element_size > std::numeric_limits<size_t>::max() / numel) {
std::cerr << "Error: Memory size calculation overflow for tensor with "
<< numel << " elements of size " << element_size << "\n";
return;
}

size_t mem_size = numel * element_size;
allocated_memory = std::make_unique<std::byte[]>(mem_size);
context::memcpyD2H(allocated_memory.get(), this->data(), mem_size);
cpu_data = allocated_memory.get();
} else {
cpu_data = this->data();
}

// If filename is provided, save to binary file
if (!filename.empty()) {
std::ofstream outFile(filename, std::ios::binary);
if (!outFile) {
std::cerr << "Error opening file for writing: " << filename << "\n";
return; // allocated_memory is released automatically (RAII)
}

// Check whether the tensor is contiguous to choose the fast or slow write path
if (this->is_contiguous()) {
// Fast path: contiguous tensor, write in one go
size_t mem_size = this->numel() * dsize(this->dtype());
outFile.write(reinterpret_cast<const char *>(cpu_data), mem_size);
} else {
// Slow path: non-contiguous tensor, write element by element using strides
switch (this->dtype()) {
case DataType::F16:
case DataType::BF16:
write_binary_data(outFile, reinterpret_cast<const uint16_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::F32:
write_binary_data(outFile, reinterpret_cast<const float *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::F64:
write_binary_data(outFile, reinterpret_cast<const double *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::U64:
write_binary_data(outFile, reinterpret_cast<const uint64_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::I64:
write_binary_data(outFile, reinterpret_cast<const int64_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::U32:
write_binary_data(outFile, reinterpret_cast<const uint32_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::I32:
write_binary_data(outFile, reinterpret_cast<const int32_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::U16:
write_binary_data(outFile, reinterpret_cast<const uint16_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::I16:
write_binary_data(outFile, reinterpret_cast<const int16_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::U8:
write_binary_data(outFile, reinterpret_cast<const uint8_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::I8:
write_binary_data(outFile, reinterpret_cast<const int8_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::BOOL:
// Special case for BOOL: written as uint8_t for cross-platform consistency
write_binary_data(outFile, reinterpret_cast<const uint8_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
default:
std::cerr << "Unsupported data type for binary output\n";
return;
}
}

// Explicitly close the file and verify the write succeeded
outFile.close();
if (!outFile) {
std::cerr << "Error: Failed to write data to file: " << filename << "\n";
return;
}

std::cout << "Data written to binary file: " << filename;
if (!this->is_contiguous()) {
std::cout << " (non-contiguous tensor, wrote " << this->numel() << " elements)";
}
std::cout << "\n";
return;
}

// Print data based on dtype
switch (this->dtype()) {
case DataType::F16:
print_data(reinterpret_cast<const uint16_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::F32:
print_data(reinterpret_cast<const float *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::F64:
print_data(reinterpret_cast<const double *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::U64:
print_data(reinterpret_cast<const uint64_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::I64:
print_data(reinterpret_cast<const int64_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::U32:
print_data(reinterpret_cast<const uint32_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::I32:
print_data(reinterpret_cast<const int32_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::U16:
print_data(reinterpret_cast<const uint16_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::I16:
print_data(reinterpret_cast<const int16_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::U8:
print_data(reinterpret_cast<const uint8_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::I8:
print_data(reinterpret_cast<const int8_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::BF16:
print_data_bf16(reinterpret_cast<const uint16_t *>(cpu_data),
this->shape(), this->strides(), 0);
break;
case DataType::BOOL:
print_data(reinterpret_cast<const bool *>(cpu_data),
this->shape(), this->strides(), 0);
break;
default:
std::cout << "Unsupported data type for debug" << std::endl;
break;
}
}

void TensorImpl::debug() const {
this->debug("");
}

} // namespace infinicore
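The binary dump produced by debug(filename) contains only raw element bytes, written in logical (row-major) order even for non-contiguous tensors; no shape or dtype header is stored. A read-back sketch assuming numpy, with the shape and dtype supplied by the caller; the F16 -> np.float16 and BF16 -> uint16 interpretations are assumptions about how to decode the bytes, not part of this change.

import numpy as np

shape = (2, 3)  # must be tracked by the caller; the file stores no metadata
f32 = np.fromfile("t.bin", dtype=np.float32).reshape(shape)

# F16 dumps can be read as np.float16; BF16 has no native numpy dtype, so read
# the raw uint16 bit patterns and widen them to float32 manually:
bf16_bits = np.fromfile("t_bf16.bin", dtype=np.uint16)
bf16_f32 = (bf16_bits.astype(np.uint32) << 16).view(np.float32).reshape(shape)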