Skip to content

Commit

Permalink
feat(codegen): map data type patches (#3841)
Browse files Browse the repository at this point in the history
* feat(codegen): printLog utility

2. make LLJIT accept jit options

* fix(map extract_element): handle the case where the map value itself is NULL

NULL map values, specifically NULL struct values, still get evaluated
even if they are marked NULL. So a safe default NULL struct is
created instead of an UndefValue.

* test: fix create table in sql_sdk_test
  • Loading branch information
aceforeverd committed Apr 9, 2024
1 parent 18115ab commit 7f758af
Show file tree
Hide file tree
Showing 14 changed files with 176 additions and 28 deletions.
7 changes: 3 additions & 4 deletions cases/query/udf_query.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -572,12 +572,11 @@ cases:
map('1', 2, '3', 4, '5', 6, '7', 8, '9', 10, '11', 12)['10'] as e8,
# first match on duplicate keys
map('1', 2, '1', 4, '1', 6, '7', 8, '9', 10, '11', 12)['1'] as e9,
# map("c", 99, "d", NULL)["d"] as e10,
map("c", 99, "d", NULL)["d"] as e10,
expect:
# FIXME(someone): add e10 result core dump occasionally on centOS
columns: ["e1 string", "e2 int", "e3 string", "e4 int", "e5 string", "e6 timestamp", "e7 int", "e8 int", "e9 int"]
columns: ["e1 string", "e2 int", "e3 string", "e4 int", "e5 string", "e6 timestamp", "e7 int", "e8 int", "e9 int", "e10 int"]
data: |
2, 100, NULL, 101, f, 2000, 10, NULL, 2
2, 100, NULL, 101, f, 2000, 10, NULL, 2, NULL
- id: 14
mode: request-unsupport
sql: |
Expand Down
3 changes: 3 additions & 0 deletions hybridse/include/codec/fe_row_codec.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ const std::unordered_map<::hybridse::type::Type, uint8_t>& GetTypeSizeMap();
bool IsCodecBaseType(const type::ColumnSchema& sc);
bool IsCodecStrLikeType(const type::ColumnSchema& sc);

// returns the corresponding SQL string representation for input ColumnSchema
absl::StatusOr<std::string> ColumnSchemaStr(const type::ColumnSchema&);

inline uint8_t GetAddrLength(uint32_t size) {
if (size <= UINT8_MAX) {
return 1;
Expand Down
13 changes: 9 additions & 4 deletions hybridse/src/case/sql_case.cc
Original file line number Diff line number Diff line change
Expand Up @@ -345,11 +345,13 @@ bool SqlCase::BuildCreateSqlFromSchema(const type::TableDef& table,
std::string sql = "CREATE TABLE " + table.name() + "(\n";
for (int i = 0; i < table.columns_size(); i++) {
auto column = table.columns(i);
sql.append(column.name()).append(" ").append(TypeString(column.type()));

if (column.is_not_null()) {
sql.append(" NOT NULL");
auto s = codec::ColumnSchemaStr(column.schema());
if (!s.ok()) {
LOG(WARNING) << s.status();
return false;
}
sql.append(column.name()).append(" ").append(s.value());

if (isGenerateIndex || i < table.columns_size() - 1) {
sql.append(",\n");
}
Expand Down Expand Up @@ -900,6 +902,9 @@ bool SqlCase::BuildInsertSqlListFromInput(
}
sql_list->push_back(insert_sql);
}
} else if (!inputs_[input_idx].inserts_.empty()) {
auto& inserts = inputs_[input_idx].inserts_;
sql_list->insert(sql_list->end(), inserts.begin(), inserts.end());
}
return true;
}
Expand Down
66 changes: 65 additions & 1 deletion hybridse/src/codec/fe_row_codec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@
#include <string>
#include <utility>

#include "absl/status/status.h"
#include "absl/strings/str_join.h"
#include "codec/type_codec.h"
#include "gflags/gflags.h"
#include "codegen/insert_row_builder.h"
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "proto/fe_common.pb.h"

Expand Down Expand Up @@ -74,6 +75,69 @@ bool IsCodecStrLikeType(const type::ColumnSchema& sc) {
return sc.has_map_type() || sc.has_array_type() || (sc.has_base_type() && sc.base_type() == type::kVarchar);
}

// Streams the SQL type text of `cs` into `os`.
//
// Base types map to their SQL keyword; ARRAY and MAP types are rendered as
// `ARRAY<elem>` / `MAP<key, value>` by recursing into this same function, so a
// nested element flagged not-null carries its own " NOT NULL" suffix too.
// After the type text, " NOT NULL" is appended when `cs.is_not_null()` is set.
//
// Returns an Unimplemented status when `cs` has none of base/array/map type,
// or when the base type has no textual form. If a nested type fails, the
// opening text (e.g. "ARRAY<") has already been written to `os`.
static absl::Status ColumnSchemaStr(std::ostream& os, const type::ColumnSchema& cs) {
    if (cs.has_base_type()) {
        // Pick the keyword first; nullptr marks a base type we cannot render.
        const char* sql_name = nullptr;
        switch (cs.base_type()) {
            case type::kInt16:
                sql_name = "smallint";
                break;
            case type::kInt32:
                sql_name = "int";
                break;
            case type::kInt64:
                sql_name = "bigint";
                break;
            case type::kFloat:
                sql_name = "float";
                break;
            case type::kDouble:
                sql_name = "double";
                break;
            case type::kVarchar:
                sql_name = "string";
                break;
            case type::kTimestamp:
                sql_name = "timestamp";
                break;
            case type::kDate:
                sql_name = "date";
                break;
            case type::kBool:
                sql_name = "bool";
                break;
            case type::kNull:
                sql_name = "null";
                break;
            default:
                break;
        }
        if (sql_name == nullptr) {
            return absl::UnimplementedError(absl::StrCat("un-support tostring: ", cs.DebugString()));
        }
        os << sql_name;
    } else if (cs.has_array_type()) {
        const auto& element = cs.array_type().ele_type();
        os << "ARRAY<";
        CHECK_ABSL_STATUS(ColumnSchemaStr(os, element));
        os << ">";
    } else if (cs.has_map_type()) {
        const auto& map_schema = cs.map_type();
        os << "MAP<";
        CHECK_ABSL_STATUS(ColumnSchemaStr(os, map_schema.key_type()));
        os << ", ";
        CHECK_ABSL_STATUS(ColumnSchemaStr(os, map_schema.value_type()));
        os << ">";
    } else {
        return absl::UnimplementedError(absl::StrCat("un-support tostring: ", cs.DebugString()));
    }

    // Nullability constraint trails the type text.
    if (cs.is_not_null()) {
        os << " NOT NULL";
    }
    return absl::OkStatus();
}

// Public entry point: renders `cs` as its SQL type string.
// Delegates to the stream-based overload and returns the accumulated text,
// or the error status produced while rendering.
absl::StatusOr<std::string> ColumnSchemaStr(const type::ColumnSchema& cs) {
    std::ostringstream rendered;
    CHECK_ABSL_STATUS(ColumnSchemaStr(rendered, cs));
    return rendered.str();
}

RowBuilder::RowBuilder(const Schema& schema)
: schema_(schema),
buf_(NULL),
Expand Down
27 changes: 27 additions & 0 deletions hybridse/src/codegen/ir_base_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
#include "codegen/string_ir_builder.h"
#include "codegen/timestamp_ir_builder.h"
#include "glog/logging.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/GlobalVariable.h"
#include "node/node_manager.h"
#include "proto/fe_type.pb.h"

Expand Down Expand Up @@ -1076,5 +1078,30 @@ std::string GetIRTypeName(llvm::Type* type) {
type->print(ss, false, true);
return ss.str();
}

// Emits IR at the builder's current insert point that calls the external
// function `printLog(i8*)` with the text `toPrint`.
//
// - context/module/builder: LLVM objects the call is generated into; the
//   `printLog` declaration is looked up in (or inserted into) `module`.
// - toPrint: text handed to `printLog`.
// - useGlobal: when true the text is materialized with CreateGlobalString;
//   otherwise it is stored into a fresh alloca of the constant array type.
void PrintLog(llvm::LLVMContext* context, llvm::Module* module, llvm::IRBuilder<>* builder, absl::string_view toPrint,
              bool useGlobal) {
    const llvm::StringRef text(toPrint.data(), toPrint.size());

    llvm::FunctionCallee log_callee =
        module->getOrInsertFunction("printLog", builder->getVoidTy(), builder->getInt8PtrTy());

    // array[i8] constant holding the text
    llvm::Constant* text_constant = llvm::ConstantDataArray::getString(*context, text);

    llvm::Value* text_storage = nullptr;
    if (!useGlobal) {
        // Local copy: reserve a slot of the array type and store the constant into it.
        text_storage = builder->CreateAlloca(text_constant->getType());
        builder->CreateStore(text_constant, text_storage);
    } else {
        text_storage = builder->CreateGlobalString(text);
        // Note: Does not work without allocation
        // text_storage = new llvm::GlobalVariable(*module, text_constant->getType(), true,
        //     llvm::GlobalValue::PrivateLinkage, text_constant, "");
    }

    // printLog takes an i8*, so cast the array storage before the call.
    llvm::Value* as_i8_ptr = builder->CreatePointerCast(text_storage, builder->getInt8PtrTy());
    builder->CreateCall(log_callee, as_i8_ptr);
}

} // namespace codegen
} // namespace hybridse
3 changes: 3 additions & 0 deletions hybridse/src/codegen/ir_base_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ llvm::Value* CreateAllocaAtHead(llvm::IRBuilder<>* builder, llvm::Type* dtype,

llvm::Value* CodecSizeForPrimitive(llvm::IRBuilder<>* builder, llvm::Type* type);

void PrintLog(llvm::LLVMContext* context, llvm::Module* module, llvm::IRBuilder<>* builder, absl::string_view toPrint,
bool useGlobal = true);

} // namespace codegen
} // namespace hybridse
#endif // HYBRIDSE_SRC_CODEGEN_IR_BASE_BUILDER_H_
28 changes: 17 additions & 11 deletions hybridse/src/codegen/map_ir_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ absl::StatusOr<NativeValue> MapIRBuilder::ExtractElement(CodeGenContextBase* ctx
{
struct_type_->getPointerTo(), // arr ptr
ctx->GetBuilder()->getInt1Ty(), // arr is null
key_type_, // key type
key_type_, // key value
ctx->GetBuilder()->getInt1Ty(), // key is null
value_type_->getPointerTo(), // output value ptr
ctx->GetBuilder()->getInt1Ty()->getPointerTo() // output is null ptr
Expand All @@ -218,12 +218,20 @@ absl::StatusOr<NativeValue> MapIRBuilder::ExtractElement(CodeGenContextBase* ctx
auto builder = ctx->GetBuilder();

builder->CreateStore(builder->getInt1(true), out_null_alloca_param);
::llvm::Value* idx_alloc = builder->CreateAlloca(builder->getInt32Ty());
builder->CreateStore(builder->getInt32(0), idx_alloc);
::llvm::Value* found_idx_alloc = builder->CreateAlloca(builder->getInt32Ty());
builder->CreateStore(builder->getInt32(-1), found_idx_alloc);

llvm::Value* sz_alloca = builder->CreateAlloca(builder->getInt32Ty());
llvm::Value* keys_alloca = builder->CreateAlloca(key_type_->getPointerTo());

auto s = ctx->CreateBranchNot(
builder->CreateOr(arr_is_null_param, key_is_null_param),
[&]() -> base::Status {
::llvm::Value* sz = nullptr;
CHECK_TRUE(Load(ctx->GetCurrentBlock(), map_ptr_param, SZ_IDX, &sz), common::kCodegenError);
ctx->GetBuilder()->CreateStore(sz, sz_alloca);

CHECK_STATUS(ctx->CreateBranch(builder->CreateICmpSLE(sz, builder->getInt32(0)), [&]() -> base::Status {
builder->CreateRetVoid();
Expand All @@ -232,25 +240,21 @@ absl::StatusOr<NativeValue> MapIRBuilder::ExtractElement(CodeGenContextBase* ctx

::llvm::Value* keys = nullptr;
CHECK_TRUE(Load(ctx->GetCurrentBlock(), map_ptr_param, KEY_VEC_IDX, &keys), common::kCodegenError);

::llvm::Value* idx_alloc = builder->CreateAlloca(builder->getInt32Ty());
builder->CreateStore(builder->getInt32(0), idx_alloc);
::llvm::Value* found_idx_alloc = builder->CreateAlloca(builder->getInt32Ty());
builder->CreateStore(builder->getInt32(-1), found_idx_alloc);
ctx->GetBuilder()->CreateStore(keys, keys_alloca);

CHECK_STATUS(
ctx->CreateWhile(
[&](::llvm::Value** cond) -> base::Status {
::llvm::Value* idx = builder->CreateLoad(idx_alloc);
::llvm::Value* found = builder->CreateLoad(found_idx_alloc);
*cond = builder->CreateAnd(builder->CreateICmpSLT(idx, sz),
*cond = builder->CreateAnd(builder->CreateICmpSLT(idx, builder->CreateLoad(sz_alloca)),
builder->CreateICmpSLT(found, builder->getInt32(0)));
return {};
},
[&]() -> base::Status {
::llvm::Value* idx = builder->CreateLoad(idx_alloc);
// key never null
auto* ele = builder->CreateLoad(builder->CreateGEP(keys, idx));
auto* ele = builder->CreateLoad(builder->CreateGEP(builder->CreateLoad(keys_alloca), idx));
::llvm::Value* eq = nullptr;
base::Status s;
PredicateIRBuilder::BuildEqExpr(ctx->GetCurrentBlock(), ele, key_val_param, &eq, s);
Expand All @@ -267,7 +271,7 @@ absl::StatusOr<NativeValue> MapIRBuilder::ExtractElement(CodeGenContextBase* ctx
auto* found_idx = builder->CreateLoad(found_idx_alloc);

CHECK_STATUS(ctx->CreateBranch(
builder->CreateAnd(builder->CreateICmpSLT(found_idx, sz),
builder->CreateAnd(builder->CreateICmpSLT(found_idx, builder->CreateLoad(sz_alloca)),
builder->CreateICmpSGE(found_idx, builder->getInt32(0))),
[&]() -> base::Status {
::llvm::Value* values = nullptr;
Expand Down Expand Up @@ -299,10 +303,12 @@ absl::StatusOr<NativeValue> MapIRBuilder::ExtractElement(CodeGenContextBase* ctx
}

auto builder = ctx->GetBuilder();
auto s = CreateSafeNull(ctx->GetCurrentBlock(), value_type_);
CHECK_ABSL_STATUSOR(s);
auto* out_val_alloca = builder->CreateAlloca(value_type_);
builder->CreateStore(::llvm::UndefValue::get(value_type_), out_val_alloca);
builder->CreateStore(s->GetValue(ctx), out_val_alloca);
auto* out_null_alloca = builder->CreateAlloca(builder->getInt1Ty());
builder->CreateStore(builder->getInt1(true), out_null_alloca);
builder->CreateStore(s->GetIsNull(ctx), out_null_alloca);

builder->CreateCall(fn, {arr.GetValue(builder), arr.GetIsNull(builder), key.GetValue(builder),
key.GetIsNull(builder), out_val_alloca, out_null_alloca});
Expand Down
6 changes: 6 additions & 0 deletions hybridse/src/udf/udf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1407,6 +1407,12 @@ int64_t FarmFingerprint(absl::string_view input) {
return absl::bit_cast<int64_t>(farmhash::Fingerprint64(input));
}

// Writes `fmt` followed by a newline to stderr; a null pointer is ignored.
// Note: despite the parameter name, the text is printed verbatim through "%s"
// and is not interpreted as a format string.
void printLog(const char* fmt) {
    if (fmt == nullptr) {
        return;
    }
    fprintf(stderr, "%s\n", fmt);
}

} // namespace v1

bool RegisterMethod(UdfLibrary *lib, const std::string &fn_name, hybridse::node::TypeNode *ret,
Expand Down
2 changes: 2 additions & 0 deletions hybridse/src/udf/udf.h
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,8 @@ void hex(StringRef *str, StringRef *output);

void unhex(StringRef *str, StringRef *output, bool* is_null);

void printLog(const char* fmt);

} // namespace v1

/// \brief register native udf related methods into given UdfLibrary `lib`
Expand Down
36 changes: 34 additions & 2 deletions hybridse/src/vm/jit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ extern "C" {
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
Expand All @@ -52,6 +54,8 @@ HybridSeJit::HybridSeJit(::llvm::orc::LLJITBuilderState& s, ::llvm::Error& e)
: LLJIT(s, e) {}
HybridSeJit::~HybridSeJit() {}

// Keeps a copy of `options` in jit_options_; no JIT state is built here.
HybridSeLlvmJitWrapper::HybridSeLlvmJitWrapper(const JitOptions& options) : jit_options_(options) {}

static void RunDefaultOptPasses(::llvm::Module* m) {
::llvm::legacy::FunctionPassManager fpm(m);
// Add some optimizations.
Expand Down Expand Up @@ -156,8 +160,36 @@ bool HybridSeLlvmJitWrapper::Init() {
return true;
}

auto jit = ::llvm::Expected<std::unique_ptr<HybridSeJit>>(
HybridSeJitBuilder().create());
HybridSeJitBuilder builder;
if (jit_options_.IsEnableGdb()) {
auto JTMB = llvm::orc::JITTargetMachineBuilder::detectHost();
auto e = JTMB.takeError();
if (e) {
LOG(WARNING) << "fail to init lljit";;
::llvm::errs() << e;
return false;
}
if (!JTMB.get().getTargetTriple().isOSLinux()) {
LOG(WARNING) << "GDB listener not enabled for non-Linux";;
}
// require higher LLVM
// builder
// .setJITTargetMachineBuilder(std::move(JTMB.get()))
// .setObjectLinkingLayerCreator(
// [&](llvm::orc::ExecutionSession& ES) {
// auto GetMemMgr = []() { return std::make_unique<llvm::SectionMemoryManager>(); };
// auto ObjLinkingLayer = std::make_unique<llvm::orc::RTDyldObjectLinkingLayer>(ES, std::move(GetMemMgr));
//
// // Register the event listener.
// ObjLinkingLayer->registerJITEventListener(*JITEventListener::createGDBRegistrationListener());
//
// // Make sure the debug info sections aren't stripped.
// ObjLinkingLayer->setProcessAllSections(true);
//
// return ObjLinkingLayer;
// });
}
auto jit = builder.create();
{
::llvm::Error e = jit.takeError();
if (e) {
Expand Down
5 changes: 3 additions & 2 deletions hybridse/src/vm/jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ std::string LlvmToString(const T& value) {

class HybridSeLlvmJitWrapper : public HybridSeJitWrapper {
public:
HybridSeLlvmJitWrapper() {}
~HybridSeLlvmJitWrapper() {}
explicit HybridSeLlvmJitWrapper(const JitOptions& options = {});
~HybridSeLlvmJitWrapper() override {}

bool Init() override;

Expand All @@ -99,6 +99,7 @@ class HybridSeLlvmJitWrapper : public HybridSeJitWrapper {
hybridse::vm::RawPtrHandle FindFunction(const std::string& funcname) override;

private:
const JitOptions jit_options_;
std::unique_ptr<HybridSeJit> jit_;
std::unique_ptr<::llvm::orc::MangleAndInterner> mi_;
};
Expand Down
6 changes: 3 additions & 3 deletions hybridse/src/vm/jit_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,10 @@ HybridSeJitWrapper* HybridSeJitWrapper::Create(const JitOptions& jit_options) {
return new HybridSeLlvmJitWrapper();
#endif
} else {
if (jit_options.IsEnableVtune() || jit_options.IsEnablePerf() ||
jit_options.IsEnableGdb()) {
if (jit_options.IsEnableVtune() || jit_options.IsEnablePerf()) {
LOG(WARNING) << "LLJIT do not support jit events";
}
return new HybridSeLlvmJitWrapper();
return new HybridSeLlvmJitWrapper(jit_options);
}
}

Expand All @@ -132,6 +131,7 @@ void InitBuiltinJitSymbols(HybridSeJitWrapper* jit) {
jit->AddExternalFunction("memset", (reinterpret_cast<void*>(&memset)));
jit->AddExternalFunction("memcpy", (reinterpret_cast<void*>(&memcpy)));
jit->AddExternalFunction("__bzero", (reinterpret_cast<void*>(&bzero)));
jit->AddExternalFunction("printLog", (reinterpret_cast<void*>(&udf::v1::printLog)));

jit->AddExternalFunction(
"hybridse_storage_get_bool_field",
Expand Down
1 change: 1 addition & 0 deletions hybridse/src/vm/jit_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class HybridSeJitWrapper {
public:
HybridSeJitWrapper();
HybridSeJitWrapper(const HybridSeJitWrapper&) = delete;
HybridSeJitWrapper& operator=(const HybridSeJitWrapper&) = delete;

virtual ~HybridSeJitWrapper() {}

Expand Down
Loading

0 comments on commit 7f758af

Please sign in to comment.