Skip to content

Commit

Permalink
feat(udf): list_except_by_key & list_except_by_value
Browse files Browse the repository at this point in the history
  • Loading branch information
aceforeverd committed Jun 21, 2023
1 parent 6b7cb9f commit 5859fb0
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 9 deletions.
33 changes: 33 additions & 0 deletions cases/function/test_feature_zero_function.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,36 @@ cases:
- [3, 3, 1, 1]
- [4, 0, 0, 0]
- [5, 2, 2, 2]

- id: 7
desc: list_except_by_key/list_except_by_value
inputs:
- name: t1
columns: ["idx int", "gp int", "val string", "ts timestamp"]
indexs: ['idx:gp:ts']
rows:
- [100, 1, "a:1,b:2,c:0", 1000]
- [101, 1, "abc", 1000]
- [102, 1, "a,b,c", 1000]
# notes
# 1. 'abc' parsed as a kv pair yields key `abc` with an empty string as its value
# 2. if the list is not kv pairs but simply 'k1,k2,k3', every element is a key with an empty value, so filtering by key gives the same result as filtering on the whole element
sql: |
select idx,
`join`(list_except_by_key(split(val, ','), 'a,b'), " ") as keys_filterd,
`join`(list_except_by_value(split(val, ','), '1,2'), " ") as values_filterd,
`join`(list_except_by_key(split(val, ','), ''), " ") as filter_nothing1,
`join`(list_except_by_value(split(val, ','), ''), " ") as filter_nothing2,
from t1
expect:
order: idx
columns:
- idx int
- keys_filterd string
- values_filterd string
- filter_nothing1 string
- filter_nothing2 string
rows:
- [100, "c:0", "c:0", "a:1 b:2 c:0", "a:1 b:2 c:0"]
- [101, "abc", "abc", "abc", ""]
- [102, "c", "a b c", "a b c", ""]
82 changes: 74 additions & 8 deletions hybridse/src/udf/default_defs/feature_zero_def.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
#include <utility>
#include <vector>

#include "absl/strings/str_split.h"
#include "boost/algorithm/string.hpp"
#include "boost/algorithm/string/join.hpp"
#include "boost/algorithm/string/regex.hpp"

#include "codec/list_iterator_codec.h"
#include "codec/type_codec.h"
#include "udf/containers.h"
Expand Down Expand Up @@ -169,7 +169,7 @@ class StringSplitState : public base::FeBaseObject {
};

struct FZStringOpsDef {
// Creates a fresh StringSplitState and registers it with the current
// JitRuntime through AddManagedObject before returning the raw pointer.
// NOTE(review): presumably the runtime then owns and releases the object, so
// callers must not delete it -- confirm against JitRuntime.
static StringSplitState* NewList() {
    auto list = new StringSplitState();
    vm::JitRuntime::get()->AddManagedObject(list);
    return list;
}
Expand Down Expand Up @@ -221,7 +221,7 @@ struct FZStringOpsDef {

// One-shot (non-window) split: allocates a new state with NewList, feeds the
// single input through UpdateSplit, and publishes the state's list through
// `output->list`.
//
// @param str        input string; forwarded unchanged to UpdateSplit
// @param is_null    null flag for `str`; forwarded unchanged to UpdateSplit
// @param delimeter  delimiter argument; forwarded unchanged to UpdateSplit
// @param output     receives the resulting list as an int8_t* handle
static void SingleSplit(StringRef* str, bool is_null, StringRef* delimeter,
                        ListRef<StringRef>* output) {
    auto list = NewList();
    UpdateSplit(list, str, is_null, delimeter);
    output->list = reinterpret_cast<int8_t*>(list->GetListV());
}
Expand Down Expand Up @@ -274,7 +274,7 @@ struct FZStringOpsDef {
// One-shot (non-window) variant of split-by-key: allocates a new state with
// NewList, feeds the single input through UpdateSplitByKey, and publishes the
// state's list through `output->list`.
//
// @param str           input string; forwarded unchanged to UpdateSplitByKey
// @param is_null       null flag for `str`; forwarded unchanged
// @param delimeter     element delimiter argument; forwarded unchanged
// @param kv_delimeter  key/value delimiter argument; forwarded unchanged
// @param output        receives the resulting list as an int8_t* handle
static void SingleSplitByKey(StringRef* str, bool is_null,
                             StringRef* delimeter, StringRef* kv_delimeter,
                             ListRef<StringRef>* output) {
    auto list = NewList();
    UpdateSplitByKey(list, str, is_null, delimeter, kv_delimeter);
    output->list = reinterpret_cast<int8_t*>(list->GetListV());
}
Expand Down Expand Up @@ -341,7 +341,7 @@ struct FZStringOpsDef {
StringRef* delimeter,
StringRef* kv_delimeter,
ListRef<StringRef>* output) {
auto list = InitList();
auto list = NewList();
UpdateSplitByValue(list, str, is_null, delimeter, kv_delimeter);
output->list = reinterpret_cast<int8_t*>(list->GetListV());
}
Expand Down Expand Up @@ -389,6 +389,37 @@ struct FZStringOpsDef {
auto list_v = reinterpret_cast<hybridse::codec::ListV<StringRef> *>(list->list);
return list_v->GetCount();
}

// Copies into `output` every element of `list_ref` whose selected field does
// not appear in `keys`.
//
// `keys` is split on ',' into the set of excluded tokens. Each list element is
// split on ':' into a (key, value) pair, and `Idx` picks the field that is
// looked up in that set: 0 compares the key, 1 compares the value. An element
// without ':' has an empty value, so it is dropped by the value variant only
// when `keys` contains an empty token (e.g. `keys` is the empty string).
//
// @param list_ref  input list of strings
// @param keys      comma-separated tokens to exclude
// @param output    receives the newly built list (created through NewList)
// @param out_null  set to true, leaving `output` untouched, when `list_ref` or
//                  `keys` is null; set to false otherwise
template <std::size_t Idx>
static void ListExceptByKey(ListRef<StringRef>* list_ref, StringRef* keys, ListRef<StringRef>* output,
                            bool* out_null) {
    if (list_ref == nullptr || keys == nullptr) {
        *out_null = true;
        return;
    }

    // The views in key_list alias keys->data_, which stays alive for the whole call.
    absl::string_view view(keys->data_, keys->size_);
    std::set<absl::string_view> key_list = absl::StrSplit(view, ',');

    auto list = reinterpret_cast<codec::ListV<StringRef>*>(list_ref->list);
    auto iter = list->GetIterator();

    auto out = NewList();

    while (iter->Valid()) {
        StringRef str = iter->GetValue();
        // Splitting into a std::pair keeps the first two ':'-separated fields;
        // a missing second field is left as an empty view.
        std::pair<absl::string_view, absl::string_view> p =
            absl::StrSplit(absl::string_view(str.data_, str.size_), ':');
        auto key = std::get<Idx>(p);
        if (key_list.find(key) == key_list.end()) {
            out->GetListV()->Add(str.ToString());
        }
        iter->Next();
    }

    output->list = reinterpret_cast<int8_t*>(out->GetListV());
    *out_null = false;
}
};

template <typename K>
Expand Down Expand Up @@ -560,7 +591,7 @@ void DefaultUdfLibrary::InitFeatureZero() {
RegisterUdaf("window_split")
.templates<ListRef<StringRef>, Opaque<StringSplitState>,
Nullable<StringRef>, StringRef>()
.init("window_split_init", FZStringOpsDef::InitList)
.init("window_split_init", FZStringOpsDef::NewList)
.update("window_split_update", FZStringOpsDef::UpdateSplit)
.output("window_split_output", FZStringOpsDef::OutputList)
.doc(R"(
Expand Down Expand Up @@ -593,7 +624,7 @@ void DefaultUdfLibrary::InitFeatureZero() {
RegisterUdaf("window_split_by_key")
.templates<ListRef<StringRef>, Opaque<StringSplitState>,
Nullable<StringRef>, StringRef, StringRef>()
.init("window_split_by_key_init", FZStringOpsDef::InitList)
.init("window_split_by_key_init", FZStringOpsDef::NewList)
.update("window_split_by_key_update",
FZStringOpsDef::UpdateSplitByKey)
.output("window_split_by_key_output", FZStringOpsDef::OutputList)
Expand Down Expand Up @@ -631,7 +662,7 @@ void DefaultUdfLibrary::InitFeatureZero() {
RegisterUdaf("window_split_by_value")
.templates<ListRef<StringRef>, Opaque<StringSplitState>,
Nullable<StringRef>, StringRef, StringRef>()
.init("window_split_by_value_init", FZStringOpsDef::InitList)
.init("window_split_by_value_init", FZStringOpsDef::NewList)
.update("window_split_by_value_update",
FZStringOpsDef::UpdateSplitByValue)
.output("window_split_by_value_output", FZStringOpsDef::OutputList)
Expand Down Expand Up @@ -733,6 +764,41 @@ void DefaultUdfLibrary::InitFeatureZero() {
@endcode
@since 0.7.0)");

// SQL function list_except_by_key(list, except_str): bound to
// FZStringOpsDef::ListExceptByKey<0>, i.e. the key half of each element is
// the field checked against except_str.
RegisterExternal("list_except_by_key")
    .list_argument_at(0)
    .args<ListRef<StringRef>, StringRef>(FZStringOpsDef::ListExceptByKey<0>)
    .doc(R"s(
@brief Return list of elements in list1 but keys not in except_str
@param list1 List of string, with each element as the format of `key:value`.
@param except_str String joined list, as `key1,key2`, split by comma(,)
Example:
@code{.sql}
select `join`(list_except_by_key(split("a:1,b:2,c:0", ","), "a,c"), " ");
-- output b:2
@endcode
@since 0.8.1)s");
// SQL function list_except_by_value(list, except_str): bound to
// FZStringOpsDef::ListExceptByKey<1>, i.e. the value half of each element is
// the field checked against except_str.
RegisterExternal("list_except_by_value")
    .list_argument_at(0)
    .args<ListRef<StringRef>, StringRef>(FZStringOpsDef::ListExceptByKey<1>)
    .doc(R"s(
@brief Return list of elements in list1 but values not in except_str
@param list1 List of string, with each element as the format of `key:value`.
@param except_str String joined list, as `value1,value2`, split by comma(,). Empty string filters list whose value is empty
Example:
@code{.sql}
select `join`(list_except_by_value(split("a:1,b:2,c:0", ","), "0,1"), " ");
-- output b:2
@endcode
@since 0.8.1)s");
}

} // namespace udf
Expand Down
2 changes: 1 addition & 1 deletion hybridse/src/udf/default_defs/window_functions_def.cc
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ void RegisterBaseListLag(UdfLibrary* lib) {
@endcode
)")
.args<codec::ListRef<V>, int64_t>(reinterpret_cast<void*>(AtList<V>))
.args<codec::ListRef<V>, int64_t>(AtList<V>)
.return_by_arg(true)
.template returns<Nullable<V>>();
}
Expand Down

0 comments on commit 5859fb0

Please sign in to comment.