Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion ddprof-lib/src/main/cpp/flightRecorder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "dictionary.h"
#include "os.h"
#include "profiler.h"
#include "rustDemangler.h"
#include "spinLock.h"
#include "symbols.h"
#include "threadFilter.h"
Expand Down Expand Up @@ -80,9 +81,16 @@ void Lookup::fillNativeMethodInfo(MethodInfo* mi, const char* name, const char*
char* demangled = abi::__cxa_demangle(name, NULL, NULL, &status);
if (demangled != NULL) {
cutArguments(demangled);
mi->_name = _symbols.lookup(demangled);
mi->_sig = _symbols.lookup("()L;");
mi->_type = FRAME_CPP;

// Rust legacy demangling
if (RustDemangler::is_probably_rust_legacy(demangled)) {
std::string rust_demangled = RustDemangler::demangle(demangled);
mi->_name = _symbols.lookup(rust_demangled.c_str());
} else {
mi->_name = _symbols.lookup(demangled);
}
free(demangled);
return;
}
Expand Down
166 changes: 166 additions & 0 deletions ddprof-lib/src/main/cpp/rustDemangler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/*
* Copyright 2021, 2023 Datadog, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


#include "rustDemangler.h"

#include <array>

namespace RustDemangler {

// With some exceptions we don't handle here, v0 Rust symbols can end in a
// prefix followed by a 16-hexdigit hash, which must be removed
const std::string hash_pre = "::h";
const std::string hash_eg = "0123456789abcdef";

const std::array<std::pair<std::string, std::string>, 9> patterns = {{
{"..", "::"},
{"$C$", ","},
{"$BP$", "*"},
{"$GT$", ">"},
{"$LT$", "<"},
{"$LP$", "("},
{"$RP$", ")"},
{"$RF$", "&"},
{"$SP$", "@"},
}};

inline bool is_hexdig(const char c) {
return (c >= 'a' && c <= 'f') || (c >= '0' && c <= '9');
Comment thread
jbachorik marked this conversation as resolved.
}

// Simple conversion from hex digit to integer
inline int hex_to_int(char dig) {
constexpr int k_a_hex_value = 0xa;
if (dig >= '0' && dig <= '9') {
return dig - '0';
}
if (dig >= 'a' && dig <= 'f') {
return dig - 'a' + k_a_hex_value;
}
if (dig >= 'A' && dig <= 'F') {
return dig - 'A' + k_a_hex_value;
}
return -1;
}

// Minimal check that a string can end, and does end, in a hashlike substring
inline bool has_hash(const std::string &str) {
// If the size can't conform, then the string is invalid
if (str.size() <= hash_pre.size() + hash_eg.size()) {
Comment thread
jbachorik marked this conversation as resolved.
return false;
}

// Check that the string contains the hash prefix in the right position
if (str.compare(str.size() - hash_eg.size() - hash_pre.size(),
hash_pre.size(), hash_pre)) {
return false;
}

// Check that the string ends in lowercase hex digits
for (size_t i = str.size() - hash_eg.size(); i < str.size(); ++i) {
if (!is_hexdig(str[i])) {
return false;
}
}
return true;
}

bool is_probably_rust_legacy(const std::string &str) {
// Is the string too short to have a hash part in thefirst place?
if (!has_hash(str)) {
return false;
}

// Throw out `$$` and `$????$`, but not in-between
const char *ptr = str.data();
const char *end = ptr + str.size() - hash_pre.size() - hash_eg.size();
for (; ptr <= end; ++ptr) {
if (*ptr == '$') {
if (ptr[1] == '$') {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this is safe because the string has the tailing hash and there never will be $ as the last char here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, and also because end excludes the hash part, so we're guaranteed to have several characters of "string" left after any ptr.

return false;
}
return ptr[2] == '$' || ptr[3] == '$' || ptr[4] == '$';
Comment thread
jbachorik marked this conversation as resolved.
}
if (*ptr == '.') {
return '.' != ptr[1] ||
'.' != ptr[2]; // '.' and '..' are fine, '...' is not
}
}
return true;
}

// Demangles a Rust string by building a copy piece-by-piece
std::string demangle(const std::string &str) {
std::string ret;
ret.reserve(str.size() - hash_eg.size() - hash_pre.size());

size_t i = 0;

// Special-case for repairing C++ demangling defect for Rust
if (str[0] == '_' && str[1] == '$') {
++i;
}

for (; i < str.size() - hash_pre.size() - hash_eg.size(); ++i) {

// Fast sieve for pattern-matching, since we know first chars
if (str[i] == '.' || str[i] == '$') {
bool replaced = false;

// Try to replace one of the patterns
for (const auto &pair : patterns) {
const std::string &pattern = pair.first;
const std::string &replacement = pair.second;
if (!str.compare(i, pattern.size(), pattern)) {
ret += replacement;
i += pattern.size() - 1; // -1 because iterator inc
replaced = true;
break;
}
}

// If we failed to replace, try a few failovers. Notably, we recognize
// that Rust may insert Unicode code points in the function name (other
// implementations treat many individual points as patterns to search on)
if (!replaced && str[i] == '.') {
// Special-case for '.'
ret += '-';
} else if (!replaced && !str.compare(i, 2, "$u") && str[i + 4] == '$') {
const size_t k_nb_read_chars = 5;
const int hexa_base = 16;
const int hi = hex_to_int(str[i + 2]);
const int lo = hex_to_int(str[i + 3]);
if (hi != -1 && lo != -1) {
ret += static_cast<unsigned char>(lo + hexa_base * hi);
i += k_nb_read_chars - 1; // - 1 because iterator inc
} else {
// We didn't have valid unicode values. No further processing is
// done, reinsert the `$u...$` sequence into the output string.
ret += str.substr(i, k_nb_read_chars);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this skipping the $u??$ sequence or copying it to the ret string?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. It "skips" the $u???$ pattern in the sense that it doesn't perform additional processing on it, but it actually inserts the unprocessed $u???$ sequence into the destination string. In other words, tokens are only consumed when they are successfully matched, otherwise they are inserted literally into the result.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you extend the comment with 'inserted, not processed' part? Thanks!

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 645fe69

i += k_nb_read_chars - 1; // -1 because iterator inc
}
} else if (!replaced) {
ret += str[i];
}
} else {
ret += str[i];
}
}

return ret;
}
} // namespace RustDemangler
25 changes: 25 additions & 0 deletions ddprof-lib/src/main/cpp/rustDemangler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright 2021, 2023 Datadog, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once
#include <string>

namespace RustDemangler {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, namespace :)
First time I see it here. It's been a long time, I almost forgot you could do this in C++ 🤣

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Heh, mostly here I was afraid of conflicting with something else in the project (I'm not familiar with the codebase). Happy to remove if it would align with the design of the repo. 🙏

Copy link
Copy Markdown
Collaborator

@jbachorik jbachorik Dec 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it is necessary to remove it - I assume it is a purely syntactic construct and won't result in linking more of libstdc++ to the final library ...

bool is_probably_rust_legacy(const std::string &str);
std::string demangle(const std::string &str);
}; // namespace RustDemangler


1 change: 1 addition & 0 deletions ddprof-lib/src/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ file(GLOB_RECURSE TEST_FILES CONFIGURE_DEPENDS
file(GLOB_RECURSE SRC_FILES CONFIGURE_DEPENDS
"${PROJECT_SOURCE_DIR}/../main/cpp/os_${OS_SUFFIX}.cpp"
"${PROJECT_SOURCE_DIR}/../main/cpp/context.cpp"
"${PROJECT_SOURCE_DIR}/../main/cpp/rustDemangler.cpp"
"${PROJECT_SOURCE_DIR}/../main/cpp/counters.cpp"
"${PROJECT_SOURCE_DIR}/../main/cpp/threadFilter.cpp"
"${PROJECT_SOURCE_DIR}/../main/cpp/dictionary.cpp"
Expand Down
Loading