From 61d3e3d014aaa3a0fd871812e6ed56892623842e Mon Sep 17 00:00:00 2001 From: Eladash Date: Fri, 19 Mar 2021 17:25:32 +0200 Subject: [PATCH] Memory string searcher improvements * Search the entire PS3 4GB address space instead of a small area of memory which is meant mostly for PRX/SELF memory aka vm::main segment. This limitation rendered string searching nearly useless. * Implement hexadecimal byte-string, hexadecimal integer, float and double value search formats. * Multi-thread searching. * Optimize searching algorithm vastly. * Fix crash on empty string, report an error. * Implement case-insensitive string search. --- rpcs3/rpcs3qt/memory_string_searcher.cpp | 282 ++++++++++++++++++++--- rpcs3/rpcs3qt/memory_string_searcher.h | 7 +- 2 files changed, 262 insertions(+), 27 deletions(-) diff --git a/rpcs3/rpcs3qt/memory_string_searcher.cpp b/rpcs3/rpcs3qt/memory_string_searcher.cpp index 614ae77cb1e1..a6a037625d0e 100644 --- a/rpcs3/rpcs3qt/memory_string_searcher.cpp +++ b/rpcs3/rpcs3qt/memory_string_searcher.cpp @@ -1,14 +1,33 @@ #include "memory_string_searcher.h" #include "Emu/Memory/vm.h" +#include "Emu/Memory/vm_reservation.h" + +#include "Utilities/Thread.h" +#include "Utilities/StrUtil.h" #include +#include #include #include +#include +#include + +#include #include "util/logs.hpp" +#include "util/sysinfo.hpp" +#include "util/asm.hpp" LOG_CHANNEL(gui_log, "GUI"); +enum : int +{ + as_string, + as_hex, + as_f64, + as_f32, +}; + memory_string_searcher::memory_string_searcher(QWidget* parent) : QDialog(parent) { @@ -19,11 +38,29 @@ memory_string_searcher::memory_string_searcher(QWidget* parent) m_addr_line = new QLineEdit(this); m_addr_line->setFixedWidth(QLabel("This is the very length of the lineedit due to hidpi reasons.").sizeHint().width()); m_addr_line->setPlaceholderText(tr("Search...")); + m_addr_line->setMaxLength(4096); QPushButton* button_search = new QPushButton(tr("&Search"), this); + m_chkbox_case_insensitive = new QCheckBox(tr("Case Insensitive"), 
this); + m_chkbox_case_insensitive->setCheckable(true); + m_chkbox_case_insensitive->setToolTip(tr("When using string mode, the Characaters' case will not matter both in string and in memory." + "\nWarning: this may reduce performance of the search.")); + + m_cbox_input_mode = new QComboBox(this); + m_cbox_input_mode->addItem("String", QVariant::fromValue(+as_string)); + m_cbox_input_mode->addItem("HEX bytes/integer", QVariant::fromValue(+as_hex)); + m_cbox_input_mode->addItem("Double", QVariant::fromValue(+as_f64)); + m_cbox_input_mode->addItem("Float", QVariant::fromValue(+as_f32)); + m_cbox_input_mode->setToolTip(tr("String - search the memory for the specfied string." + "\nHEX bytes/integer: search the memory for hexadeciaml values. Spaces, commas, \"0x\", \"0X\", \"\\x\" ensure separation of bytes but they are not mandatory." + "\nDouble: reinterpret the string as 64-bit precision floating point value. Values are searched for exact representation, meaning -0 != 0." + "\nFloat: reinterpret the string as 32-bit precision floating point value. 
Values are searched for exact representation, meaning -0 != 0.")); + QHBoxLayout* hbox_panel = new QHBoxLayout(); hbox_panel->addWidget(m_addr_line); + hbox_panel->addWidget(m_cbox_input_mode); + hbox_panel->addWidget(m_chkbox_case_insensitive); hbox_panel->addWidget(button_search); setLayout(hbox_panel); @@ -35,48 +72,241 @@ memory_string_searcher::memory_string_searcher(QWidget* parent) void memory_string_searcher::OnSearch() { - const std::string wstr = m_addr_line->text().toStdString(); - const char *str = wstr.c_str(); - const u32 len = ::size32(wstr); + std::string wstr = m_addr_line->text().toStdString(); - gui_log.notice("Searching for string %s", str); + if (wstr.empty() || wstr.size() >= 4096u) + { + gui_log.error("String is empty or too long (size=%u)", wstr.size()); + return; + } - // Search the address space for the string - u32 strIndex = 0; - u32 numFound = 0; - const auto area = vm::get(vm::main); - for (u32 addr = area->addr; addr < area->addr + area->size; addr++) + gui_log.notice("Searching for %s", wstr); + + const int mode = std::max(m_cbox_input_mode->currentIndex(), 0); + bool case_insensitive = false; + + switch (mode) { - if (!vm::check_addr(addr)) + case as_string: + { + case_insensitive = m_chkbox_case_insensitive->isChecked(); + + if (case_insensitive) { - strIndex = 0; - continue; + std::transform(wstr.begin(), wstr.end(), wstr.begin(), ::tolower); } - u8 byte = vm::read8(addr); - if (byte == str[strIndex]) + break; + } + case as_hex: + { + constexpr std::string_view hex_chars = "0123456789ABCDEFabcdef"; + + // Split + std::vector parts = fmt::split(wstr, {" ", ",", "0x", "0X", "\\x"}); + + // Pad zeroes + for (std::string& part : parts) { - if (strIndex == len) + if (part.size() % 2) { - // Found it - gui_log.notice("Found @ %04x", addr - len); - numFound++; - strIndex = 0; - continue; + gui_log.warning("Padding string part with '0' at front due to odd hexadeciaml characters count."); + part.insert(wstr.begin(), '0'); } + } + + // 
Concat strings + wstr.clear(); + for (const std::string& part : parts) + { + wstr += part; + } - strIndex++; + if (usz pos = wstr.find_first_not_of(hex_chars); pos != umax) + { + gui_log.error("String '%s' cannot be interpreted as hexadecimal byte string due to unknown character '%s'." + , m_addr_line->text().toStdString(), std::string_view{&wstr[pos], 1}); + return; } - else + + std::string dst; + dst.resize(wstr.size() / 2); + + for (usz pos = 0; pos < wstr.size() / 2; pos++) { - strIndex = 0; + uchar value = 0; + std::from_chars(wstr.data() + pos * 2, wstr.data() + (pos + 1) * 2, value, 16); + std::memcpy(dst.data() + pos, &value, 1); } - if (addr % (1024 * 1024 * 64) == 0) // Log every 64mb + wstr = std::move(dst); + break; + } + case as_f64: + { + // format arg is a compilation workaround (compiler bug) + f64 value = 0; + const auto [ptr, err] = std::from_chars(wstr.data(), wstr.data() + wstr.size(), value, std::chars_format::general); + + if (ptr != wstr.data() + wstr.size() || err != std::errc()) { - gui_log.notice("Searching %04x ...", addr); + gui_log.error("String '%s' cannot be interpreted as double.", wstr); + return; } + + // Modify string to the floating point value (big endian) + be_t final = value; + wstr.resize(sizeof(final)); + std::memcpy(wstr.data(), &final, sizeof(final)); + break; } + case as_f32: + { + f32 value = 0; + const auto [ptr, err] = std::from_chars(wstr.data(), wstr.data() + wstr.size(), value, std::chars_format::general); + + if (ptr != wstr.data() + wstr.size() || err != std::errc()) + { + gui_log.error("String '%s' cannot be interpreted as float.", wstr); + return; + } + + // Modify string to the floating point value + be_t final = value; + wstr.resize(sizeof(final)); + std::memcpy(wstr.data(), &final, sizeof(final)); + break; + } + default: ensure(false); + } + + // Search the address space for the string + atomic_t found = 0; + atomic_t avail_addr = 0; + + // There's no need for so many threads + const u32 max_threads = 
utils::aligned_div(utils::get_thread_count(), 2); + + constexpr u32 block_size = 0x2000000; + + vm::reader_lock rlock; + + named_thread_group workers("String Searcher "sv, max_threads, [&]() + { + u32 local_found = 0; + + u32 addr = 0; + bool ok = false; + + while (true) + { + if (!(addr % block_size)) + { + std::tie(addr, ok) = avail_addr.fetch_op([](u32& val) + { + if (val <= 0 - block_size) + { + // Iterate in 32MB blocks + val += block_size; + + if (!val) val = -1; // Overflow detection + + return true; + } + + return false; + }); + } + + if (!ok) + { + break; + } + + if (![&addr = addr]() + { + // Skip unmapped memory + for (const u32 end = utils::align(addr + 1, block_size) - 0x1000; !vm::check_addr(addr, 0); addr += 0x1000) + { + if (addr == end) + { + return false; + } + } + + return true; + }()) + { + if (addr == 0u - 0x1000) + { + break; + } + + // The entire block is unmapped + addr += 0x1000; + continue; + } + + u64 addr_max = addr; + + const u64 end_mem = std::min(utils::align(addr + 1, block_size) + 0x1000, UINT32_MAX); + + // Determine allocation size quickly + while (addr_max < end_mem && vm::check_addr(static_cast(addr_max), vm::page_1m_size)) + { + addr_max += 0x100000; + } + + while (addr_max < end_mem && vm::check_addr(static_cast(addr_max), vm::page_64k_size)) + { + addr_max += 0x10000; + } + + while (addr_max < end_mem && vm::check_addr(static_cast(addr_max), 0)) + { + addr_max += 0x1000; + } + + std::string_view section{vm::get_super_ptr(addr), addr_max - addr}; + + usz found_first_char = 0; + + while (found_first_char = section.find_first_of(wstr[0], found_first_char), found_first_char != umax) + { + const u32 start = addr + found_first_char; + + if (std::string_view test_sv{vm::get_super_ptr(start), addr_max - start}; case_insensitive) + { + // Do not use allocating functions such as fmt::to_lower + if (test_sv.size() >= wstr.size() && std::all_of(wstr.begin(), wstr.end(), [&](const char& c) { return c == ::tolower(test_sv[&c - 
wstr.data()]); })) + { + gui_log.success("Found at 0x%08x", start); + local_found++; + } + } + else + { + if (test_sv.starts_with(wstr)) + { + gui_log.success("Found at 0x%08x", start); + local_found++; + } + } + + // Allow overlapping strings + found_first_char++; + } + + addr = static_cast(std::min(end_mem - 0x1000, addr_max)); + + // Check if at last page + if (addr_max == 0u - 0x1000) break; + } + + found += local_found; + }); + + workers.join(); - gui_log.notice("Search completed (found %d matches)", numFound); + gui_log.success("Search completed (found %u matches)", +found); } diff --git a/rpcs3/rpcs3qt/memory_string_searcher.h b/rpcs3/rpcs3qt/memory_string_searcher.h index c0c339996f91..d246d1688ebd 100644 --- a/rpcs3/rpcs3qt/memory_string_searcher.h +++ b/rpcs3/rpcs3qt/memory_string_searcher.h @@ -1,13 +1,18 @@ #pragma once #include -#include + +class QLineEdit; +class QCheckBox; +class QComboBox; class memory_string_searcher : public QDialog { Q_OBJECT QLineEdit* m_addr_line; + QCheckBox* m_chkbox_case_insensitive = nullptr; + QComboBox* m_cbox_input_mode = nullptr; public: memory_string_searcher(QWidget* parent);