Skip to content

Commit

Permalink
Resolved additional encoding issues which could take place when displ…
Browse files Browse the repository at this point in the history
…aying

RT_VERSION resources.
Now most strings handled by the program should be UTF-8 encoded.
  • Loading branch information
JusticeRage committed May 29, 2016
1 parent 7d1827c commit af2b664
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 27 deletions.
6 changes: 3 additions & 3 deletions include/manacommons/escape.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ struct escaped_string_raw
*
* Paths contained in debug information insert unescaped backslashes which cause
* the resulting JSON to be invalid.
* Non-printable characters are not escaped in this grammar, because we expect
* UTF-8 strings.
*
* WARNING: Single quotes are NOT escaped.
*/
Expand All @@ -82,9 +84,7 @@ struct escaped_string_json
('\r', "\\r")('\t', "\\t")('\v', "\\v")('\\', "\\\\")
('\"', "\\\"");

// JSON fails miserably on non-printable characters, but
// at the same time doesn't support the \x notation.
esc_str = *(esc_char | boost::spirit::karma::iso8859_1::print | "\\u00" << karma::right_align(2, 0)[karma::hex]);
esc_str = *(esc_char | boost::spirit::karma::char_);
}

karma::rule<OutputIterator, std::string()> esc_str;
Expand Down
4 changes: 2 additions & 2 deletions include/manape/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ std::string read_ascii_string(FILE* f, unsigned int max_bytes = 0);
*
* @param FILE* f The file from which to read. The read will occur at the cursor's current position!
*
* @return The string at the current location in the file, converted to ASCII.
* @return The string at the current location in the file, encoded as UTF-8.
*/
std::string read_prefixed_unicode_string(FILE* f);

Expand All @@ -94,7 +94,7 @@ std::wstring read_prefixed_unicode_wstring(FILE* f);
* @param int max_bytes The maximum number of bytes to read from the file. 0 means no limit.
* If this parameter is odd, it will be rounded to max_bytes-1 since bytes are read two by two.
*
* @return The string at the current location in the file, converted to ASCII.
* @return The string at the current location in the file, encoded as UTF-8.
*/
std::string read_unicode_string(FILE* f, unsigned int max_bytes = 0);

Expand Down
10 changes: 1 addition & 9 deletions manape/resources.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,16 +292,8 @@ DECLSPEC const_shared_strings Resource::interpret_as()
// RT_STRING resources are made of 16 contiguous "unicode" strings.
for (int i = 0; i < 16; ++i)
{
std::wstring s = utils::read_prefixed_unicode_wstring(f);
res->push_back(utils::read_prefixed_unicode_string(f));
std::vector<boost::uint8_t> utf8result;
try
{
utf8::utf16to8(s.begin(), s.end(), std::back_inserter(utf8result));
res->push_back(std::string(utf8result.begin(), utf8result.end()));
}
catch (utf8::invalid_utf16) {
PRINT_WARNING << "Couldn't convert a string from a RT_STRING resource to UTF-8!" << std::endl;
}
}

END:
Expand Down
32 changes: 22 additions & 10 deletions manape/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,17 @@ std::string read_unicode_string(FILE* f, unsigned int max_bytes)
}
}

// Convert the wstring into a string
auto conv = boost::shared_array<char>(new char[s.size() + 1]);
memset(conv.get(), 0, sizeof(char) * (s.size() + 1));
wcstombs(conv.get(), s.c_str(), s.size());
return std::string(conv.get());
try
{
std::vector<boost::uint8_t> utf8result;
utf8::utf16to8(s.begin(), s.end(), std::back_inserter(utf8result));
return std::string(utf8result.begin(), utf8result.end());
}
catch (utf8::invalid_utf16) {
PRINT_WARNING << "Couldn't convert a string from a RT_STRING resource to UTF-8!"
<< DEBUG_INFO << std::endl;
}
return "";
}

// ----------------------------------------------------------------------------
Expand Down Expand Up @@ -95,11 +101,17 @@ std::string read_prefixed_unicode_string(FILE* f)
{
std::wstring s = read_prefixed_unicode_wstring(f);

// Convert the wstring into a string
auto conv = boost::shared_array<char>(new char[s.size() + 1]);
memset(conv.get(), 0, sizeof(char) * (s.size() + 1));
wcstombs(conv.get(), s.c_str(), s.size());
return std::string(conv.get());
try
{
std::vector<boost::uint8_t> utf8result;
utf8::utf16to8(s.begin(), s.end(), std::back_inserter(utf8result));
return std::string(utf8result.begin(), utf8result.end());
}
catch (utf8::invalid_utf16) {
PRINT_WARNING << "Couldn't convert a string from a RT_STRING resource to UTF-8!"
<< DEBUG_INFO << std::endl;
}
return "";
}

// ----------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ cmake_minimum_required (VERSION 2.6)
project (manalyze-tests)
include_directories(${PROJECT_SOURCE_DIR}/include)

add_executable(manalyze-tests fixtures.cpp hash-library.cpp pe.cpp imports.cpp resources.cpp section.cpp escape.cpp
add_executable(manalyze-tests fixtures.cpp hash-library.cpp pe.cpp imports.cpp resources.cpp section.cpp escape.cpp encoding.cpp
../src/import_hash.cpp)

target_link_libraries(
Expand Down
50 changes: 50 additions & 0 deletions test/encoding.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
This file is part of Manalyze.
Manalyze is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Manalyze is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Manalyze. If not, see <http://www.gnu.org/licenses/>.
*/

#include <string>
#include <vector>

#include <boost/test/unit_test.hpp>
#include <boost/cstdint.hpp>

#include "manape/utf8/utf8.h"

// ----------------------------------------------------------------------------

/**
 *	@brief	Converts a wide string to UTF-8 and checks the result against the expected bytes.
 *
 *	The input is handed to utf8::utf16to8, i.e. its elements are consumed as
 *	UTF-16 code units. A mismatch is reported through BOOST_CHECK_EQUAL, so the
 *	calling test case keeps running after a failed comparison.
 *
 *	@param	const std::wstring& input	The wide string to convert.
 *	@param	const std::string& expected	The UTF-8 byte sequence the conversion should produce.
 */
void check_conversion(const std::wstring& input, const std::string& expected)
{
    // Same intermediate container as the production code in manape/utils.cpp.
    std::vector<boost::uint8_t> utf8result;
    utf8::utf16to8(input.begin(), input.end(), std::back_inserter(utf8result));

    const std::string result(utf8result.begin(), utf8result.end());
    BOOST_CHECK_EQUAL(result, expected);
}

// ----------------------------------------------------------------------------

/**
 *	@brief	Verifies the UTF-16 to UTF-8 conversion on plain ASCII and on several
 *			non-ASCII scripts (all inside the Basic Multilingual Plane).
 *
 *	Inputs are written with universal-character-names (\uXXXX) rather than raw
 *	non-ASCII characters, so the test does not depend on the encoding the
 *	compiler assumes for this source file. Expected values are the raw UTF-8
 *	bytes of the same text.
 */
BOOST_AUTO_TEST_CASE(test_utf16_to_utf8)
{
    // Pure ASCII is passed through unchanged.
    check_conversion(L"Simple ascii string", "Simple ascii string");

    // U+00E9: Latin small letter e with acute.
    check_conversion(L"\u00e9", "\xc3\xa9");

    // U+00A9: copyright sign, alone and at the start of a typical version string.
    check_conversion(L"\u00a9", "\xc2\xa9");
    check_conversion(L"\u00a9 Microsoft Corporation. All rights reserved.",
                     "\xc2\xa9 Microsoft Corporation. All rights reserved.");

    // "Unicode" written with Latin Extended letters and a trailing combining mark (U+033D).
    check_conversion(L"\u016an\u012d" L"c\u014d" L"de\u033d",
                     "\xc5\xaa\x6e\xc4\xad\x63\xc5\x8d\x64\x65\xcc\xbd");

    // Cyrillic (two-byte UTF-8 sequences).
    check_conversion(L"\u042e\u043d\u0438\u043a\u043e\u0434",
                     "\xd0\xae\xd0\xbd\xd0\xb8\xd0\xba\xd0\xbe\xd0\xb4");

    // Georgian (three-byte UTF-8 sequences).
    check_conversion(L"\u10e3\u10dc\u10d8\u10d9\u10dd\u10d3\u10d8",
                     "\xe1\x83\xa3\xe1\x83\x9c\xe1\x83\x98\xe1\x83\x99\xe1\x83\x9d\xe1\x83\x93\xe1\x83\x98");

    // CJK ideographs (three-byte UTF-8 sequences).
    check_conversion(L"\u6a19\u6e96\u842c\u570b\u78bc",
                     "\xe6\xa8\x99\xe6\xba\x96\xe8\x90\xac\xe5\x9c\x8b\xe7\xa2\xbc");
}

// ----------------------------------------------------------------------------
3 changes: 1 addition & 2 deletions test/escape.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ BOOST_AUTO_TEST_CASE(test_string_escape_json)
check_string_escaping_json("\"", "\\\"");
check_string_escaping_json("\\", "\\\\");
check_string_escaping_json("\\\\", "\\\\\\\\");
check_string_escaping_json("é", "\\u00e9");
check_string_escaping_json("\x01", "\\u0001");
check_string_escaping_json("\x01", "\x01");
check_string_escaping_json("\r\n", "\\r\\n");
}

0 comments on commit af2b664

Please sign in to comment.