Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle BOM in the beginning of the script #439

Merged
merged 27 commits into from May 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
f37d0e1
Skip UTF-8 BOM before parsing begins.
AlekMosingiewicz May 10, 2018
1d78233
Cover skipping BOM with test.
AlekMosingiewicz May 10, 2018
1e8f7f9
Simplify BOM test.
AlekMosingiewicz May 10, 2018
efbebee
Throw exception when user-provided input contains BOM.
AlekMosingiewicz May 13, 2018
a024db0
Catch BOM at the beginning of file.
AlekMosingiewicz May 13, 2018
c09af92
Decrement file size when BOM is present to avoid parsing errors.
AlekMosingiewicz May 13, 2018
322568b
Check for illegal characters while parsing input.
AlekMosingiewicz May 15, 2018
0d44b0b
Added doc comment.
AlekMosingiewicz May 15, 2018
60c0a0b
Refactor skippable BOM detection.
AlekMosingiewicz May 21, 2018
b70a9e7
Non-ASCII characters now in random positions in test; test renamed.
AlekMosingiewicz May 21, 2018
be29b0a
Merge branch 'develop' into handle-bom-in-script
AlekMosingiewicz May 22, 2018
d880d46
Type cast fix.
AlekMosingiewicz May 22, 2018
f9615ef
Another text size assertion.
AlekMosingiewicz May 22, 2018
df6bc8f
Add missing test cases.
AlekMosingiewicz May 22, 2018
67dcd3e
Test case for BOM in user-provided string.
AlekMosingiewicz May 22, 2018
4ada12a
Check EOF rather than buffer_size when skipping BOM.
AlekMosingiewicz May 23, 2018
ac10575
Read the stream byte by byte, condition for size when skipping BOM.
AlekMosingiewicz May 24, 2018
edadb7a
Use readsome instead of reading the stream byte-by-byte to
AlekMosingiewicz May 24, 2018
51bb793
Initialize buffer to store potential BOM data before storing
AlekMosingiewicz May 24, 2018
51693aa
Skip buffer initialization.
AlekMosingiewicz May 25, 2018
0e964da
Attempt to remedy the problem occurring on Clang.
AlekMosingiewicz May 25, 2018
42c355a
Revert "Attempt to remedy the problem occurring on Clang."
AlekMosingiewicz May 25, 2018
1711d50
Another attempt to remedy the problem occurring on Clang.
AlekMosingiewicz May 25, 2018
393f8d3
Travis build quick fix.
AlekMosingiewicz May 25, 2018
fb63503
Fix for Clang.
AlekMosingiewicz May 25, 2018
0f67b2f
Another fix for Clang.
AlekMosingiewicz May 25, 2018
b3f77f0
Fix implicit conversion warning.
AlekMosingiewicz May 25, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 27 additions & 1 deletion include/chaiscript/language/chaiscript_engine.hpp
Expand Up @@ -204,6 +204,27 @@ namespace chaiscript
m_engine.add(fun([this](const std::string& t_namespace_name) { import(t_namespace_name); }), "import");
}

/// Skip a UTF-8 byte order mark (BOM) at the beginning of a file.
///
/// Reads up to three bytes from \p infile. If they are exactly EF BB BF the
/// stream is left positioned just past them (offset 3); otherwise the stream
/// is rewound to offset 0.
///
/// \param infile stream to inspect; its read position is modified
/// \return true if a BOM was found and skipped, false otherwise
static bool skip_bom(std::ifstream &infile) {
  const std::streamsize bom_size = 3;
  char buffer[3] = {};

  infile.read(buffer, bom_size);

  const bool has_bom = (infile.gcount() == bom_size)
      && (buffer[0] == '\xef')
      && (buffer[1] == '\xbb')
      && (buffer[2] == '\xbf');

  // A file shorter than three bytes makes read() hit end-of-file, which sets
  // both eofbit and failbit. seekg() does nothing while failbit is set, so the
  // stream would stay unusable for the caller. Drop only the bits caused by
  // the short read; a badbit from a genuine I/O error is preserved.
  if (infile.eof()) {
    infile.clear(infile.rdstate() & ~(std::ios_base::failbit | std::ios_base::eofbit));
  }

  infile.seekg(has_bom ? 3 : 0);

  return has_bom;
}

/// Helper function for loading a file
static std::string load_file(const std::string &t_filename) {
Expand All @@ -213,11 +234,16 @@ namespace chaiscript
throw chaiscript::exception::file_not_found_error(t_filename);
}

const auto size = infile.tellg();
auto size = infile.tellg();
infile.seekg(0, std::ios::beg);

assert(size >= 0);

if (skip_bom(infile)) {
size-=3; // decrement the BOM size from file size, otherwise we'll get parsing errors
assert(size >=0 ); //and check if there's more text
}

if (size == std::streampos(0))
{
return std::string();
Expand Down
4 changes: 4 additions & 0 deletions include/chaiscript/language/chaiscript_parser.hpp
Expand Up @@ -520,10 +520,14 @@ namespace chaiscript

/// Skips ChaiScript whitespace, which means space and tab, but not cr/lf
/// jespada: Modified SkipWS to optionally skip LF ('\n') and/or CR+LF ("\r\n")
/// AlekMosingiewicz: Added exception when illegal character detected
bool SkipWS(bool skip_cr=false) {
bool retval = false;

while (m_position.has_more()) {
if(static_cast<unsigned char>(*m_position) > 0x7e) {
throw exception::eval_error("Illegal character", File_Position(m_position.line, m_position.col), *m_filename);
}
auto end_line = (*m_position != 0) && ((*m_position == '\n') || (*m_position == '\r' && *(m_position+1) == '\n'));

if ( char_in_alphabet(*m_position,detail::white_alphabet) || (skip_cr && end_line)) {
Expand Down
22 changes: 22 additions & 0 deletions unittests/compiled_tests.cpp
Expand Up @@ -352,7 +352,29 @@ TEST_CASE("Functor cast")
CHECK(d == 3 * 6);
}

TEST_CASE("Non-ASCII characters in the middle of string")
{
  // A stray 0xEF byte in the middle of the input must make evaluation
  // fail with eval_error.
  chaiscript::ChaiScript_Basic engine(create_chaiscript_stdlib(), create_chaiscript_parser());

  CHECK_THROWS_AS(
      engine.eval<std::string>("prin\xeft \"Hello World\""),
      chaiscript::exception::eval_error);
}

TEST_CASE("Non-ASCII characters in the beginning of string")
{
  // A single 0xEF byte ahead of otherwise valid input must make
  // evaluation fail with eval_error.
  chaiscript::ChaiScript_Basic engine(create_chaiscript_stdlib(), create_chaiscript_parser());

  CHECK_THROWS_AS(
      engine.eval<std::string>("\xefprint \"Hello World\""),
      chaiscript::exception::eval_error);
}

TEST_CASE("Non-ASCII characters in the end of string")
{
  // A trailing 0xEF byte after otherwise valid input must make
  // evaluation fail with eval_error.
  chaiscript::ChaiScript_Basic engine(create_chaiscript_stdlib(), create_chaiscript_parser());

  CHECK_THROWS_AS(
      engine.eval<std::string>("print \"Hello World\"\xef"),
      chaiscript::exception::eval_error);
}

TEST_CASE("BOM in string")
{
  // The three bytes EF BB BF (a UTF-8 BOM) at the start of a string passed
  // directly to eval() must make evaluation fail with eval_error.
  chaiscript::ChaiScript_Basic engine(create_chaiscript_stdlib(), create_chaiscript_parser());

  CHECK_THROWS_AS(
      engine.eval<std::string>("\xef\xbb\xbfprint \"Hello World\""),
      chaiscript::exception::eval_error);
}

int set_state_test_myfun()
{
Expand Down
2 changes: 2 additions & 0 deletions unittests/eval_file_with_bom.chai
@@ -0,0 +1,2 @@
// Evaluate the include file, then check that the function it defines is
// callable and returns true.
// NOTE(review): file_with_bom.inc presumably starts with a UTF-8 BOM - confirm
// the fixture on disk actually contains one.
eval_file("file_with_bom.inc")
assert_true(alwaysTrue())
3 changes: 3 additions & 0 deletions unittests/file_with_bom.inc
@@ -0,0 +1,3 @@
def alwaysTrue() {
// Unconditionally returns true.
return true
}