From e0fc67c108facf17738e5852a74798babcef0e1b Mon Sep 17 00:00:00 2001 From: Bastian Blokland Date: Wed, 8 Jan 2020 18:15:53 +0200 Subject: [PATCH 1/5] Add character literal to lexer --- include/lex/error.hpp | 8 ++++ include/lex/lexer.hpp | 2 + include/lex/token.hpp | 2 + include/lex/token_kind.hpp | 1 + include/lex/token_payload_lit_char.hpp | 32 +++++++++++++ src/CMakeLists.txt | 1 + src/input/char_escape.cpp | 2 + src/lex/error.cpp | 16 +++++++ src/lex/lexer.cpp | 56 ++++++++++++++++++++-- src/lex/token.cpp | 5 ++ src/lex/token_cat.cpp | 1 + src/lex/token_kind.cpp | 3 ++ src/lex/token_payload_lit_char.cpp | 17 +++++++ tests/CMakeLists.txt | 1 + tests/lex/litchar_test.cpp | 65 ++++++++++++++++++++++++++ 15 files changed, 207 insertions(+), 5 deletions(-) create mode 100644 include/lex/token_payload_lit_char.hpp create mode 100644 src/lex/token_payload_lit_char.cpp create mode 100644 tests/lex/litchar_test.cpp diff --git a/include/lex/error.hpp b/include/lex/error.hpp index f74c54bd..2cb0fc65 100644 --- a/include/lex/error.hpp +++ b/include/lex/error.hpp @@ -25,6 +25,14 @@ namespace lex { [[nodiscard]] auto errLitStrInvalidEscape(input::Span span = input::Span{0}) -> Token; +[[nodiscard]] auto erLitCharEmpty(input::Span span = input::Span{0}) -> Token; + +[[nodiscard]] auto errLitCharTooBig(input::Span span = input::Span{0}) -> Token; + +[[nodiscard]] auto errLitCharUnterminated(input::Span span = input::Span{0}) -> Token; + +[[nodiscard]] auto errLitCharInvalidEscape(input::Span span = input::Span{0}) -> Token; + [[nodiscard]] auto errIdentifierIllegalCharacter(input::Span span = input::Span{0}) -> Token; [[nodiscard]] auto errIdentifierIllegalSequence(input::Span span = input::Span{0}) -> Token; diff --git a/include/lex/lexer.hpp b/include/lex/lexer.hpp index df0d0026..3ad2f57a 100644 --- a/include/lex/lexer.hpp +++ b/include/lex/lexer.hpp @@ -22,7 +22,9 @@ class LexerImpl { auto nextLitIntHex() -> Token; auto nextLitIntBinary() -> Token; auto nextLitIntOctal() 
-> Token; + auto nextLitStr() -> Token; + auto nextLitChar() -> Token; auto nextWordToken(char startingChar) -> Token; auto nextLineComment() -> Token; diff --git a/include/lex/token.hpp b/include/lex/token.hpp index 657156cd..46abd5b6 100644 --- a/include/lex/token.hpp +++ b/include/lex/token.hpp @@ -67,6 +67,8 @@ auto litBoolToken(bool val, input::Span span = input::Span{0}) -> Token; auto litStrToken(std::string val, input::Span span = input::Span{0}) -> Token; +auto litCharToken(char val, input::Span span = input::Span{0}) -> Token; + auto keywordToken(Keyword keyword, input::Span span = input::Span{0}) -> Token; auto identiferToken(std::string id, input::Span span = input::Span{0}) -> Token; diff --git a/include/lex/token_kind.hpp b/include/lex/token_kind.hpp index 47e6e959..697028c8 100644 --- a/include/lex/token_kind.hpp +++ b/include/lex/token_kind.hpp @@ -44,6 +44,7 @@ enum class TokenKind { LitFloat, LitBool, LitString, + LitChar, Keyword, Identifier, LineComment, diff --git a/include/lex/token_payload_lit_char.hpp b/include/lex/token_payload_lit_char.hpp new file mode 100644 index 00000000..ef547b90 --- /dev/null +++ b/include/lex/token_payload_lit_char.hpp @@ -0,0 +1,32 @@ +#pragma once +#include "lex/token_payload.hpp" + +namespace lex { + +class LitCharTokenPayload final : public TokenPayload { +public: + LitCharTokenPayload() = delete; + explicit LitCharTokenPayload(const char val) : m_val{val} {} + + auto operator==(const TokenPayload& rhs) const noexcept -> bool override { + const auto castedRhs = dynamic_cast(&rhs); + return castedRhs != nullptr && m_val == castedRhs->m_val; + } + + auto operator!=(const TokenPayload& rhs) const noexcept -> bool override { + return !LitCharTokenPayload::operator==(rhs); + } + + [[nodiscard]] auto clone() -> std::unique_ptr override { + return std::make_unique(*this); + } + + [[nodiscard]] auto getValue() const noexcept { return m_val; } + +private: + const char m_val; + + auto print(std::ostream& out) const -> 
std::ostream& override; +}; + +} // namespace lex diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e3e64eb5..eef45f13 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,6 +16,7 @@ add_library(lex STATIC lex/token.cpp lex/token_cat.cpp lex/token_kind.cpp + lex/token_payload_lit_char.cpp lex/token_payload_lit_string.cpp lex/keyword.cpp lex/utilities.cpp) diff --git a/src/input/char_escape.cpp b/src/input/char_escape.cpp index 4ab9ec0c..64d92588 100644 --- a/src/input/char_escape.cpp +++ b/src/input/char_escape.cpp @@ -6,6 +6,7 @@ namespace input { auto escape(const char c) -> std::optional { static const std::unordered_map table = { {'"', '"'}, + {'\'', '\''}, {'\\', '\\'}, {'\a', 'a'}, {'\b', 'b'}, @@ -43,6 +44,7 @@ auto escape(const std::string& str) -> std::string { auto unescape(const char c) -> std::optional { static const std::unordered_map table = { {'"', '"'}, + {'\'', '\''}, {'\\', '\\'}, {'a', '\a'}, {'b', '\b'}, diff --git a/src/lex/error.cpp b/src/lex/error.cpp index 030a49ea..32dce38b 100644 --- a/src/lex/error.cpp +++ b/src/lex/error.cpp @@ -47,6 +47,22 @@ auto errLitStrInvalidEscape(const input::Span span) -> Token { return errorToken("Invalid escape sequence in string literal", span); } +auto erLitCharEmpty(const input::Span span) -> Token { + return errorToken("Empty character literal", span); +} + +auto errLitCharTooBig(const input::Span span) -> Token { + return errorToken("Character literal too big", span); +} + +auto errLitCharUnterminated(const input::Span span) -> Token { + return errorToken("Unterminated character literal", span); +} + +auto errLitCharInvalidEscape(const input::Span span) -> Token { + return errorToken("Invalid escape sequence in character literal", span); +} + auto errIdentifierIllegalCharacter(const input::Span span) -> Token { return errorToken("Identifier contains an invalid character", span); } diff --git a/src/lex/lexer.cpp b/src/lex/lexer.cpp index d8397b85..4e32ac01 100644 --- a/src/lex/lexer.cpp 
+++ b/src/lex/lexer.cpp @@ -179,15 +179,17 @@ auto LexerImpl::next() -> Token { case '\r': break; // Skip whitespace. case '0': - if (peekChar(0) == 'x' || peekChar(0) == 'X') { + switch (peekChar(0)) { + case 'x': + case 'X': consumeChar(); return nextLitIntHex(); - } - if (peekChar(0) == 'b' || peekChar(0) == 'B') { + case 'b': + case 'B': consumeChar(); return nextLitIntBinary(); - } - if (peekChar(0) == 'o' || peekChar(0) == 'O') { + case 'o': + case 'O': consumeChar(); return nextLitIntOctal(); } @@ -204,6 +206,8 @@ auto LexerImpl::next() -> Token { return nextLitNumber(c); case '"': return nextLitStr(); + case '\'': + return nextLitChar(); case '_': { const auto& nextChar = peekChar(0); if (isWordStart(nextChar) || isDigit(nextChar) || nextChar == '_') { @@ -430,6 +434,48 @@ auto LexerImpl::nextLitStr() -> Token { } } +auto LexerImpl::nextLitChar() -> Token { + // Starting quote already consumed by caller. + const auto startPos = m_inputPos; + + auto tooBig = false; + auto invalidEscapeSequence = false; + char c = consumeChar(); + if (c == '\'') { + return erLitCharEmpty(input::Span{startPos, m_inputPos}); + } + if (c == '\\') { + // Backslash is used to start an escape sequence. 
+ const auto unescapedC = input::unescape(consumeChar()); + if (unescapedC) { + c = unescapedC.value(); + } else { + invalidEscapeSequence = true; + } + } + while (true) { + switch (consumeChar()) { + case '\0': + case '\r': + case '\n': + return errLitCharUnterminated(input::Span{startPos, m_inputPos}); + case '\'': { + const auto span = input::Span{startPos, m_inputPos}; + if (tooBig) { + return errLitCharTooBig(span); + } + if (invalidEscapeSequence) { + return errLitCharInvalidEscape(span); + } + return litCharToken(c, span); + } + default: + tooBig = true; + break; + } + } +} + auto LexerImpl::nextWordToken(const char startingChar) -> Token { const auto startPos = m_inputPos; std::string result(1, startingChar); diff --git a/src/lex/token.cpp b/src/lex/token.cpp index 743579ca..46f1a347 100644 --- a/src/lex/token.cpp +++ b/src/lex/token.cpp @@ -5,6 +5,7 @@ #include "lex/token_payload_id.hpp" #include "lex/token_payload_keyword.hpp" #include "lex/token_payload_lit_bool.hpp" +#include "lex/token_payload_lit_char.hpp" #include "lex/token_payload_lit_float.hpp" #include "lex/token_payload_lit_int.hpp" #include "lex/token_payload_lit_string.hpp" @@ -102,6 +103,10 @@ auto litStrToken(std::string val, const input::Span span) -> Token { return Token{TokenKind::LitString, std::make_unique(std::move(val)), span}; } +auto litCharToken(char val, const input::Span span) -> Token { + return Token{TokenKind::LitChar, std::make_unique(val), span}; +} + auto keywordToken(Keyword keyword, const input::Span span) -> Token { return Token{TokenKind::Keyword, std::make_unique(keyword), span}; } diff --git a/src/lex/token_cat.cpp b/src/lex/token_cat.cpp index 22d320da..7b012821 100644 --- a/src/lex/token_cat.cpp +++ b/src/lex/token_cat.cpp @@ -75,6 +75,7 @@ auto lookupCat(const TokenKind kind) -> TokenCat { case TokenKind::LitFloat: case TokenKind::LitBool: case TokenKind::LitString: + case TokenKind::LitChar: return TokenCat::Literal; case TokenKind::Keyword: return 
TokenCat::Keyword; diff --git a/src/lex/token_kind.cpp b/src/lex/token_kind.cpp index c5b3e6af..ee73719a 100644 --- a/src/lex/token_kind.cpp +++ b/src/lex/token_kind.cpp @@ -121,6 +121,9 @@ auto operator<<(std::ostream& out, const TokenKind& rhs) -> std::ostream& { case TokenKind::LitString: out << "string"; break; + case TokenKind::LitChar: + out << "char"; + break; case TokenKind::Keyword: out << "keyword"; break; diff --git a/src/lex/token_payload_lit_char.cpp b/src/lex/token_payload_lit_char.cpp new file mode 100644 index 00000000..5806c4ee --- /dev/null +++ b/src/lex/token_payload_lit_char.cpp @@ -0,0 +1,17 @@ +#include "lex/token_payload_lit_char.hpp" +#include "input/char_escape.hpp" + +namespace lex { + +auto LitCharTokenPayload::print(std::ostream& out) const -> std::ostream& { + out << '\''; + const auto escaped = input::escape(m_val); + if (escaped) { + out << '\\' << *escaped; + } else { + out << m_val; + } + return out << '\''; +} + +} // namespace lex diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5cb77135..e43325e6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -64,6 +64,7 @@ add_executable(tests lex/litint_test.cpp lex/litbool_test.cpp lex/litstr_test.cpp + lex/litchar_test.cpp lex/operators_test.cpp lex/seperators_test.cpp lex/utilities_test.cpp diff --git a/tests/lex/litchar_test.cpp b/tests/lex/litchar_test.cpp new file mode 100644 index 00000000..afdd9188 --- /dev/null +++ b/tests/lex/litchar_test.cpp @@ -0,0 +1,65 @@ +#include "catch2/catch.hpp" +#include "helpers.hpp" +#include "lex/error.hpp" + +namespace lex { + +TEST_CASE("Lexing character literals", "[lex]") { + + SECTION("Single values") { + CHECK_TOKENS("'a'", litCharToken('a')); + CHECK_TOKENS("'\"'", litCharToken('"')); + CHECK_TOKENS("'0'", litCharToken('0')); + CHECK_TOKENS("';'", litCharToken(';')); + CHECK_TOKENS("'.'", litCharToken('.')); + CHECK_TOKENS("'}'", litCharToken('}')); + CHECK_TOKENS("'?'", litCharToken('?')); + CHECK_TOKENS("'?'", 
litCharToken('?')); + CHECK_TOKENS("' '", litCharToken(' ')); + } + + SECTION("Escaping") { + CHECK_TOKENS("'\\''", litCharToken('\'')); + CHECK_TOKENS("'\\\"'", litCharToken('"')); + CHECK_TOKENS("'\\''", litCharToken('\'')); + CHECK_TOKENS("'\\\\'", litCharToken('\\')); + CHECK_TOKENS("'\\a'", litCharToken('\a')); + CHECK_TOKENS("'\\b'", litCharToken('\b')); + CHECK_TOKENS("'\\f'", litCharToken('\f')); + CHECK_TOKENS("'\\n'", litCharToken('\n')); + CHECK_TOKENS("'\\r'", litCharToken('\r')); + CHECK_TOKENS("'\\t'", litCharToken('\t')); + CHECK_TOKENS("'\\v'", litCharToken('\v')); + } + + SECTION("Sequences") { + CHECK_TOKENS( + "'h' 'e' 'l''l''o'", + litCharToken('h'), + litCharToken('e'), + litCharToken('l'), + litCharToken('l'), + litCharToken('o')); + CHECK_TOKENS("'/','.'", litCharToken('/'), basicToken(TokenKind::SepComma), litCharToken('.')); + } + + SECTION("Errors") { + CHECK_TOKENS("''", erLitCharEmpty()); + CHECK_TOKENS("'aa'", errLitCharTooBig()); + CHECK_TOKENS("'€'", errLitCharTooBig()); + CHECK_TOKENS("'你'", errLitCharTooBig()); + CHECK_TOKENS("'\\g'", errLitCharInvalidEscape()); + CHECK_TOKENS("'\\1'", errLitCharInvalidEscape()); + CHECK_TOKENS("'", errLitCharUnterminated()); + CHECK_TOKENS("'aaaa", errLitCharUnterminated()); + CHECK_TOKENS("'a\n", errLitCharUnterminated()); + } + + SECTION("Spans") { + CHECK_SPANS(" 'a' ", input::Span{1, 3}); + CHECK_SPANS(" 'asdfsd' ", input::Span{1, 8}); + CHECK_SPANS(" 'asdf ", input::Span{1, 6}); + } +} + +} // namespace lex From 6cef638d29c0e389e4e91904fc589ffeafb7ce05 Mon Sep 17 00:00:00 2001 From: Bastian Blokland Date: Wed, 8 Jan 2020 18:43:10 +0200 Subject: [PATCH 2/5] Add character literal to program --- apps/progdiag/get_expr_color.hpp | 2 + include/prog/expr/node_lit_char.hpp | 35 ++++++++++++++++ include/prog/expr/node_visitor.hpp | 2 + include/prog/expr/nodes.hpp | 1 + include/prog/program.hpp | 2 + include/prog/sym/type_kind.hpp | 1 + src/CMakeLists.txt | 1 + src/backend/internal/gen_expr.cpp | 4 ++ 
src/backend/internal/gen_expr.hpp | 1 + src/backend/internal/gen_type_eq.cpp | 1 + .../internal/check_union_exhaustiveness.cpp | 2 + .../internal/check_union_exhaustiveness.hpp | 1 + src/frontend/internal/get_expr.cpp | 5 +++ src/frontend/internal/typeinfer_expr.cpp | 15 ++++--- src/frontend/internal/utilities.cpp | 1 + src/prog/expr/node_lit_char.cpp | 41 +++++++++++++++++++ src/prog/program.cpp | 5 ++- src/prog/sym/type_kind.cpp | 3 ++ tests/frontend/get_lit_expr_test.cpp | 7 ++++ tests/frontend/typeinfer_user_funcs_test.cpp | 6 +++ 20 files changed, 127 insertions(+), 9 deletions(-) create mode 100644 include/prog/expr/node_lit_char.hpp create mode 100644 src/prog/expr/node_lit_char.cpp diff --git a/apps/progdiag/get_expr_color.hpp b/apps/progdiag/get_expr_color.hpp index 02c55ea0..a697d4e7 100644 --- a/apps/progdiag/get_expr_color.hpp +++ b/apps/progdiag/get_expr_color.hpp @@ -62,6 +62,8 @@ class GetExprColor final : public prog::expr::NodeVisitor { m_fg = rang::fg::cyan; } + auto visit(const prog::expr::LitCharNode & /*unused*/) -> void override { m_fg = rang::fg::cyan; } + private: rang::fg m_fg{}; }; diff --git a/include/prog/expr/node_lit_char.hpp b/include/prog/expr/node_lit_char.hpp new file mode 100644 index 00000000..c810a9c6 --- /dev/null +++ b/include/prog/expr/node_lit_char.hpp @@ -0,0 +1,35 @@ +#pragma once +#include "prog/expr/node.hpp" +#include "prog/program.hpp" + +namespace prog::expr { + +class LitCharNode final : public Node { + friend auto litCharNode(const Program& program, char val) -> NodePtr; + +public: + LitCharNode() = delete; + + auto operator==(const Node& rhs) const noexcept -> bool override; + auto operator!=(const Node& rhs) const noexcept -> bool override; + + [[nodiscard]] auto operator[](unsigned int i) const -> const Node& override; + [[nodiscard]] auto getChildCount() const -> unsigned int override; + [[nodiscard]] auto getType() const noexcept -> sym::TypeId override; + [[nodiscard]] auto toString() const -> std::string 
override; + + [[nodiscard]] auto getVal() const noexcept -> char; + + auto accept(NodeVisitor* visitor) const -> void override; + +private: + sym::TypeId m_type; + char m_val; + + LitCharNode(sym::TypeId type, char val); +}; + +// Factories. +auto litCharNode(const Program& program, char val) -> NodePtr; + +} // namespace prog::expr diff --git a/include/prog/expr/node_visitor.hpp b/include/prog/expr/node_visitor.hpp index 3b84b14b..d324bf41 100644 --- a/include/prog/expr/node_visitor.hpp +++ b/include/prog/expr/node_visitor.hpp @@ -18,6 +18,7 @@ class LitFloatNode; class LitFuncNode; class LitIntNode; class LitStringNode; +class LitCharNode; class NodeVisitor { public: @@ -37,6 +38,7 @@ class NodeVisitor { virtual auto visit(const LitFuncNode& n) -> void = 0; virtual auto visit(const LitIntNode& n) -> void = 0; virtual auto visit(const LitStringNode& n) -> void = 0; + virtual auto visit(const LitCharNode& n) -> void = 0; }; } // namespace prog::expr diff --git a/include/prog/expr/nodes.hpp b/include/prog/expr/nodes.hpp index fb122a52..d64670b0 100644 --- a/include/prog/expr/nodes.hpp +++ b/include/prog/expr/nodes.hpp @@ -7,6 +7,7 @@ #include "prog/expr/node_field.hpp" #include "prog/expr/node_group.hpp" #include "prog/expr/node_lit_bool.hpp" +#include "prog/expr/node_lit_char.hpp" #include "prog/expr/node_lit_float.hpp" #include "prog/expr/node_lit_func.hpp" #include "prog/expr/node_lit_int.hpp" diff --git a/include/prog/program.hpp b/include/prog/program.hpp index a685e2ce..ee159524 100644 --- a/include/prog/program.hpp +++ b/include/prog/program.hpp @@ -61,6 +61,7 @@ class Program final { [[nodiscard]] auto getFloat() const noexcept -> sym::TypeId; [[nodiscard]] auto getBool() const noexcept -> sym::TypeId; [[nodiscard]] auto getString() const noexcept -> sym::TypeId; + [[nodiscard]] auto getChar() const noexcept -> sym::TypeId; [[nodiscard]] auto lookupType(const std::string& name) const -> std::optional; [[nodiscard]] auto @@ -125,6 +126,7 @@ class Program 
final { sym::TypeId m_float; sym::TypeId m_bool; sym::TypeId m_string; + sym::TypeId m_char; }; } // namespace prog diff --git a/include/prog/sym/type_kind.hpp b/include/prog/sym/type_kind.hpp index 599b43a7..b1e227ed 100644 --- a/include/prog/sym/type_kind.hpp +++ b/include/prog/sym/type_kind.hpp @@ -8,6 +8,7 @@ enum class TypeKind { Float, Bool, String, + Char, UserStruct, UserUnion, UserDelegate, diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index eef45f13..f7814673 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -81,6 +81,7 @@ add_library(prog STATIC prog/expr/node_lit_func.cpp prog/expr/node_lit_int.cpp prog/expr/node_lit_string.cpp + prog/expr/node_lit_char.cpp prog/expr/node_switch.cpp prog/expr/node_union_check.cpp prog/expr/node_union_get.cpp diff --git a/src/backend/internal/gen_expr.cpp b/src/backend/internal/gen_expr.cpp index 8b7b733e..2b420890 100644 --- a/src/backend/internal/gen_expr.cpp +++ b/src/backend/internal/gen_expr.cpp @@ -404,6 +404,10 @@ auto GenExpr::visit(const prog::expr::LitStringNode& n) -> void { m_builder->addLoadLitString(n.getVal()); } +auto GenExpr::visit(const prog::expr::LitCharNode& n) -> void { + m_builder->addLoadLitInt(n.getVal()); +} + auto GenExpr::genSubExpr(const prog::expr::Node& n, bool tail) -> void { auto genExpr = GenExpr{m_program, m_builder, tail}; n.accept(&genExpr); diff --git a/src/backend/internal/gen_expr.hpp b/src/backend/internal/gen_expr.hpp index f60012ba..d2cfb45f 100644 --- a/src/backend/internal/gen_expr.hpp +++ b/src/backend/internal/gen_expr.hpp @@ -25,6 +25,7 @@ class GenExpr final : public prog::expr::NodeVisitor { auto visit(const prog::expr::LitFuncNode& n) -> void override; auto visit(const prog::expr::LitIntNode& n) -> void override; auto visit(const prog::expr::LitStringNode& n) -> void override; + auto visit(const prog::expr::LitCharNode& n) -> void override; private: const prog::Program& m_program; diff --git a/src/backend/internal/gen_type_eq.cpp 
b/src/backend/internal/gen_type_eq.cpp index 36e45285..75a8e3ee 100644 --- a/src/backend/internal/gen_type_eq.cpp +++ b/src/backend/internal/gen_type_eq.cpp @@ -7,6 +7,7 @@ static auto genTypeEquality(Builder* builder, const prog::sym::TypeDecl& typeDec switch (typeDecl.getKind()) { case prog::sym::TypeKind::Bool: case prog::sym::TypeKind::Int: + case prog::sym::TypeKind::Char: builder->addCheckEqInt(); break; case prog::sym::TypeKind::Float: diff --git a/src/frontend/internal/check_union_exhaustiveness.cpp b/src/frontend/internal/check_union_exhaustiveness.cpp index 3dea8f07..25357fb1 100644 --- a/src/frontend/internal/check_union_exhaustiveness.cpp +++ b/src/frontend/internal/check_union_exhaustiveness.cpp @@ -67,4 +67,6 @@ auto CheckUnionExhaustiveness::visit(const prog::expr::LitIntNode & /*unused*/) auto CheckUnionExhaustiveness::visit(const prog::expr::LitStringNode & /*unused*/) -> void {} +auto CheckUnionExhaustiveness::visit(const prog::expr::LitCharNode & /*unused*/) -> void {} + } // namespace frontend::internal diff --git a/src/frontend/internal/check_union_exhaustiveness.hpp b/src/frontend/internal/check_union_exhaustiveness.hpp index 447675e7..c1df28b1 100644 --- a/src/frontend/internal/check_union_exhaustiveness.hpp +++ b/src/frontend/internal/check_union_exhaustiveness.hpp @@ -27,6 +27,7 @@ class CheckUnionExhaustiveness final : public prog::expr::NodeVisitor { auto visit(const prog::expr::LitFuncNode& n) -> void override; auto visit(const prog::expr::LitIntNode& n) -> void override; auto visit(const prog::expr::LitStringNode& n) -> void override; + auto visit(const prog::expr::LitCharNode& n) -> void override; private: const Context& m_context; diff --git a/src/frontend/internal/get_expr.cpp b/src/frontend/internal/get_expr.cpp index a1af8d59..ef6e6796 100644 --- a/src/frontend/internal/get_expr.cpp +++ b/src/frontend/internal/get_expr.cpp @@ -5,6 +5,7 @@ #include "internal/get_identifier.hpp" #include "internal/utilities.hpp" #include 
"lex/token_payload_lit_bool.hpp" +#include "lex/token_payload_lit_char.hpp" #include "lex/token_payload_lit_float.hpp" #include "lex/token_payload_lit_int.hpp" #include "lex/token_payload_lit_string.hpp" @@ -419,6 +420,10 @@ auto GetExpr::visit(const parse::LitExprNode& n) -> void { m_expr = prog::expr::litStringNode( *m_context->getProg(), n.getVal().getPayload()->getValue()); return; + case lex::TokenKind::LitChar: + m_expr = prog::expr::litCharNode( + *m_context->getProg(), n.getVal().getPayload()->getValue()); + return; default: std::stringstream oss; oss << n.getVal().getKind(); diff --git a/src/frontend/internal/typeinfer_expr.cpp b/src/frontend/internal/typeinfer_expr.cpp index 89fc17ca..5086aea4 100644 --- a/src/frontend/internal/typeinfer_expr.cpp +++ b/src/frontend/internal/typeinfer_expr.cpp @@ -227,22 +227,21 @@ auto TypeInferExpr::visit(const parse::IsExprNode& n) -> void { auto TypeInferExpr::visit(const parse::LitExprNode& n) -> void { switch (n.getVal().getKind()) { - case lex::TokenKind::LitInt: { + case lex::TokenKind::LitInt: m_type = m_context->getProg()->getInt(); break; - } - case lex::TokenKind::LitFloat: { + case lex::TokenKind::LitFloat: m_type = m_context->getProg()->getFloat(); break; - } - case lex::TokenKind::LitBool: { + case lex::TokenKind::LitBool: m_type = m_context->getProg()->getBool(); break; - } - case lex::TokenKind::LitString: { + case lex::TokenKind::LitString: m_type = m_context->getProg()->getString(); break; - } + case lex::TokenKind::LitChar: + m_type = m_context->getProg()->getChar(); + break; default: break; } diff --git a/src/frontend/internal/utilities.cpp b/src/frontend/internal/utilities.cpp index 4be94faf..76a33303 100644 --- a/src/frontend/internal/utilities.cpp +++ b/src/frontend/internal/utilities.cpp @@ -148,6 +148,7 @@ auto isReservedTypeName(const std::string& name) -> bool { "float", "bool", "string", + "char", "delegate", }; return reservedTypes.find(name) != reservedTypes.end(); diff --git 
a/src/prog/expr/node_lit_char.cpp b/src/prog/expr/node_lit_char.cpp new file mode 100644 index 00000000..37a9184c --- /dev/null +++ b/src/prog/expr/node_lit_char.cpp @@ -0,0 +1,41 @@ +#include "prog/expr/node_lit_char.hpp" +#include +#include + +namespace prog::expr { + +LitCharNode::LitCharNode(sym::TypeId type, char val) : m_type{type}, m_val{val} {} + +auto LitCharNode::operator==(const Node& rhs) const noexcept -> bool { + const auto r = dynamic_cast(&rhs); + return r != nullptr && m_val == r->m_val; +} + +auto LitCharNode::operator!=(const Node& rhs) const noexcept -> bool { + return !LitCharNode::operator==(rhs); +} + +auto LitCharNode::operator[](unsigned int /*unused*/) const -> const Node& { + throw std::out_of_range{"No child at given index"}; +} + +auto LitCharNode::getChildCount() const -> unsigned int { return 0; } + +auto LitCharNode::getType() const noexcept -> sym::TypeId { return m_type; } + +auto LitCharNode::toString() const -> std::string { + auto oss = std::ostringstream{}; + oss << '\'' << m_val << '\''; + return oss.str(); +} + +auto LitCharNode::getVal() const noexcept -> char { return m_val; } + +auto LitCharNode::accept(NodeVisitor* visitor) const -> void { visitor->visit(*this); } + +// Factories. 
+auto litCharNode(const Program& program, char val) -> NodePtr { + return std::unique_ptr{new LitCharNode{program.getChar(), val}}; +} + +} // namespace prog::expr diff --git a/src/prog/program.cpp b/src/prog/program.cpp index e4b7ce5e..2a5ce716 100644 --- a/src/prog/program.cpp +++ b/src/prog/program.cpp @@ -22,7 +22,8 @@ Program::Program() : m_int{m_typeDecls.registerType(sym::TypeKind::Int, "int")}, m_float{m_typeDecls.registerType(sym::TypeKind::Float, "float")}, m_bool{m_typeDecls.registerType(sym::TypeKind::Bool, "bool")}, - m_string{m_typeDecls.registerType(sym::TypeKind::String, "string")} { + m_string{m_typeDecls.registerType(sym::TypeKind::String, "string")}, + m_char{m_typeDecls.registerType(sym::TypeKind::Char, "char")} { using fk = typename prog::sym::FuncKind; using ak = typename prog::sym::ActionKind; @@ -168,6 +169,8 @@ auto Program::getBool() const noexcept -> sym::TypeId { return m_bool; } auto Program::getString() const noexcept -> sym::TypeId { return m_string; } +auto Program::getChar() const noexcept -> sym::TypeId { return m_char; } + auto Program::lookupType(const std::string& name) const -> std::optional { return m_typeDecls.lookup(name); } diff --git a/src/prog/sym/type_kind.cpp b/src/prog/sym/type_kind.cpp index 49ae251f..1147bf6f 100644 --- a/src/prog/sym/type_kind.cpp +++ b/src/prog/sym/type_kind.cpp @@ -16,6 +16,9 @@ auto operator<<(std::ostream& out, const TypeKind& rhs) -> std::ostream& { case TypeKind::String: out << "string"; break; + case TypeKind::Char: + out << "char"; + break; case TypeKind::UserStruct: out << "struct"; break; diff --git a/tests/frontend/get_lit_expr_test.cpp b/tests/frontend/get_lit_expr_test.cpp index 96406bb8..aa56e671 100644 --- a/tests/frontend/get_lit_expr_test.cpp +++ b/tests/frontend/get_lit_expr_test.cpp @@ -2,6 +2,7 @@ #include "frontend/diag_defs.hpp" #include "helpers.hpp" #include "prog/expr/node_lit_bool.hpp" +#include "prog/expr/node_lit_char.hpp" #include "prog/expr/node_lit_float.hpp" #include 
"prog/expr/node_lit_int.hpp" #include "prog/expr/node_lit_string.hpp" @@ -38,6 +39,12 @@ TEST_CASE("Analyzing literal expressions", "[frontend]") { GET_FUNC_DEF(output, "f").getExpr() == *prog::expr::litStringNode(output.getProg(), "hello world")); } + + SECTION("Get character literal expression") { + const auto& output = ANALYZE("fun f() -> char \'a\'"); + REQUIRE(output.isSuccess()); + CHECK(GET_FUNC_DEF(output, "f").getExpr() == *prog::expr::litCharNode(output.getProg(), 'a')); + } } } // namespace frontend diff --git a/tests/frontend/typeinfer_user_funcs_test.cpp b/tests/frontend/typeinfer_user_funcs_test.cpp index 177e61de..d1fce2a3 100644 --- a/tests/frontend/typeinfer_user_funcs_test.cpp +++ b/tests/frontend/typeinfer_user_funcs_test.cpp @@ -30,6 +30,12 @@ TEST_CASE("Infer return type of user functions", "[frontend]") { CHECK(GET_FUNC_DECL(output, "f").getOutput() == GET_TYPE_ID(output, "string")); } + SECTION("Character literal") { + const auto& output = ANALYZE("fun f() 'a'"); + REQUIRE(output.isSuccess()); + CHECK(GET_FUNC_DECL(output, "f").getOutput() == GET_TYPE_ID(output, "char")); + } + SECTION("Function literal") { const auto& output = ANALYZE("fun f1(int i) i " "fun f() f1"); From 3f729de3b2d838e175e380bc0bf7093ddc132c55 Mon Sep 17 00:00:00 2001 From: Bastian Blokland Date: Wed, 8 Jan 2020 20:14:05 +0200 Subject: [PATCH 3/5] Add char operations --- include/backend/builder.hpp | 2 + include/prog/sym/func_kind.hpp | 2 + include/vm/opcode.hpp | 2 + src/CMakeLists.txt | 1 + src/backend/builder.cpp | 4 ++ src/backend/dasm/disassembler.cpp | 2 + src/backend/internal/gen_expr.cpp | 6 +++ src/prog/program.cpp | 18 ++++++++ src/vm/executor.cpp | 57 +++++++----------------- src/vm/internal/string_utilities.cpp | 66 ++++++++++++++++++++++++++++ src/vm/internal/string_utilities.hpp | 18 ++++++++ src/vm/opcode.cpp | 6 +++ tests/backend/helpers.hpp | 16 +++++++ tests/backend/literals_test.cpp | 4 ++ tests/vm/conv_test.cpp | 52 ++++++++++++++++++++++ 15 files 
changed, 216 insertions(+), 40 deletions(-) create mode 100644 src/vm/internal/string_utilities.cpp create mode 100644 src/vm/internal/string_utilities.hpp diff --git a/include/backend/builder.hpp b/include/backend/builder.hpp index 4e54dd15..ce0d1051 100644 --- a/include/backend/builder.hpp +++ b/include/backend/builder.hpp @@ -65,6 +65,8 @@ class Builder final { auto addConvIntString() -> void; auto addConvFloatString() -> void; auto addConvBoolString() -> void; + auto addConvCharString() -> void; + auto addConvIntChar() -> void; auto addMakeStruct(uint8_t fieldCount) -> void; auto addLoadStructField(uint8_t fieldIndex) -> void; diff --git a/include/prog/sym/func_kind.hpp b/include/prog/sym/func_kind.hpp index 0c864946..0effa5ed 100644 --- a/include/prog/sym/func_kind.hpp +++ b/include/prog/sym/func_kind.hpp @@ -50,6 +50,8 @@ enum class FuncKind { ConvIntString, ConvFloatString, ConvBoolString, + ConvCharString, + ConvIntChar, DefInt, DefFloat, diff --git a/include/vm/opcode.hpp b/include/vm/opcode.hpp index 69102298..717e0f36 100644 --- a/include/vm/opcode.hpp +++ b/include/vm/opcode.hpp @@ -53,6 +53,8 @@ enum class OpCode : uint8_t { ConvFloatInt = 92, ConvIntString = 93, ConvFloatString = 94, + ConvCharString = 95, + ConvIntChar = 96, MakeStruct = 100, LoadStructField = 101, diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f7814673..07733643 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -197,6 +197,7 @@ add_library(vm STATIC vm/internal/call_stack.cpp vm/internal/const_stack.cpp vm/internal/exec_scope.cpp + vm/internal/string_utilities.cpp vm/io/memory_interface.cpp vm/io/terminal_interface.cpp vm/assembly.cpp diff --git a/src/backend/builder.cpp b/src/backend/builder.cpp index 8456c8a4..bf793b2a 100644 --- a/src/backend/builder.cpp +++ b/src/backend/builder.cpp @@ -144,6 +144,10 @@ auto Builder::addConvBoolString() -> void { label(endLabel); } +auto Builder::addConvCharString() -> void { writeOpCode(vm::OpCode::ConvCharString); } + 
+auto Builder::addConvIntChar() -> void { writeOpCode(vm::OpCode::ConvIntChar); } + auto Builder::addMakeStruct(uint8_t fieldCount) -> void { writeOpCode(vm::OpCode::MakeStruct); writeUInt8(fieldCount); diff --git a/src/backend/dasm/disassembler.cpp b/src/backend/dasm/disassembler.cpp index efb8b642..dfad398e 100644 --- a/src/backend/dasm/disassembler.cpp +++ b/src/backend/dasm/disassembler.cpp @@ -62,6 +62,8 @@ auto disassembleInstructions(const vm::Assembly& assembly) -> std::vector void { case prog::sym::FuncKind::ConvBoolString: m_builder->addConvBoolString(); break; + case prog::sym::FuncKind::ConvCharString: + m_builder->addConvCharString(); + break; + case prog::sym::FuncKind::ConvIntChar: + m_builder->addConvIntChar(); + break; case prog::sym::FuncKind::DefInt: m_builder->addLoadLitInt(0); diff --git a/src/prog/program.cpp b/src/prog/program.cpp index 2a5ce716..7809728e 100644 --- a/src/prog/program.cpp +++ b/src/prog/program.cpp @@ -111,6 +111,20 @@ Program::Program() : m_funcDecls.registerFunc( *this, fk::CheckNEqString, getFuncName(op::BangEq), sym::TypeSet{m_string, m_string}, m_bool); + // Register build-in binary char operators. + m_funcDecls.registerFunc( + *this, fk::CheckEqInt, getFuncName(op::EqEq), sym::TypeSet{m_char, m_char}, m_bool); + m_funcDecls.registerFunc( + *this, fk::CheckNEqInt, getFuncName(op::BangEq), sym::TypeSet{m_char, m_char}, m_bool); + m_funcDecls.registerFunc( + *this, fk::CheckLeInt, getFuncName(op::Le), sym::TypeSet{m_char, m_char}, m_bool); + m_funcDecls.registerFunc( + *this, fk::CheckLeEqInt, getFuncName(op::LeEq), sym::TypeSet{m_char, m_char}, m_bool); + m_funcDecls.registerFunc( + *this, fk::CheckGtInt, getFuncName(op::Gt), sym::TypeSet{m_char, m_char}, m_bool); + m_funcDecls.registerFunc( + *this, fk::CheckGtEqInt, getFuncName(op::GtEq), sym::TypeSet{m_char, m_char}, m_bool); + // Register build-in default constructors. 
m_funcDecls.registerFunc(*this, fk::DefInt, "int", sym::TypeSet{}, m_int); m_funcDecls.registerFunc(*this, fk::DefFloat, "float", sym::TypeSet{}, m_float); @@ -118,19 +132,23 @@ Program::Program() : m_funcDecls.registerFunc(*this, fk::DefString, "string", sym::TypeSet{}, m_string); // Register build-in implicit conversions. + m_funcDecls.registerFunc(*this, fk::NoOp, "int", sym::TypeSet{m_char}, m_int); m_funcDecls.registerFunc(*this, fk::ConvIntFloat, "float", sym::TypeSet{m_int}, m_float); m_funcDecls.registerFunc(*this, fk::ConvIntString, "string", sym::TypeSet{m_int}, m_string); m_funcDecls.registerFunc(*this, fk::ConvFloatString, "string", sym::TypeSet{m_float}, m_string); m_funcDecls.registerFunc(*this, fk::ConvBoolString, "string", sym::TypeSet{m_bool}, m_string); + m_funcDecls.registerFunc(*this, fk::ConvCharString, "string", sym::TypeSet{m_char}, m_string); // Register build-in identity conversions (turn into no-ops). m_funcDecls.registerFunc(*this, fk::NoOp, "int", sym::TypeSet{m_int}, m_int); m_funcDecls.registerFunc(*this, fk::NoOp, "float", sym::TypeSet{m_float}, m_float); m_funcDecls.registerFunc(*this, fk::NoOp, "bool", sym::TypeSet{m_bool}, m_bool); m_funcDecls.registerFunc(*this, fk::NoOp, "string", sym::TypeSet{m_string}, m_string); + m_funcDecls.registerFunc(*this, fk::NoOp, "char", sym::TypeSet{m_char}, m_char); // Register build-in explicit conversions. m_funcDecls.registerFunc(*this, fk::ConvFloatInt, "toInt", sym::TypeSet{m_float}, m_int); + m_funcDecls.registerFunc(*this, fk::ConvIntChar, "toChar", sym::TypeSet{m_int}, m_char); m_funcDecls.registerFunc(*this, fk::LengthString, "length", sym::TypeSet{m_string}, m_int); // Register build-in actions. 
diff --git a/src/vm/executor.cpp b/src/vm/executor.cpp index bd1a18fd..ade204f2 100644 --- a/src/vm/executor.cpp +++ b/src/vm/executor.cpp @@ -4,12 +4,11 @@ #include "internal/const_stack.hpp" #include "internal/eval_stack.hpp" #include "internal/ref_string.hpp" +#include "internal/string_utilities.hpp" #include "vm/exceptions/div_by_zero.hpp" #include "vm/exceptions/eval_stack_not_empty.hpp" #include "vm/exceptions/invalid_assembly.hpp" #include "vm/opcode.hpp" -#include -#include #include namespace vm { @@ -83,14 +82,9 @@ static auto execute(const Assembly& assembly, io::Interface* interface, uint32_t evalStack.push(internal::floatValue(a + b)); } break; case OpCode::AddString: { - auto b = getStringRef(evalStack.pop()); - auto a = getStringRef(evalStack.pop()); - - // Make a new string big enough to fit both and copy both there. - auto result = allocator.allocStr(a->getSize() + b->getSize()); - std::memcpy(result.second, a->getDataPtr(), a->getSize()); - std::memcpy(result.second + a->getSize(), b->getDataPtr(), b->getSize()); - evalStack.push(internal::refValue(result.first)); + auto b = evalStack.pop(); + auto a = evalStack.pop(); + evalStack.push(internal::concatString(&allocator, a, b)); } break; case OpCode::SubInt: { auto b = evalStack.pop().getInt(); @@ -174,8 +168,7 @@ static auto execute(const Assembly& assembly, io::Interface* interface, uint32_t evalStack.push(internal::intValue(a ^ b)); // NOLINT: Signed bitwise operand } break; case OpCode::LengthString: { - auto* strRef = getStringRef(evalStack.pop()); - evalStack.push(internal::intValue(strRef->getSize())); + evalStack.push(internal::intValue(internal::getStringLength(evalStack.pop()))); } break; case OpCode::CheckEqInt: { @@ -189,11 +182,9 @@ static auto execute(const Assembly& assembly, io::Interface* interface, uint32_t evalStack.push(internal::intValue(a == b ? 
1 : 0)); } break; case OpCode::CheckEqString: { - auto b = getStringRef(evalStack.pop()); - auto a = getStringRef(evalStack.pop()); - auto eq = (a->getSize() == b->getSize()) && - std::memcmp(a->getDataPtr(), b->getDataPtr(), a->getSize()) == 0; - evalStack.push(internal::intValue(eq ? 1 : 0)); + auto b = evalStack.pop(); + auto a = evalStack.pop(); + evalStack.push(internal::intValue(internal::checkStringEq(a, b) ? 1 : 0)); } break; case OpCode::CheckEqIp: { auto b = evalStack.pop().getUInt(); @@ -240,31 +231,17 @@ static auto execute(const Assembly& assembly, io::Interface* interface, uint32_t evalStack.push(internal::intValue(static_cast(val))); } break; case OpCode::ConvIntString: { - static const auto maxCharSize = 11; - - const auto val = evalStack.pop().getInt(); - const auto strRefAlloc = allocator.allocStr(maxCharSize); - const auto convRes = std::to_chars(strRefAlloc.second, strRefAlloc.second + maxCharSize, val); - if (convRes.ec != std::errc()) { - throw std::logic_error{"Failed to convert integer to string"}; - } - strRefAlloc.first->updateSize(convRes.ptr - strRefAlloc.second); - evalStack.push(internal::refValue(strRefAlloc.first)); + evalStack.push(internal::toString(&allocator, evalStack.pop().getInt())); } break; case OpCode::ConvFloatString: { - const auto val = evalStack.pop().getFloat(); - - // NOLINTNEXTLINE: C-style var-arg func, needed because clang is missing std::to_chars(float). - const auto charSize = std::snprintf(nullptr, 0, "%.6g", val) + 1; // +1: null-terminator. - const auto strRefAlloc = allocator.allocStr(charSize); - - // NOLINTNEXTLINE: C-style var-arg func, needed because clang is missing std::to_chars(float). - std::snprintf(strRefAlloc.second, charSize, "%.6g", val); - - // Remove the null-terminator from the size. Our strings don't use a null-terminator but - // snprintf always outputs one. 
- strRefAlloc.first->updateSize(charSize - 1); - evalStack.push(internal::refValue(strRefAlloc.first)); + evalStack.push(internal::toString(&allocator, evalStack.pop().getFloat())); + } break; + case OpCode::ConvCharString: { + evalStack.push(internal::toString(&allocator, static_cast(evalStack.pop().getInt()))); + } break; + case OpCode::ConvIntChar: { + auto c = static_cast(evalStack.pop().getInt()); + evalStack.push(internal::intValue(c)); } break; case OpCode::MakeStruct: { diff --git a/src/vm/internal/string_utilities.cpp b/src/vm/internal/string_utilities.cpp new file mode 100644 index 00000000..d79261f1 --- /dev/null +++ b/src/vm/internal/string_utilities.cpp @@ -0,0 +1,66 @@ +#include "internal/string_utilities.hpp" +#include +#include + +namespace vm::internal { + +auto toString(Allocator* allocator, int32_t val) -> Value { + static const auto maxCharSize = 11; + + const auto strRefAlloc = allocator->allocStr(maxCharSize); + const auto convRes = std::to_chars(strRefAlloc.second, strRefAlloc.second + maxCharSize, val); + if (convRes.ec != std::errc()) { + throw std::logic_error{"Failed to convert integer to string"}; + } + strRefAlloc.first->updateSize(convRes.ptr - strRefAlloc.second); + return internal::refValue(strRefAlloc.first); +} + +auto toString(Allocator* allocator, float val) -> Value { + // NOLINTNEXTLINE: C-style var-arg func, needed because clang is missing std::to_chars(float). + const auto charSize = std::snprintf(nullptr, 0, "%.6g", val) + 1; // +1: null-terminator. + const auto strRefAlloc = allocator->allocStr(charSize); + + // NOLINTNEXTLINE: C-style var-arg func, needed because clang is missing std::to_chars(float). + std::snprintf(strRefAlloc.second, charSize, "%.6g", val); + + // Remove the null-terminator from the size. Our strings don't use a null-terminator but + // snprintf always outputs one. 
+ strRefAlloc.first->updateSize(charSize - 1); + + return internal::refValue(strRefAlloc.first); +} + +auto toString(Allocator* allocator, char val) -> Value { + const auto strRefAlloc = allocator->allocStr(1); + *strRefAlloc.second = val; + return internal::refValue(strRefAlloc.first); +} + +auto getStringLength(Value val) -> int32_t { + auto* str = getStringRef(val); + return str->getSize(); +} + +auto checkStringEq(Value a, Value b) -> bool { + auto* strA = getStringRef(a); + auto* strB = getStringRef(b); + + return (strA->getSize() == strB->getSize()) && + std::memcmp(strA->getDataPtr(), strB->getDataPtr(), strA->getSize()) == 0; +} + +auto concatString(Allocator* allocator, Value a, Value b) -> Value { + auto* strA = getStringRef(a); + auto* strB = getStringRef(b); + + // Make a new string big enough to fit both and copy both there. + auto result = allocator->allocStr(strA->getSize() + strB->getSize()); + + std::memcpy(result.second, strA->getDataPtr(), strA->getSize()); + std::memcpy(result.second + strA->getSize(), strB->getDataPtr(), strB->getSize()); + + return internal::refValue(result.first); +} + +} // namespace vm::internal diff --git a/src/vm/internal/string_utilities.hpp b/src/vm/internal/string_utilities.hpp new file mode 100644 index 00000000..e769992e --- /dev/null +++ b/src/vm/internal/string_utilities.hpp @@ -0,0 +1,18 @@ +#pragma once +#include "internal/allocator.hpp" + +namespace vm::internal { + +[[nodiscard]] auto toString(Allocator* allocator, int32_t val) -> Value; + +[[nodiscard]] auto toString(Allocator* allocator, float val) -> Value; + +[[nodiscard]] auto toString(Allocator* allocator, char val) -> Value; + +[[nodiscard]] auto getStringLength(Value val) -> int32_t; + +[[nodiscard]] auto checkStringEq(Value a, Value b) -> bool; + +[[nodiscard]] auto concatString(Allocator* allocator, Value a, Value b) -> Value; + +} // namespace vm::internal diff --git a/src/vm/opcode.cpp b/src/vm/opcode.cpp index cc23ec5c..64c35e34 100644 --- 
a/src/vm/opcode.cpp +++ b/src/vm/opcode.cpp @@ -137,6 +137,12 @@ auto operator<<(std::ostream& out, const OpCode& rhs) -> std::ostream& { case OpCode::ConvFloatString: out << "conv-float-string"; break; + case OpCode::ConvCharString: + out << "conv-char-string"; + break; + case OpCode::ConvIntChar: + out << "conv-int-char"; + break; case OpCode::MakeStruct: out << "make-struct"; diff --git a/tests/backend/helpers.hpp b/tests/backend/helpers.hpp index 41bc9d3f..50e4678d 100644 --- a/tests/backend/helpers.hpp +++ b/tests/backend/helpers.hpp @@ -59,6 +59,19 @@ inline auto buildAssemblyExprString(const std::function return builder.close(); } +inline auto buildAssemblyExprChar(const std::function& build) + -> vm::Assembly { + auto builder = backend::Builder{}; + builder.label("print"); + build(&builder); + builder.addConvCharString(); + builder.addPrintString(); + builder.addRet(); + builder.addFail(); + builder.addEntryPoint("print"); + return builder.close(); +} + inline auto buildAssembly(const std::function& build) -> vm::Assembly { auto builder = backend::Builder{}; build(&builder); @@ -87,6 +100,9 @@ inline auto buildAssembly(const std::function& build) - #define CHECK_EXPR_STRING(INPUT, BUILD_EXPECTED_ASM) \ CHECK_ASM("print(" + std::string(INPUT) + ")", buildAssemblyExprString(BUILD_EXPECTED_ASM)) +#define CHECK_EXPR_CHAR(INPUT, BUILD_EXPECTED_ASM) \ + CHECK_ASM("print(" + std::string(INPUT) + ")", buildAssemblyExprChar(BUILD_EXPECTED_ASM)) + #define CHECK_PROG(INPUT, BUILD_EXPECTED_ASM) CHECK_ASM(INPUT, buildAssembly(BUILD_EXPECTED_ASM)) } // namespace backend diff --git a/tests/backend/literals_test.cpp b/tests/backend/literals_test.cpp index 7333e6aa..2e53f3f0 100644 --- a/tests/backend/literals_test.cpp +++ b/tests/backend/literals_test.cpp @@ -27,6 +27,10 @@ TEST_CASE("Generate assembly for literals", "[backend]") { CHECK_EXPR_STRING( "\"\"", [](backend::Builder* builder) -> void { builder->addLoadLitString(""); }); } + + SECTION("Char literals") { + 
CHECK_EXPR_CHAR("'a'", [](backend::Builder* builder) -> void { builder->addLoadLitInt('a'); }); + } } } // namespace backend diff --git a/tests/vm/conv_test.cpp b/tests/vm/conv_test.cpp index ed5764d8..439dfcf8 100644 --- a/tests/vm/conv_test.cpp +++ b/tests/vm/conv_test.cpp @@ -168,6 +168,58 @@ TEST_CASE("Execute conversions", "[vm]") { }, "-1e+10"); // Rounding error. } + + SECTION("Char to String") { + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitInt(48); + builder->addConvCharString(); + builder->addPrintString(); + }, + "0"); + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitInt(10); + builder->addConvCharString(); + builder->addPrintString(); + }, + "\n"); + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitInt(38); + builder->addConvCharString(); + builder->addPrintString(); + }, + "&"); + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitInt(32); + builder->addConvCharString(); + builder->addPrintString(); + }, + " "); + } + + SECTION("Int to Char") { + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitInt(0); + builder->addConvIntChar(); + + builder->addConvIntString(); + builder->addPrintString(); + }, + "0"); + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitInt(42); + builder->addConvIntChar(); + + builder->addConvIntString(); + builder->addPrintString(); + }, + "42"); + } } } // namespace vm From df32a098fbabdb88b6ab5762d5313060ef8bd522 Mon Sep 17 00:00:00 2001 From: Bastian Blokland Date: Wed, 8 Jan 2020 20:51:59 +0200 Subject: [PATCH 4/5] Make char literals unsigned --- include/lex/token.hpp | 2 +- include/lex/token_payload_lit_char.hpp | 4 +-- include/prog/expr/node_lit_char.hpp | 10 +++---- src/lex/lexer.cpp | 2 +- src/lex/token.cpp | 2 +- src/prog/expr/node_lit_char.cpp | 6 ++--- src/vm/executor.cpp | 5 ++-- src/vm/internal/string_utilities.cpp | 2 +- src/vm/internal/string_utilities.hpp | 2 
+- tests/vm/conv_test.cpp | 36 ++++++++++++++++++++++++++ 10 files changed, 54 insertions(+), 17 deletions(-) diff --git a/include/lex/token.hpp b/include/lex/token.hpp index 46abd5b6..74157405 100644 --- a/include/lex/token.hpp +++ b/include/lex/token.hpp @@ -67,7 +67,7 @@ auto litBoolToken(bool val, input::Span span = input::Span{0}) -> Token; auto litStrToken(std::string val, input::Span span = input::Span{0}) -> Token; -auto litCharToken(char val, input::Span span = input::Span{0}) -> Token; +auto litCharToken(uint8_t val, input::Span span = input::Span{0}) -> Token; auto keywordToken(Keyword keyword, input::Span span = input::Span{0}) -> Token; diff --git a/include/lex/token_payload_lit_char.hpp b/include/lex/token_payload_lit_char.hpp index ef547b90..1f7b786e 100644 --- a/include/lex/token_payload_lit_char.hpp +++ b/include/lex/token_payload_lit_char.hpp @@ -6,7 +6,7 @@ namespace lex { class LitCharTokenPayload final : public TokenPayload { public: LitCharTokenPayload() = delete; - explicit LitCharTokenPayload(const char val) : m_val{val} {} + explicit LitCharTokenPayload(const uint8_t val) : m_val{val} {} auto operator==(const TokenPayload& rhs) const noexcept -> bool override { const auto castedRhs = dynamic_cast(&rhs); @@ -24,7 +24,7 @@ class LitCharTokenPayload final : public TokenPayload { [[nodiscard]] auto getValue() const noexcept { return m_val; } private: - const char m_val; + const uint8_t m_val; auto print(std::ostream& out) const -> std::ostream& override; }; diff --git a/include/prog/expr/node_lit_char.hpp b/include/prog/expr/node_lit_char.hpp index c810a9c6..e4077a08 100644 --- a/include/prog/expr/node_lit_char.hpp +++ b/include/prog/expr/node_lit_char.hpp @@ -5,7 +5,7 @@ namespace prog::expr { class LitCharNode final : public Node { - friend auto litCharNode(const Program& program, char val) -> NodePtr; + friend auto litCharNode(const Program& program, uint8_t val) -> NodePtr; public: LitCharNode() = delete; @@ -18,18 +18,18 @@ class 
LitCharNode final : public Node { [[nodiscard]] auto getType() const noexcept -> sym::TypeId override; [[nodiscard]] auto toString() const -> std::string override; - [[nodiscard]] auto getVal() const noexcept -> char; + [[nodiscard]] auto getVal() const noexcept -> uint8_t; auto accept(NodeVisitor* visitor) const -> void override; private: sym::TypeId m_type; - char m_val; + uint8_t m_val; - LitCharNode(sym::TypeId type, char val); + LitCharNode(sym::TypeId type, uint8_t val); }; // Factories. -auto litCharNode(const Program& program, char val) -> NodePtr; +auto litCharNode(const Program& program, uint8_t val) -> NodePtr; } // namespace prog::expr diff --git a/src/lex/lexer.cpp b/src/lex/lexer.cpp index 4e32ac01..3f27af3f 100644 --- a/src/lex/lexer.cpp +++ b/src/lex/lexer.cpp @@ -440,7 +440,7 @@ auto LexerImpl::nextLitChar() -> Token { auto tooBig = false; auto invalidEscapeSequence = false; - char c = consumeChar(); + uint8_t c = consumeChar(); if (c == '\'') { return erLitCharEmpty(input::Span{startPos, m_inputPos}); } diff --git a/src/lex/token.cpp b/src/lex/token.cpp index 46f1a347..8933a938 100644 --- a/src/lex/token.cpp +++ b/src/lex/token.cpp @@ -103,7 +103,7 @@ auto litStrToken(std::string val, const input::Span span) -> Token { return Token{TokenKind::LitString, std::make_unique(std::move(val)), span}; } -auto litCharToken(char val, const input::Span span) -> Token { +auto litCharToken(uint8_t val, const input::Span span) -> Token { return Token{TokenKind::LitChar, std::make_unique(val), span}; } diff --git a/src/prog/expr/node_lit_char.cpp b/src/prog/expr/node_lit_char.cpp index 37a9184c..ccb95140 100644 --- a/src/prog/expr/node_lit_char.cpp +++ b/src/prog/expr/node_lit_char.cpp @@ -4,7 +4,7 @@ namespace prog::expr { -LitCharNode::LitCharNode(sym::TypeId type, char val) : m_type{type}, m_val{val} {} +LitCharNode::LitCharNode(sym::TypeId type, uint8_t val) : m_type{type}, m_val{val} {} auto LitCharNode::operator==(const Node& rhs) const noexcept -> bool { 
const auto r = dynamic_cast(&rhs); @@ -29,12 +29,12 @@ auto LitCharNode::toString() const -> std::string { return oss.str(); } -auto LitCharNode::getVal() const noexcept -> char { return m_val; } +auto LitCharNode::getVal() const noexcept -> uint8_t { return m_val; } auto LitCharNode::accept(NodeVisitor* visitor) const -> void { visitor->visit(*this); } // Factories. -auto litCharNode(const Program& program, char val) -> NodePtr { +auto litCharNode(const Program& program, uint8_t val) -> NodePtr { return std::unique_ptr{new LitCharNode{program.getChar(), val}}; } diff --git a/src/vm/executor.cpp b/src/vm/executor.cpp index ade204f2..3d08adc5 100644 --- a/src/vm/executor.cpp +++ b/src/vm/executor.cpp @@ -237,10 +237,11 @@ static auto execute(const Assembly& assembly, io::Interface* interface, uint32_t evalStack.push(internal::toString(&allocator, evalStack.pop().getFloat())); } break; case OpCode::ConvCharString: { - evalStack.push(internal::toString(&allocator, static_cast(evalStack.pop().getInt()))); + evalStack.push( + internal::toString(&allocator, static_cast(evalStack.pop().getInt()))); } break; case OpCode::ConvIntChar: { - auto c = static_cast(evalStack.pop().getInt()); + auto c = static_cast(evalStack.pop().getInt()); evalStack.push(internal::intValue(c)); } break; diff --git a/src/vm/internal/string_utilities.cpp b/src/vm/internal/string_utilities.cpp index d79261f1..347b5ad7 100644 --- a/src/vm/internal/string_utilities.cpp +++ b/src/vm/internal/string_utilities.cpp @@ -31,7 +31,7 @@ auto toString(Allocator* allocator, float val) -> Value { return internal::refValue(strRefAlloc.first); } -auto toString(Allocator* allocator, char val) -> Value { +auto toString(Allocator* allocator, uint8_t val) -> Value { const auto strRefAlloc = allocator->allocStr(1); *strRefAlloc.second = val; return internal::refValue(strRefAlloc.first); diff --git a/src/vm/internal/string_utilities.hpp b/src/vm/internal/string_utilities.hpp index e769992e..f3b365b6 100644 --- 
a/src/vm/internal/string_utilities.hpp +++ b/src/vm/internal/string_utilities.hpp @@ -7,7 +7,7 @@ namespace vm::internal { [[nodiscard]] auto toString(Allocator* allocator, float val) -> Value; -[[nodiscard]] auto toString(Allocator* allocator, char val) -> Value; +[[nodiscard]] auto toString(Allocator* allocator, uint8_t val) -> Value; [[nodiscard]] auto getStringLength(Value val) -> int32_t; diff --git a/tests/vm/conv_test.cpp b/tests/vm/conv_test.cpp index 439dfcf8..5d8fde40 100644 --- a/tests/vm/conv_test.cpp +++ b/tests/vm/conv_test.cpp @@ -219,6 +219,42 @@ TEST_CASE("Execute conversions", "[vm]") { builder->addPrintString(); }, "42"); + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitInt(255); + builder->addConvIntChar(); + + builder->addConvIntString(); + builder->addPrintString(); + }, + "255"); + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitInt(256); + builder->addConvIntChar(); + + builder->addConvIntString(); + builder->addPrintString(); + }, + "0"); + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitInt(-1); + builder->addConvIntChar(); + + builder->addConvIntString(); + builder->addPrintString(); + }, + "255"); + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitInt(1337); + builder->addConvIntChar(); + + builder->addConvIntString(); + builder->addPrintString(); + }, + "57"); } } From 25b2afe330906ab96474fa5892bc8bfa585c7bcf Mon Sep 17 00:00:00 2001 From: Bastian Blokland Date: Wed, 8 Jan 2020 21:43:00 +0200 Subject: [PATCH 5/5] Add string indexing --- include/backend/builder.hpp | 1 + include/prog/sym/func_kind.hpp | 1 + include/vm/opcode.hpp | 1 + src/backend/builder.cpp | 2 + src/backend/dasm/disassembler.cpp | 1 + src/backend/internal/gen_expr.cpp | 3 ++ src/input/char_escape.cpp | 2 + src/prog/program.cpp | 4 ++ src/vm/executor.cpp | 5 +++ src/vm/internal/string_utilities.cpp | 16 ++++++-- src/vm/internal/string_utilities.hpp | 2 + 
src/vm/opcode.cpp | 3 ++ tests/backend/call_expr_test.cpp | 48 ++++++++++++++++++++++ tests/backend/helpers.hpp | 2 +- tests/frontend/get_index_expr_test.cpp | 8 +++- tests/lex/litchar_test.cpp | 1 + tests/vm/string_op_test.cpp | 57 ++++++++++++++++++++++++++ 17 files changed, 150 insertions(+), 7 deletions(-) diff --git a/include/backend/builder.hpp b/include/backend/builder.hpp index ce0d1051..685c6390 100644 --- a/include/backend/builder.hpp +++ b/include/backend/builder.hpp @@ -49,6 +49,7 @@ class Builder final { auto addOrInt() -> void; auto addXorInt() -> void; auto addLengthString() -> void; + auto addIndexString() -> void; auto addCheckEqInt() -> void; auto addCheckEqFloat() -> void; diff --git a/include/prog/sym/func_kind.hpp b/include/prog/sym/func_kind.hpp index 0effa5ed..a9f0bb9f 100644 --- a/include/prog/sym/func_kind.hpp +++ b/include/prog/sym/func_kind.hpp @@ -42,6 +42,7 @@ enum class FuncKind { AddString, LengthString, + IndexString, CheckEqString, CheckNEqString, diff --git a/include/vm/opcode.hpp b/include/vm/opcode.hpp index 717e0f36..661b5bc4 100644 --- a/include/vm/opcode.hpp +++ b/include/vm/opcode.hpp @@ -38,6 +38,7 @@ enum class OpCode : uint8_t { OrInt = 66, XorInt = 67, LengthString = 68, + IndexString = 69, CheckEqInt = 80, CheckEqFloat = 81, diff --git a/src/backend/builder.cpp b/src/backend/builder.cpp index bf793b2a..7143608a 100644 --- a/src/backend/builder.cpp +++ b/src/backend/builder.cpp @@ -103,6 +103,8 @@ auto Builder::addXorInt() -> void { writeOpCode(vm::OpCode::XorInt); } auto Builder::addLengthString() -> void { writeOpCode(vm::OpCode::LengthString); } +auto Builder::addIndexString() -> void { writeOpCode(vm::OpCode::IndexString); } + auto Builder::addCheckEqInt() -> void { writeOpCode(vm::OpCode::CheckEqInt); } auto Builder::addCheckEqFloat() -> void { writeOpCode(vm::OpCode::CheckEqFloat); } diff --git a/src/backend/dasm/disassembler.cpp b/src/backend/dasm/disassembler.cpp index dfad398e..f84c13dd 100644 --- 
a/src/backend/dasm/disassembler.cpp +++ b/src/backend/dasm/disassembler.cpp @@ -49,6 +49,7 @@ auto disassembleInstructions(const vm::Assembly& assembly) -> std::vector void { case prog::sym::FuncKind::LengthString: m_builder->addLengthString(); break; + case prog::sym::FuncKind::IndexString: + m_builder->addIndexString(); + break; case prog::sym::FuncKind::CheckEqString: m_builder->addCheckEqString(); break; diff --git a/src/input/char_escape.cpp b/src/input/char_escape.cpp index 64d92588..bad03bd2 100644 --- a/src/input/char_escape.cpp +++ b/src/input/char_escape.cpp @@ -8,6 +8,7 @@ auto escape(const char c) -> std::optional { {'"', '"'}, {'\'', '\''}, {'\\', '\\'}, + {'\0', '0'}, {'\a', 'a'}, {'\b', 'b'}, {'\f', 'f'}, @@ -46,6 +47,7 @@ auto unescape(const char c) -> std::optional { {'"', '"'}, {'\'', '\''}, {'\\', '\\'}, + {'0', '\0'}, {'a', '\a'}, {'b', '\b'}, {'f', '\f'}, diff --git a/src/prog/program.cpp b/src/prog/program.cpp index 7809728e..942b26ff 100644 --- a/src/prog/program.cpp +++ b/src/prog/program.cpp @@ -149,7 +149,11 @@ Program::Program() : // Register build-in explicit conversions. m_funcDecls.registerFunc(*this, fk::ConvFloatInt, "toInt", sym::TypeSet{m_float}, m_int); m_funcDecls.registerFunc(*this, fk::ConvIntChar, "toChar", sym::TypeSet{m_int}, m_char); + + // Register build-in functions. m_funcDecls.registerFunc(*this, fk::LengthString, "length", sym::TypeSet{m_string}, m_int); + m_funcDecls.registerFunc( + *this, fk::IndexString, getFuncName(op::SquareSquare), sym::TypeSet{m_string, m_int}, m_char); // Register build-in actions. 
m_actionDecls.registerAction(*this, ak::PrintString, "print", sym::TypeSet{m_string}); diff --git a/src/vm/executor.cpp b/src/vm/executor.cpp index 3d08adc5..ef05cd9b 100644 --- a/src/vm/executor.cpp +++ b/src/vm/executor.cpp @@ -170,6 +170,11 @@ static auto execute(const Assembly& assembly, io::Interface* interface, uint32_t case OpCode::LengthString: { evalStack.push(internal::intValue(internal::getStringLength(evalStack.pop()))); } break; + case OpCode::IndexString: { // Chars are unsigned bytes (0-255); widen without sign-extension. + auto index = evalStack.pop().getInt(); + auto str = evalStack.pop(); + evalStack.push(internal::intValue(static_cast<uint8_t>(internal::indexString(str, index)))); + } break; case OpCode::CheckEqInt: { auto b = evalStack.pop().getInt(); diff --git a/src/vm/internal/string_utilities.cpp b/src/vm/internal/string_utilities.cpp index 347b5ad7..8d5e5c35 100644 --- a/src/vm/internal/string_utilities.cpp +++ b/src/vm/internal/string_utilities.cpp @@ -13,7 +13,7 @@ auto toString(Allocator* allocator, int32_t val) -> Value { throw std::logic_error{"Failed to convert integer to string"}; } strRefAlloc.first->updateSize(convRes.ptr - strRefAlloc.second); - return internal::refValue(strRefAlloc.first); + return refValue(strRefAlloc.first); } auto toString(Allocator* allocator, float val) -> Value { @@ -28,13 +28,13 @@ auto toString(Allocator* allocator, float val) -> Value { // snprintf always outputs one.
strRefAlloc.first->updateSize(charSize - 1); - return internal::refValue(strRefAlloc.first); + return refValue(strRefAlloc.first); } auto toString(Allocator* allocator, uint8_t val) -> Value { const auto strRefAlloc = allocator->allocStr(1); *strRefAlloc.second = val; - return internal::refValue(strRefAlloc.first); + return refValue(strRefAlloc.first); } auto getStringLength(Value val) -> int32_t { @@ -50,6 +50,14 @@ auto checkStringEq(Value a, Value b) -> bool { std::memcmp(strA->getDataPtr(), strB->getDataPtr(), strA->getSize()) == 0; } +auto indexString(Value target, int32_t idx) -> char { + auto* strTgt = getStringRef(target); + if (idx < 0 || static_cast(idx) >= strTgt->getSize()) { + return 0; + } + return *(strTgt->getDataPtr() + idx); +} + auto concatString(Allocator* allocator, Value a, Value b) -> Value { auto* strA = getStringRef(a); auto* strB = getStringRef(b); @@ -60,7 +68,7 @@ auto concatString(Allocator* allocator, Value a, Value b) -> Value { std::memcpy(result.second, strA->getDataPtr(), strA->getSize()); std::memcpy(result.second + strA->getSize(), strB->getDataPtr(), strB->getSize()); - return internal::refValue(result.first); + return refValue(result.first); } } // namespace vm::internal diff --git a/src/vm/internal/string_utilities.hpp b/src/vm/internal/string_utilities.hpp index f3b365b6..bf3053b0 100644 --- a/src/vm/internal/string_utilities.hpp +++ b/src/vm/internal/string_utilities.hpp @@ -13,6 +13,8 @@ namespace vm::internal { [[nodiscard]] auto checkStringEq(Value a, Value b) -> bool; +[[nodiscard]] auto indexString(Value target, int32_t idx) -> char; + [[nodiscard]] auto concatString(Allocator* allocator, Value a, Value b) -> Value; } // namespace vm::internal diff --git a/src/vm/opcode.cpp b/src/vm/opcode.cpp index 64c35e34..0bacf7c6 100644 --- a/src/vm/opcode.cpp +++ b/src/vm/opcode.cpp @@ -96,6 +96,9 @@ auto operator<<(std::ostream& out, const OpCode& rhs) -> std::ostream& { case OpCode::LengthString: out << "length-string"; break; + 
case OpCode::IndexString: + out << "index-string"; + break; case OpCode::CheckEqInt: out << "check-eq-int"; diff --git a/tests/backend/call_expr_test.cpp b/tests/backend/call_expr_test.cpp index a0d8a7ec..a83b405a 100644 --- a/tests/backend/call_expr_test.cpp +++ b/tests/backend/call_expr_test.cpp @@ -190,6 +190,42 @@ TEST_CASE("Generate assembly for call expressions", "[backend]") { }); } + SECTION("Char checks") { + CHECK_EXPR_BOOL("'1' == '3'", [](backend::Builder* builder) -> void { + builder->addLoadLitInt('1'); + builder->addLoadLitInt('3'); + builder->addCheckEqInt(); + }); + CHECK_EXPR_BOOL("'1' != '3'", [](backend::Builder* builder) -> void { + builder->addLoadLitInt('1'); + builder->addLoadLitInt('3'); + builder->addCheckEqInt(); + builder->addLogicInvInt(); + }); + CHECK_EXPR_BOOL("'1' < '3'", [](backend::Builder* builder) -> void { + builder->addLoadLitInt('1'); + builder->addLoadLitInt('3'); + builder->addCheckLeInt(); + }); + CHECK_EXPR_BOOL("'1' <= '3'", [](backend::Builder* builder) -> void { + builder->addLoadLitInt('1'); + builder->addLoadLitInt('3'); + builder->addCheckGtInt(); + builder->addLogicInvInt(); + }); + CHECK_EXPR_BOOL("'1' > '3'", [](backend::Builder* builder) -> void { + builder->addLoadLitInt('1'); + builder->addLoadLitInt('3'); + builder->addCheckGtInt(); + }); + CHECK_EXPR_BOOL("'1' >= '3'", [](backend::Builder* builder) -> void { + builder->addLoadLitInt('1'); + builder->addLoadLitInt('3'); + builder->addCheckLeInt(); + builder->addLogicInvInt(); + }); + } + SECTION("Bool operations") { CHECK_EXPR_BOOL("!false", [](backend::Builder* builder) -> void { builder->addLoadLitInt(0); @@ -237,6 +273,12 @@ TEST_CASE("Generate assembly for call expressions", "[backend]") { builder->addLoadLitString("hello world"); builder->addLengthString(); }); + + CHECK_EXPR_CHAR("\"hello world\"[6]", [](backend::Builder* builder) -> void { + builder->addLoadLitString("hello world"); + builder->addLoadLitInt(6); + builder->addIndexString(); + }); } 
SECTION("String checks") { @@ -274,6 +316,10 @@ TEST_CASE("Generate assembly for call expressions", "[backend]") { builder->addLoadLitInt(1); builder->addConvBoolString(); }); + CHECK_EXPR_STRING("string('a')", [](backend::Builder* builder) -> void { + builder->addLoadLitInt('a'); + builder->addConvCharString(); + }); } SECTION("Default constructors") { @@ -324,6 +370,8 @@ TEST_CASE("Generate assembly for call expressions", "[backend]") { CHECK_EXPR_STRING("string(\"hello world\")", [](backend::Builder* builder) -> void { builder->addLoadLitString("hello world"); }); + CHECK_EXPR_CHAR( + "char('a')", [](backend::Builder* builder) -> void { builder->addLoadLitInt('a'); }); } } diff --git a/tests/backend/helpers.hpp b/tests/backend/helpers.hpp index 50e4678d..1de0ecf0 100644 --- a/tests/backend/helpers.hpp +++ b/tests/backend/helpers.hpp @@ -101,7 +101,7 @@ inline auto buildAssembly(const std::function& build) - CHECK_ASM("print(" + std::string(INPUT) + ")", buildAssemblyExprString(BUILD_EXPECTED_ASM)) #define CHECK_EXPR_CHAR(INPUT, BUILD_EXPECTED_ASM) \ - CHECK_ASM("print(" + std::string(INPUT) + ")", buildAssemblyExprChar(BUILD_EXPECTED_ASM)) + CHECK_ASM("print(string(" + std::string(INPUT) + "))", buildAssemblyExprChar(BUILD_EXPECTED_ASM)) #define CHECK_PROG(INPUT, BUILD_EXPECTED_ASM) CHECK_ASM(INPUT, buildAssembly(BUILD_EXPECTED_ASM)) diff --git a/tests/frontend/get_index_expr_test.cpp b/tests/frontend/get_index_expr_test.cpp index 2896521c..8be067e7 100644 --- a/tests/frontend/get_index_expr_test.cpp +++ b/tests/frontend/get_index_expr_test.cpp @@ -63,8 +63,12 @@ TEST_CASE("Analyzing index expressions", "[frontend]") { SECTION("Diagnostics") { CHECK_DIAG( - "fun f(int i) -> int i[0]", - errUndeclaredIndexOperator(src, {"int", "int"}, input::Span{20, 23})); + "fun f(int i) -> int i[.0]", + errUndeclaredIndexOperator(src, {"int", "float"}, input::Span{20, 24})); + CHECK_DIAG( + "struct test = int i " + "fun f(test t) -> int t[0]", + errUndeclaredIndexOperator(src, 
{"test", "int"}, input::Span{41, 44})); } } diff --git a/tests/lex/litchar_test.cpp b/tests/lex/litchar_test.cpp index afdd9188..b21227e1 100644 --- a/tests/lex/litchar_test.cpp +++ b/tests/lex/litchar_test.cpp @@ -23,6 +23,7 @@ TEST_CASE("Lexing character literals", "[lex]") { CHECK_TOKENS("'\\\"'", litCharToken('"')); CHECK_TOKENS("'\\''", litCharToken('\'')); CHECK_TOKENS("'\\\\'", litCharToken('\\')); + CHECK_TOKENS("'\\0'", litCharToken(0)); CHECK_TOKENS("'\\a'", litCharToken('\a')); CHECK_TOKENS("'\\b'", litCharToken('\b')); CHECK_TOKENS("'\\f'", litCharToken('\f')); diff --git a/tests/vm/string_op_test.cpp b/tests/vm/string_op_test.cpp index 10c8c4c2..51f49ef3 100644 --- a/tests/vm/string_op_test.cpp +++ b/tests/vm/string_op_test.cpp @@ -48,6 +48,63 @@ TEST_CASE("Execute string operations", "[vm]") { }, "11"); } + + SECTION("Indexing") { + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitString("hello"); + builder->addLoadLitInt(0); + builder->addIndexString(); + builder->addConvCharString(); + builder->addPrintString(); + }, + "h"); + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitString("hello"); + builder->addLoadLitInt(4); + builder->addIndexString(); + builder->addConvCharString(); + builder->addPrintString(); + }, + "o"); + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitString("hello"); + builder->addLoadLitInt(-1); + builder->addIndexString(); + builder->addConvIntString(); // NOTE: Using int to string conversion, is '\0' char. + builder->addPrintString(); + }, + "0"); + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitString("hello"); + builder->addLoadLitInt(5); + builder->addIndexString(); + builder->addConvIntString(); // NOTE: Using int to string conversion, is '\0' char. 
+ builder->addPrintString(); + }, + "0"); + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitString(""); + builder->addLoadLitInt(0); + builder->addIndexString(); + builder->addConvIntString(); // NOTE: Using int to string conversion, is '\0' char. + builder->addPrintString(); + }, + "0"); + CHECK_EXPR( + [](backend::Builder* builder) -> void { + builder->addLoadLitString("."); + builder->addLoadLitInt(0); + builder->addIndexString(); + builder->addConvCharString(); + builder->addPrintString(); + }, + "."); + } } } // namespace vm