diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 819b447..57ebd56 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -3,9 +3,11 @@ enable_testing() add_executable( ${PROJECT_NAME}_tests main_test.cpp - unit_tests.cpp + lexer_big_programs_tests.cpp test_functions.cpp test_suites/ProjectIntegrationTestSuite.cpp + test_suites/LexerUnitTestSuite.cpp + lexer_tests.cpp ) target_link_libraries( diff --git a/tests/lexer_big_programs_tests.cpp b/tests/lexer_big_programs_tests.cpp new file mode 100644 index 0000000..04bc390 --- /dev/null +++ b/tests/lexer_big_programs_tests.cpp @@ -0,0 +1,952 @@ +#include +#include +#include +#include +#include "lib/lexer/Lexer.hpp" +#include "test_suites/LexerUnitTestSuite.hpp" + +TEST(LexerUnitTestSuite, ExampleFundamentals) { + const std::string src = R"OVUM(fun ExampleFundamentals(): Void { + val i: int = 42 + val f: float = 3.14 + val b: byte = 255 + val c: char = 'A' + val bl: bool = true + val p: pointer = null + sys::Print(i.ToString()) + })OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun","ExampleFundamentals","(",")",":","Void","{","\\n", + "val","i",":","int","=","42","\\n", + "val","f",":","float","=","3.14","\\n", + "val","b",":","byte","=","255","\\n", + "val","c",":","char","=","'A'","\\n", + "val","bl",":","bool","=","true","\\n", + "val","p",":","pointer","=","null","\\n", + "sys","::", "Print","(","i",".","ToString","(",")",")","\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "PUNCT", "PUNCT", "IDENT", "PUNCT", "NEWLINE", + "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Int", "NEWLINE", "KEYWORD", + "IDENT", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Float", "NEWLINE", "KEYWORD", "IDENT", + "PUNCT", "IDENT", "OPERATOR", "LITERAL:Int", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", + "IDENT", "OPERATOR", "LITERAL:Char", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", + "OPERATOR", "LITERAL:Bool", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", + "KEYWORD", "NEWLINE", "IDENT", "OPERATOR", "IDENT", "PUNCT", "IDENT", "OPERATOR", + "IDENT", "PUNCT", "PUNCT", "PUNCT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} +TEST(LexerUnitTestSuite, ExampleReferences) { + const std::string src = R"OVUM(fun ExampleReferences(): Void { +val I: Int = 42 +val F: Float = 3.14 +val B: Byte = 255 +val C: Char = 'A' +val Bl: Bool = true +val P: Pointer = null +val count: Int = 0 +val pi: Float = 3.14 +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun","ExampleReferences","(",")",":","Void","{","\\n", + "val","I",":","Int","=","42","\\n", + "val","F",":","Float","=","3.14","\\n", + "val","B",":","Byte","=","255","\\n", + "val","C",":","Char","=","'A'","\\n", + "val","Bl",":","Bool","=","true","\\n", + "val","P",":","Pointer","=","null","\\n", + "val","count",":","Int","=","0","\\n", + "val","pi",":","Float","=","3.14","\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "PUNCT", "PUNCT", "IDENT", "PUNCT", + "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Int", + "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Float", + "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Int", + "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Char", + "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Bool", + "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "KEYWORD", + "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Int", + "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Float", + "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} +TEST(LexerUnitTestSuite, ExampleNullable) { + const std::string src = R"OVUM(fun ExampleNullable(): Void { +val optInt: Int? = null +val optStr: String? = "Hello" +val optArr: IntArray? = null +val safeInt: Int? = 42 +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "ExampleNullable", "(", ")", ":", "Void", "{", "\\n", + "val", "optInt", ":", "Int", "?", "=", "null", "\\n", + "val", "optStr", ":", "String", "?", "=", "\"Hello\"", "\\n", + "val", "optArr", ":", "IntArray", "?", "=", "null", "\\n", + "val", "safeInt", ":", "Int", "?", "=", "42", "\\n", + "}"}; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "PUNCT", "PUNCT", "IDENT", "PUNCT", "NEWLINE", "KEYWORD", + "IDENT", "PUNCT", "IDENT", "OPERATOR", "OPERATOR", "KEYWORD", "NEWLINE", "KEYWORD", "IDENT", + "PUNCT", "IDENT", "OPERATOR", "OPERATOR", "LITERAL:String", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", + "IDENT", "OPERATOR", "OPERATOR", "KEYWORD", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", + "OPERATOR", "OPERATOR", "LITERAL:Int", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} +TEST(LexerUnitTestSuite, ExampleArrays) { + const std::string src = R"OVUM(fun ExampleArrays(): Void { +val intArr: IntArray = IntArray(5) +val strArr: StringArray = StringArray(3) +intArr[0] := 1 +intArr[1] := 2 +val emptyList: IntArray = IntArray(0) +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun","ExampleArrays","(",")",":","Void","{","\\n", + "val","intArr",":","IntArray","=","IntArray","(","5",")","\\n", + "val","strArr",":","StringArray","=","StringArray","(","3",")","\\n", + "intArr","[","0","]",":=","1","\\n", + "intArr","[","1","]",":=","2","\\n", + "val","emptyList",":","IntArray","=","IntArray","(","0",")","\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "PUNCT", "PUNCT", "IDENT", "PUNCT", "NEWLINE", + "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "LITERAL:Int", + "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", + "PUNCT", "LITERAL:Int", "PUNCT", "NEWLINE", "IDENT", "PUNCT", "LITERAL:Int", "PUNCT", + "OPERATOR", "LITERAL:Int", "NEWLINE", "IDENT", "PUNCT", "LITERAL:Int", "PUNCT", "OPERATOR", + "LITERAL:Int", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", + "PUNCT", "LITERAL:Int", "PUNCT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} +TEST(LexerUnitTestSuite, ExampleObjects) { + const std::string src = R"OVUM(interface IShape { fun Area(): float } +class Circle implements IShape { +public val Radius: float +public fun Circle(r: float): Circle { this.Radius = r return this } +public override fun Area(): float { return 3.14 * Radius * Radius } +} +fun ExampleObjects(): Void { +val shape: IShape = Circle(5.0) +val circle: Circle? = Circle(10.0) +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "interface", "IShape", "{", "fun", "Area", "(", ")", ":", + "float", "}", "\\n", "class", "Circle", "implements", "IShape", "{", + "\\n", "public", "val", "Radius", ":", "float", "\\n", "public", + "fun", "Circle", "(", "r", ":", "float", ")", ":", + "Circle", "{", "this", ".", "Radius", "=", "r", "return", + "this", "}", "\\n", "public", "override", "fun", "Area", "(", + ")", ":", "float", "{", "return", "3.14", "*", "Radius", + "*", "Radius", "}", "\\n", "}", "\\n", "fun", "ExampleObjects", + "(", ")", ":", "Void", "{", "\\n", "val", "shape", + ":", "IShape", "=", "Circle", "(", "5.0", ")", "\\n", + "val", "circle", ":", "Circle", "?", "=", "Circle", "(", + "10.0", ")", "\\n", "}"}; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "KEYWORD", "IDENT", "PUNCT", "PUNCT", "PUNCT", + "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "KEYWORD", "IDENT", "PUNCT", + "NEWLINE", "KEYWORD", "KEYWORD", "IDENT", "PUNCT", "IDENT", "NEWLINE", "KEYWORD", + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", "PUNCT", + "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "OPERATOR", "IDENT", "KEYWORD", + "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "KEYWORD", "KEYWORD", "IDENT", "PUNCT", + "PUNCT", "PUNCT", "IDENT", "PUNCT", "KEYWORD", "LITERAL:Float", "OPERATOR", "IDENT", + "OPERATOR", "IDENT", "PUNCT", "NEWLINE", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", + "PUNCT", "PUNCT", "PUNCT", "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", + "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "LITERAL:Float", "PUNCT", "NEWLINE", + "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "OPERATOR", "IDENT", "PUNCT", + "LITERAL:Float", "PUNCT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} +TEST(LexerUnitTestSuite, ExampleAccess) { + const std::string src = R"OVUM(class ExampleClass { +private val immutable: int = 10 +public var mutable: float = 20.5 +} +fun ExampleAccess(): Void { +val obj: ExampleClass = ExampleClass() +obj.mutable = 30.0 +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "class","ExampleClass","{","\\n", + "private","val","immutable",":","int","=","10","\\n", + "public","var","mutable",":","float","=","20.5","\\n", + "}","\\n", + "fun","ExampleAccess","(",")",":","Void","{","\\n", + "val","obj",":","ExampleClass","=","ExampleClass","(",")","\\n", + "obj",".","mutable","=","30.0","\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "KEYWORD", "IDENT", "PUNCT", + "IDENT", "OPERATOR", "LITERAL:Int", "NEWLINE", "KEYWORD", "KEYWORD", "IDENT", "PUNCT", + "IDENT", "OPERATOR", "LITERAL:Float", "NEWLINE", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", + "PUNCT", "PUNCT", "PUNCT", "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", + "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "PUNCT", "NEWLINE", "IDENT", + "OPERATOR", "IDENT", "OPERATOR", "LITERAL:Float", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} +TEST(LexerUnitTestSuite, SimpleIf) { + const std::string src = R"OVUM(fun SimpleIf(x: int): Void { +if (x > 0) { sys::Print("Positive") } +else if (x < 0) { sys::Print("Negative") } +else { sys::Print("Zero") } +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun","SimpleIf","(","x",":","int",")",":","Void","{","\\n", + "if","(","x",">","0",")","{","sys","::", "Print","(","\"Positive\"",")","}","\\n", + "else","if","(","x","<","0",")","{","sys","::", "Print","(","\"Negative\"",")","}","\\n", + "else","{","sys","::", "Print","(","\"Zero\"",")","}","\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", "PUNCT", + "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Int", + "PUNCT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "LITERAL:String", "PUNCT", + "PUNCT", "NEWLINE", "KEYWORD", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Int", + "PUNCT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "LITERAL:String", "PUNCT", + "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", + "LITERAL:String", "PUNCT", "PUNCT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} +TEST(LexerUnitTestSuite, BooleanConditions) { + const std::string src = R"OVUM(fun BooleanConditions(a: bool, b: bool): Void { +if (a && b) { sys::Print("Both true") } +else if (a || b) { sys::Print("One true") } +else if (!a ^ b) { sys::Print("Exactly one false") } +else { sys::Print("Both false") } +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun","BooleanConditions","(","a",":","bool",",","b",":","bool",")",":","Void","{","\\n", + "if","(","a","&&","b",")","{","sys","::", "Print","(","\"Both true\"",")","}","\\n", + "else","if","(","a","||","b",")","{","sys","::", "Print","(","\"One true\"",")","}","\\n", + "else","if","(","!","a","^","b",")","{","sys","::", "Print","(","\"Exactly one false\"",")","}","\\n", + "else","{","sys","::", "Print","(","\"Both false\"",")","}","\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", + "PUNCT", "IDENT", "PUNCT", "PUNCT", "IDENT", "PUNCT", "NEWLINE", "KEYWORD", + "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "PUNCT", "IDENT", "OPERATOR", + "IDENT", "PUNCT", "LITERAL:String", "PUNCT", "PUNCT", "NEWLINE", "KEYWORD", "KEYWORD", + "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "PUNCT", "IDENT", "OPERATOR", + "IDENT", "PUNCT", "LITERAL:String", "PUNCT", "PUNCT", "NEWLINE", "KEYWORD", "KEYWORD", + "PUNCT", "OPERATOR", "IDENT", "OPERATOR", "IDENT", "PUNCT", "PUNCT", "IDENT", + "OPERATOR", "IDENT", "PUNCT", "LITERAL:String", "PUNCT", "PUNCT", "NEWLINE", "KEYWORD", + "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "LITERAL:String", "PUNCT", "PUNCT", + "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, TypeConditions) { + const std::string src = R"OVUM(fun TypeConditions(obj: Object): Void { +if (obj is String) { val str: String? = obj as String if (str != null) { sys::Print(str.Length().ToString()) } } +else if (obj is Int) { val num: Int = obj as Int sys::Print(num.ToString()) } +else { sys::Print("Unknown type") } +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "TypeConditions", "(", "obj", ":", "Object", ")", ":", "Void", "{", + "\\n", + "if", "(", "obj", "is", "String", ")", "{", "val", "str", ":", "String", "?", "=", "obj", "as", "String", "if", + "(", "str", "!=", "null", ")", "{", "sys", "::", "Print", "(", "str", ".", "Length", "(", ")", ".", "ToString", + "(", ")", ")", "}", "}", + "\\n", + "else", "if", "(", "obj", "is", "Int", ")", "{", "val", "num", ":", "Int", "=", "obj", "as", "Int", "sys", "::", + "Print", "(", "num", ".", "ToString", "(", ")", ")", "}", + "\\n", + "else", "{", "sys", "::", "Print", "(", "\"Unknown type\"", ")", "}", + "\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", "PUNCT", "IDENT", + "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "KEYWORD", "IDENT", "PUNCT", "PUNCT", + "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "OPERATOR", "IDENT", "KEYWORD", "IDENT", + "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "KEYWORD", "PUNCT", "PUNCT", "IDENT", "OPERATOR", + "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "PUNCT", "OPERATOR", "IDENT", + "PUNCT", "PUNCT", "PUNCT", "PUNCT", "PUNCT", "NEWLINE", "KEYWORD", "KEYWORD", "PUNCT", + "IDENT", "KEYWORD", "IDENT", "PUNCT", "PUNCT", "KEYWORD", "IDENT", "PUNCT", "IDENT", + "OPERATOR", "IDENT", "KEYWORD", "IDENT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "IDENT", + "OPERATOR", "IDENT", "PUNCT", "PUNCT", "PUNCT", "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", + "IDENT", "OPERATOR", "IDENT", "PUNCT", "LITERAL:String", "PUNCT", "PUNCT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, ElvisInIf) { + const std::string src = R"OVUM(fun ElvisInIf(opt: Int?): int { +val value: int = opt ?: 0 +if (value > 10) { return value * 2 } +else { return value } +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "ElvisInIf", "(", "opt", ":", "Int", "?", ")", ":", "int", "{", "\\n", "val", "value", + ":", "int", "=", "opt", "?:", "0", "\\n", "if", "(", "value", ">", "10", ")", "{", + "return", "value", "*", "2", "}", "\\n", "else", "{", "return", "value", "}", "\\n", "}"}; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "OPERATOR", "PUNCT", "PUNCT", + "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", + "OPERATOR", "LITERAL:Int", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Int", "PUNCT", + "PUNCT", "KEYWORD", "IDENT", "OPERATOR", "LITERAL:Int", "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", + "KEYWORD", "IDENT", "PUNCT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} +TEST(LexerUnitTestSuite, WhenLike) { + const std::string src = R"OVUM(fun WhenLike(x: int): String { +if (x == 1) return "One" +else if (x == 2) return "Two" +else if (x > 10) return "Big" +else return "Other" +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "WhenLike", "(", "x", ":", "int", ")", ":", "String", "{", "\\n", "if", + "(", "x", "==", "1", ")", "return", "\"One\"", "\\n", "else", "if", "(", "x", + "==", "2", ")", "return", "\"Two\"", "\\n", "else", "if", "(", "x", ">", "10", + ")", "return", "\"Big\"", "\\n", "else", "return", "\"Other\"", "\\n", "}"}; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", + "PUNCT", "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", + "OPERATOR", "LITERAL:Int", "PUNCT", "KEYWORD", "LITERAL:String", "NEWLINE", "KEYWORD", + "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Int", "PUNCT", "KEYWORD", + "LITERAL:String", "NEWLINE", "KEYWORD", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", + "LITERAL:Int", "PUNCT", "KEYWORD", "LITERAL:String", "NEWLINE", "KEYWORD", "KEYWORD", + "LITERAL:String", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, WhileExample) { + const std::string src = R"OVUM(fun WhileExample(n: int): int { +var counter: int = 0 +while (counter < n) { + counter = counter + 1 + if (counter % 2 == 0) continue + sys::Print(counter.ToString()) +} +return counter +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "WhileExample", "(", "n", ":", "int", ")", ":", "int", "{", + "\\n", + "var", "counter", ":", "int", "=", "0", + "\\n", + "while", "(", "counter", "<", "n", ")", "{", + "\\n", + "counter", "=", "counter", "+", "1", + "\\n", + "if", "(", "counter", "%", "2", "==", "0", ")", "continue", + "\\n", + "sys", "::", "Print", "(", "counter", ".", "ToString", "(", ")", ")", + "\\n", + "}", + "\\n", + "return", "counter", + "\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", "PUNCT", + "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", + "LITERAL:Int", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", + "PUNCT", "NEWLINE", "IDENT", "OPERATOR", "IDENT", "OPERATOR", "LITERAL:Int", "NEWLINE", + "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Int", "OPERATOR", "LITERAL:Int", "PUNCT", + "KEYWORD", "NEWLINE", "IDENT", "OPERATOR", "IDENT", "PUNCT", "IDENT", "OPERATOR", + "IDENT", "PUNCT", "PUNCT", "PUNCT", "NEWLINE", "PUNCT", "NEWLINE", "KEYWORD", + "IDENT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, ForExample) { + const std::string src = R"OVUM(fun ForExample(arr: IntArray): int { +var sum: int = 0 +for (num in arr) { if (num < 0) break sum = sum + num } +return sum +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "ForExample", "(", "arr", ":", "IntArray", ")", ":", "int", "{", "\\n", + "var", "sum", ":", "int", "=", "0", "\\n", "for", "(", "num", "in", + "arr", ")", "{", "if", "(", "num", "<", "0", ")", "break", "sum", + "=", "sum", "+", "num", "}", "\\n", "return", "sum", "\\n", "}"}; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", "PUNCT", "IDENT", + "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Int", "NEWLINE", + "KEYWORD", "PUNCT", "IDENT", "KEYWORD", "IDENT", "PUNCT", "PUNCT", "KEYWORD", "PUNCT", + "IDENT", "OPERATOR", "LITERAL:Int", "PUNCT", "KEYWORD", "IDENT", "OPERATOR", "IDENT", "OPERATOR", + "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, NestedLoops) { + const std::string src = R"OVUM(fun NestedLoops(matrix: IntArrayArray): Void { +for (row in matrix) { +var i: int = 0 +while (i < row.Length()) { +if (row[i] == 0) { continue } +sys::Print(row[i].ToString()) +i = i + 1 +if (i > 5) break } +} +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "NestedLoops", "(", "matrix", ":", "IntArrayArray", ")", ":", "Void", "{", + "\\n", + "for", "(", "row", "in", "matrix", ")", "{", + "\\n", + "var", "i", ":", "int", "=", "0", + "\\n", + "while", "(", "i", "<", "row", ".", "Length", "(", ")", ")", "{", + "\\n", + "if", "(", "row", "[", "i", "]", "==", "0", ")", "{", "continue", "}", + "\\n", + "sys", "::", "Print", "(", "row", "[", "i", "]", ".", "ToString", "(", ")", ")", + "\\n", + "i", "=", "i", "+", "1", + "\\n", + "if", "(", "i", ">", "5", ")", "break", + "}", + "\\n", + "}", + "\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", "PUNCT", + "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "KEYWORD", "IDENT", + "PUNCT", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", + "LITERAL:Int", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "IDENT", "OPERATOR", + "IDENT", "PUNCT", "PUNCT", "PUNCT", "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", + "IDENT", "PUNCT", "IDENT", "PUNCT", "OPERATOR", "LITERAL:Int", "PUNCT", "PUNCT", + "KEYWORD", "PUNCT", "NEWLINE", "IDENT", "OPERATOR", "IDENT", "PUNCT", "IDENT", + "PUNCT", "IDENT", "PUNCT", "OPERATOR", "IDENT", "PUNCT", "PUNCT", "PUNCT", + "NEWLINE", "IDENT", "OPERATOR", "IDENT", "OPERATOR", "LITERAL:Int", "NEWLINE", "KEYWORD", + "PUNCT", "IDENT", "OPERATOR", "LITERAL:Int", "PUNCT", "KEYWORD", "PUNCT", "NEWLINE", + "PUNCT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, ForWithIndex) { + const std::string src = R"OVUM(fun ForWithIndex(arr: IntArray): Void { +var i: int = 0 +while (i < arr.Length()) { +sys::Print((i.ToString() + ": " + arr[i].ToString())) +i = i + 1 }})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "ForWithIndex", "(", "arr", ":", "IntArray", ")", ":", "Void", "{", + "\\n", + "var", "i", ":", "int", "=", "0", + "\\n", + "while", "(", "i", "<", "arr", ".", "Length", "(", ")", ")", "{", + "\\n", + "sys", "::", "Print", "(", "(", "i", ".", "ToString", "(", ")", "+", "\": \"", "+", "arr", "[", "i", "]", ".", + "ToString", "(", ")", ")", ")", + "\\n", + "i", "=", "i", "+", "1", "}", "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", "PUNCT", + "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", + "LITERAL:Int", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "IDENT", "OPERATOR", + "IDENT", "PUNCT", "PUNCT", "PUNCT", "PUNCT", "NEWLINE", "IDENT", "OPERATOR", + "IDENT", "PUNCT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "PUNCT", + "OPERATOR", "LITERAL:String", "OPERATOR", "IDENT", "PUNCT", "IDENT", "PUNCT", "OPERATOR", + "IDENT", "PUNCT", "PUNCT", "PUNCT", "PUNCT", "NEWLINE", "IDENT", "OPERATOR", + "IDENT", "OPERATOR", "LITERAL:Int", "PUNCT", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, ListCreation) { + const std::string src = R"OVUM(fun ListCreation(): Void { +val list: IntArray = IntArray(4) +list[0] := 10 +list[1] := 20 +list[2] := 30 +list[3] := 40 +val fromLiteral: IntArray = IntArray(2) +fromLiteral[0] := 1 +fromLiteral[1] := 2 +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "ListCreation", "(", ")", ":", "Void", "{", + "\\n", + "val", "list", ":", "IntArray", "=", "IntArray", "(", "4", ")", + "\\n", + "list", "[", "0", "]", ":=", "10", + "\\n", + "list", "[", "1", "]", ":=", "20", + "\\n", + "list", "[", "2", "]", ":=", "30", + "\\n", + "list", "[", "3", "]", ":=", "40", + "\\n", + "val", "fromLiteral", ":", "IntArray", "=", "IntArray", "(", "2", ")", + "\\n", + "fromLiteral", "[", "0", "]", ":=", "1", + "\\n", + "fromLiteral", "[", "1", "]", ":=", "2", + "\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "PUNCT", "PUNCT", "IDENT", "PUNCT", + "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", + "PUNCT", "LITERAL:Int", "PUNCT", "NEWLINE", "IDENT", "PUNCT", "LITERAL:Int", + "PUNCT", "OPERATOR", "LITERAL:Int", "NEWLINE", "IDENT", "PUNCT", "LITERAL:Int", + "PUNCT", "OPERATOR", "LITERAL:Int", "NEWLINE", "IDENT", "PUNCT", "LITERAL:Int", + "PUNCT", "OPERATOR", "LITERAL:Int", "NEWLINE", "IDENT", "PUNCT", "LITERAL:Int", + "PUNCT", "OPERATOR", "LITERAL:Int", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", + "IDENT", "OPERATOR", "IDENT", "PUNCT", "LITERAL:Int", "PUNCT", "NEWLINE", + "IDENT", "PUNCT", "LITERAL:Int", "PUNCT", "OPERATOR", "LITERAL:Int", "NEWLINE", + "IDENT", "PUNCT", "LITERAL:Int", "PUNCT", "OPERATOR", "LITERAL:Int", "NEWLINE", + "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, ListIteration) { + const std::string src = R"OVUM(fun ListIteration(list: IntArray): int { +var total: int = 0 +var idx: int = 0 +while (idx < list.Length()) { total = total + list[idx] +if (list[idx] % 2 == 0) { list[idx] := list[idx] * 2 } idx = idx + 1 } +return total +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "ListIteration", "(", "list", ":", "IntArray", ")", ":", "int", "{", + "\\n", + "var", "total", ":", "int", "=", "0", + "\\n", + "var", "idx", ":", "int", "=", "0", + "\\n", + "while", "(", "idx", "<", "list", ".", "Length", "(", ")", ")", "{", "total", "=", "total", "+", "list", "[", + "idx", "]", "\\n", + "if", "(", "list", "[", "idx", "]", "%", "2", "==", "0", ")", "{", "list", "[", "idx", "]", ":=", + "list", "[", "idx", "]", "*", "2", "}", "idx", "=", "idx", "+", "1", "}", + "\\n", + "return", "total", + "\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", "PUNCT", + "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", + "LITERAL:Int", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Int", + "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "IDENT", "OPERATOR", "IDENT", + "PUNCT", "PUNCT", "PUNCT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "OPERATOR", + "IDENT", "PUNCT", "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", + "PUNCT", "IDENT", "PUNCT", "OPERATOR", "LITERAL:Int", "OPERATOR", "LITERAL:Int", "PUNCT", + "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", "OPERATOR", "IDENT", "PUNCT", + "IDENT", "PUNCT", "OPERATOR", "LITERAL:Int", "PUNCT", "IDENT", "OPERATOR", "IDENT", + "OPERATOR", "LITERAL:Int", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, ListOps) { + const std::string src = R"OVUM(fun ListOps(original: IntArray): IntArray { +val newList: IntArray = IntArray(original.Length() + 1) +var i: int = 0 +while (i < original.Length()) { newList[i] := original[i] i = i + 1 } +newList[original.Length()] := 100 +return newList +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "ListOps", "(", "original", ":", "IntArray", ")", ":", "IntArray", "{", "\\n", + "val", "newList", ":", "IntArray", "=", "IntArray", "(", "original", ".", "Length", "(", + ")", "+", "1", ")", "\\n", "var", "i", ":", "int", "=", "0", + "\\n", "while", "(", "i", "<", "original", ".", "Length", "(", ")", ")", + "{", "newList", "[", "i", "]", ":=", "original", "[", "i", "]", "i", + "=", "i", "+", "1", "}", "\\n", "newList", "[", "original", ".", "Length", + "(", ")", "]", ":=", "100", "\\n", "return", "newList", "\\n", "}"}; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", "PUNCT", + "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", + "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "PUNCT", "OPERATOR", + "LITERAL:Int", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", + "LITERAL:Int", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "IDENT", "OPERATOR", + "IDENT", "PUNCT", "PUNCT", "PUNCT", "PUNCT", "IDENT", "PUNCT", "IDENT", + "PUNCT", "OPERATOR", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "OPERATOR", + "IDENT", "OPERATOR", "LITERAL:Int", "PUNCT", "NEWLINE", "IDENT", "PUNCT", "IDENT", + "OPERATOR", "IDENT", "PUNCT", "PUNCT", "PUNCT", "OPERATOR", "LITERAL:Int", "NEWLINE", + "KEYWORD", "IDENT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, FindInList) { + const std::string src = R"OVUM(fun FindInList(list: IntArray, target: int): Int? { +for (num in list) { if (num == target) { return Int(num) } } +return null +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "FindInList", "(", "list", ":", "IntArray", ",", "target", ":", "int", ")", ":", "Int", "?", "{", + "\\n", + "for", "(", "num", "in", "list", ")", "{", "if", "(", "num", "==", "target", ")", "{", "return", "Int", + "(", "num", ")", "}", "}", + "\\n", + "return", "null", + "\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", + "IDENT", "PUNCT", "PUNCT", "IDENT", "OPERATOR", "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", + "IDENT", "KEYWORD", "IDENT", "PUNCT", "PUNCT", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", + "IDENT", "PUNCT", "PUNCT", "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "PUNCT", + "PUNCT", "NEWLINE", "KEYWORD", "KEYWORD", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, SafeCall) { + const std::string src = R"OVUM(fun SafeCall(obj: String?): Void { +val length: int = obj?.Length() ?: 0 +sys::Print(length.ToString()) +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "SafeCall", "(", "obj", ":", "String", "?", ")", ":", "Void", "{", "\\n", "val", + "length", ":", "int", "=", "obj", "?.", "Length", "(", ")", "?:", "0", "\\n", "sys", + "::", "Print", "(", "length", ".", "ToString", "(", ")", ")", "\\n", "}"}; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "OPERATOR", "PUNCT", "PUNCT", "IDENT", + "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "OPERATOR", "IDENT", + "PUNCT", "PUNCT", "OPERATOR", "LITERAL:Int", "NEWLINE", "IDENT", "OPERATOR", "IDENT", "PUNCT", "IDENT", + "OPERATOR", "IDENT", "PUNCT", "PUNCT", "PUNCT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, ElvisExamples) { + const std::string src = R"OVUM(fun ElvisExamples(optInt: Int?): int { +val default: int = optInt ?: 42 +val str: String = optInt?.ToString() ?: "Unknown" +return default +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "ElvisExamples", "(", "optInt", ":", "Int", "?", ")", ":", "int", "{", + "\\n", + "val", "default", ":", "int", "=", "optInt", "?:", "42", + "\\n", + "val", "str", ":", "String", "=", "optInt", "?.", "ToString", "(", ")", "?:", "\"Unknown\"", + "\\n", + "return", "default", + "\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "OPERATOR", "PUNCT", + "PUNCT", "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", + "OPERATOR", "IDENT", "OPERATOR", "LITERAL:Int", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", + "IDENT", "OPERATOR", "IDENT", "OPERATOR", "IDENT", "PUNCT", "PUNCT", "OPERATOR", + "LITERAL:String", "NEWLINE", "KEYWORD", "IDENT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, NullChecks) { + const std::string src = R"OVUM(fun NullChecks(opt: Object?): Void { +if (opt != null) { sys::Print("Not null") val nonNull: Object = opt } +val safe: Object? = opt +if (safe == null) { sys::Print("Is null") } +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "NullChecks", "(", "opt", ":", "Object", "?", ")", ":", + "Void", "{", "\\n", "if", "(", "opt", "!=", "null", ")", + "{", "sys", "::", "Print", "(", "\"Not null\"", ")", "val", "nonNull", + ":", "Object", "=", "opt", "}", "\\n", "val", "safe", ":", + "Object", "?", "=", "opt", "\\n", "if", "(", "safe", "==", + "null", ")", "{", "sys", "::", "Print", "(", "\"Is null\"", ")", + "}", "\\n", "}"}; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "OPERATOR", "PUNCT", + "PUNCT", "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", + "KEYWORD", "PUNCT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "LITERAL:String", + "PUNCT", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", + "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "OPERATOR", "IDENT", + "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "KEYWORD", "PUNCT", "PUNCT", + "IDENT", "OPERATOR", "IDENT", "PUNCT", "LITERAL:String", "PUNCT", "PUNCT", "NEWLINE", + "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, CastNullable) { + const std::string src = R"OVUM(fun CastNullable(obj: Object?): Void { +if (obj is Int?) { +val casted: Int? = obj as Int? +val value: int = casted ?: 0 +sys::Print(value.ToString()) } +val risky: Int = obj as Int +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "CastNullable", "(", "obj", ":", "Object", "?", ")", ":", "Void", "{", + "\\n", + "if", "(", "obj", "is", "Int", "?", ")", "{", + "\\n", + "val", "casted", ":", "Int", "?", "=", "obj", "as", "Int", "?", + "\\n", + "val", "value", ":", "int", "=", "casted", "?:", "0", + "\\n", + "sys", "::", "Print", "(", "value", ".", "ToString", "(", ")", ")", "}", + "\\n", + "val", "risky", ":", "Int", "=", "obj", "as", "Int", + "\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "OPERATOR", "PUNCT", "PUNCT", + "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "KEYWORD", "IDENT", "OPERATOR", + "PUNCT", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "OPERATOR", + "IDENT", "KEYWORD", "IDENT", "OPERATOR", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", + "OPERATOR", "IDENT", "OPERATOR", "LITERAL:Int", "NEWLINE", "IDENT", "OPERATOR", "IDENT", "PUNCT", + "IDENT", "OPERATOR", "IDENT", "PUNCT", "PUNCT", "PUNCT", "PUNCT", "NEWLINE", "KEYWORD", + "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "KEYWORD", "IDENT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, CopyNullable) { + const std::string src = R"OVUM(fun CopyNullable(src: IntArray?): IntArray? { +if (src == null) return null +val copy: IntArray? = IntArray(src.Length()) +var i: int = 0 +while (i < src.Length()) { copy[i] := src[i] i = i + 1 } +return copy +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "CopyNullable", "(", "src", ":", "IntArray", "?", ")", ":", "IntArray", "?", "{", + "\\n", + "if", "(", "src", "==", "null", ")", "return", "null", + "\\n", + "val", "copy", ":", "IntArray", "?", "=", "IntArray", "(", "src", ".", "Length", "(", ")", ")", + "\\n", + "var", "i", ":", "int", "=", "0", + "\\n", + "while", "(", "i", "<", "src", ".", "Length", "(", ")", ")", "{", "copy", "[", "i", "]", ":=", "src", + "[", "i", "]", "i", "=", "i", "+", "1", "}", + "\\n", + "return", "copy", + "\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "OPERATOR", "PUNCT", "PUNCT", + "IDENT", "OPERATOR", "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "KEYWORD", + "PUNCT", "KEYWORD", "KEYWORD", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", + "OPERATOR", "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "PUNCT", "PUNCT", + "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "LITERAL:Int", "NEWLINE", "KEYWORD", + "PUNCT", "IDENT", "OPERATOR", "IDENT", "OPERATOR", "IDENT", "PUNCT", "PUNCT", "PUNCT", + "PUNCT", "IDENT", "PUNCT", "IDENT", "PUNCT", "OPERATOR", "IDENT", "PUNCT", "IDENT", + "PUNCT", "IDENT", "OPERATOR", "IDENT", "OPERATOR", "LITERAL:Int", "PUNCT", "NEWLINE", "KEYWORD", + "IDENT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, ChainSafe) { + const std::string src = R"OVUM(class Nested { +public val Inner: String? +public fun Nested(s: String?): Nested { this.Inner = s return this } } +fun ChainSafe(nested: Nested?): Void { +val len: int = nested?.Inner?.Length() ?: 0 +sys::Print(len.ToString()) +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "class", "Nested", "{", + "\\n", + "public", "val", "Inner", ":", "String", "?", + "\\n", + "public", "fun", "Nested", "(", "s", ":", "String", "?", ")", ":", "Nested", "{", "this", ".", + "Inner", "=", "s", "return", "this", "}","}", + "\\n", + "fun", "ChainSafe", "(", "nested", ":", "Nested", "?", ")", ":", "Void", "{", + "\\n", + "val", "len", ":", "int", "=", "nested", "?.", "Inner", "?.", "Length", "(", ")", "?:", "0", + "\\n", + "sys", "::", "Print", "(", "len", ".", "ToString", "(", ")", ")", + "\\n", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "NEWLINE", "KEYWORD", "KEYWORD", "IDENT", "PUNCT", "IDENT", + "OPERATOR", "NEWLINE", "KEYWORD", "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", + "OPERATOR", "PUNCT", "PUNCT", "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "OPERATOR", + "IDENT", "KEYWORD", "IDENT", "PUNCT", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", + "IDENT", "PUNCT", "IDENT", "OPERATOR", "PUNCT", "PUNCT", "IDENT", "PUNCT", "NEWLINE", + "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "OPERATOR", "IDENT", "OPERATOR", + "IDENT", "PUNCT", "PUNCT", "OPERATOR", "LITERAL:Int", "NEWLINE", "IDENT", "OPERATOR", "IDENT", + "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "PUNCT", "PUNCT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, SafeAdd) { + const std::string src = R"OVUM(pure fun SafeAdd(a: Int?, b: Int?): Int { +val aVal: int = a ?: 0 +val bVal: int = b ?: 0 +return Int(aVal + bVal) +})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "pure", "fun", "SafeAdd", "(", "a", ":", "Int", "?", ",", "b", ":", "Int", "?", ")", ":", + "Int", "{", "\\n", "val", "aVal", ":", "int", "=", "a", "?:", "0", "\\n", "val", "bVal", ":", + "int", "=", "b", "?:", "0", "\\n", "return", "Int", "(", "aVal", "+", "bVal", ")", "\\n", "}"}; + // clang-format on + std::vector expected_types = { + "KEYWORD", "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "OPERATOR", "PUNCT", + "IDENT", "PUNCT", "IDENT", "OPERATOR", "PUNCT", "PUNCT", "IDENT", "PUNCT", "NEWLINE", + "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "OPERATOR", "LITERAL:Int", "NEWLINE", + "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "OPERATOR", "LITERAL:Int", "NEWLINE", + "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "IDENT", "PUNCT", "NEWLINE", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, UnsafeNullable) { + const std::string src = R"OVUM(fun UnsafeNullable(ptr: Pointer?): Void { + unsafe { +if (ptr != null) { +val bytes: ByteArray = (ptr as ByteArray) +} }})OVUM"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + // clang-format off + std::vector expected_lexemes = { + "fun", "UnsafeNullable", "(", "ptr", ":", "Pointer", "?", ")", ":", "Void", "{", + "\\n", + "unsafe", "{", + "\\n", + "if", "(", "ptr", "!=", "null", ")", "{", + "\\n", + "val", "bytes", ":", "ByteArray", "=", "(", "ptr", "as", "ByteArray", ")", + "\\n", + "}", + "}", + "}" + }; + // clang-format on + std::vector expected_types = { + "KEYWORD", "IDENT", "PUNCT", "IDENT", "PUNCT", "IDENT", "OPERATOR", "PUNCT", "PUNCT", "IDENT", + "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", "NEWLINE", "KEYWORD", "PUNCT", "IDENT", "OPERATOR", "KEYWORD", + "PUNCT", "PUNCT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "IDENT", "OPERATOR", "PUNCT", "IDENT", + "KEYWORD", "IDENT", "PUNCT", "NEWLINE", "PUNCT", "PUNCT", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} diff --git a/tests/lexer_tests.cpp b/tests/lexer_tests.cpp new file mode 100644 index 0000000..f6a9fb4 --- /dev/null +++ b/tests/lexer_tests.cpp @@ -0,0 +1,410 @@ +#include +#include +#include +#include "lib/lexer/Lexer.hpp" +#include "test_suites/LexerUnitTestSuite.hpp" + +TEST(LexerUnitTestSuite, EmptyString) { + const std::string src = ""; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {}; + std::vector expected_types = {}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, SingleCharacter) { + const std::string src = "c"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"c"}; + std::vector expected_types = {"IDENT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, Keywords) { + const std::string src = + "fun pure val var class interface implements override if else while for in return break continue unsafe is as " + "typealias"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = { + "fun", "pure", "val", "var", "class", "interface", "implements", "override", "if", "else", + "while", "for", "in", "return", "break", "continue", "unsafe", "is", "as", "typealias"}; + std::vector expected_types(expected_lexemes.size(), "KEYWORD"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, Punctuation) { + const std::string src = "{ } ( ) [ ] , : ;"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"{", "}", "(", ")", "[", "]", ",", ":", ";"}; + std::vector expected_types(expected_lexemes.size(), "PUNCT"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, Identifiers) { + const std::string src = "abc ABC _abc ovum ExampleFundamentals Void String IntArray"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = { + "abc", "ABC", "_abc", "ovum", "ExampleFundamentals", "Void", "String", "IntArray"}; + std::vector expected_types(expected_lexemes.size(), "IDENT"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, OperatorsSimple) { + const std::string src = "+ - * / % < > ! . ?"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"+", "-", "*", "/", "%", "<", ">", "!", ".", "?"}; + std::vector expected_types(expected_lexemes.size(), "OPERATOR"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, OperatorsMultiChar) { + const std::string src = "== != <= >= && || = := ::"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"==", "!=", "<=", ">=", "&&", "||", "=", ":=", "::"}; + std::vector expected_types(expected_lexemes.size(), "OPERATOR"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, OperatorsNullSafe) { + const std::string src = "?. ?:"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"?.", "?:"}; + std::vector expected_types(expected_lexemes.size(), "OPERATOR"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, OperatorXor) { + const std::string src = "^"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"^"}; + std::vector expected_types = {"OPERATOR"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, LiteralsInt) { + const std::string src = "0 42 255"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"0", "42", "255"}; + std::vector expected_types(expected_lexemes.size(), "LITERAL:Int"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, LiteralsFloat) { + const std::string src = "3.14 1e3 .5 5. 2.0E-2"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"3.14", "1e3", ".5", "5.", "2.0E-2"}; + std::vector expected_types(expected_lexemes.size(), "LITERAL:Float"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, LiteralsChar) { + const std::string src = "'A' '\\n'"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"'A'", "'\\n'"}; + std::vector expected_types(expected_lexemes.size(), "LITERAL:Char"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, LiteralsBool) { + const std::string src = "true false"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"true", "false"}; + std::vector expected_types(expected_lexemes.size(), "LITERAL:Bool"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, KeywordNull) { + const std::string src = "null"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"null"}; + std::vector expected_types = {"KEYWORD"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, LiteralsString) { + const std::string src = R"("hello" "world\n" "")"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {R"("hello")", R"("world\n")", R"("")"}; + std::vector expected_types(expected_lexemes.size(), "LITERAL:String"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, Preprocessor) { + const std::string src = "#import #define #ifdef"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"#import", "#define", "#ifdef"}; + std::vector expected_types(expected_lexemes.size(), "KEYWORD"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, Newline) { + const std::string src = "\n"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"\\n"}; + std::vector expected_types = {"NEWLINE"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, LiteralsIntHex) { + const std::string src = "0x1A 0xFF"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"0x1A", "0xFF"}; + std::vector expected_types(expected_lexemes.size(), "LITERAL:Int"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, LiteralsIntBinary) { + const std::string src = "0b1010 0b1111"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"0b1010", "0b1111"}; + std::vector expected_types(expected_lexemes.size(), "LITERAL:Int"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, LiteralsIntNegative) { + const std::string src = "-17 -42"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"-", "17", "-", "42"}; + std::vector expected_types = {"OPERATOR", "LITERAL:Int", "OPERATOR", "LITERAL:Int"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, LiteralsFloatSpecial) { + const std::string src = "Infinity -Infinity NaN"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"Infinity", "-", "Infinity", "NaN"}; + std::vector expected_types = {"LITERAL:Float", "OPERATOR", "LITERAL:Float", "LITERAL:Float"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, LiteralsCharEscapes) { + const std::string src = R"('A' '\n' '\t' '\0' '\'')"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"'A'", "'\\n'", "'\\t'", "'\\0'", "'\\''"}; + std::vector expected_types(expected_lexemes.size(), "LITERAL:Char"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, WhitespaceSkipping) { + const std::string src = "fun val \t if"; + Lexer lexer(src); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"fun", "val", "if"}; + std::vector expected_types(expected_lexemes.size(), "KEYWORD"); + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, SingleLineComment) { + const std::string src = R"OVUM(// This is a single-line comment +fun Main)OVUM"; + Lexer lexer(src, true); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {" This is a single-line comment", "\\n", "fun", "Main"}; + std::vector expected_types = {"COMMENT", "NEWLINE", "KEYWORD", "IDENT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, SingleLineCommentNoSpace) { + const std::string src = R"OVUM(//This is a single-line comment without space +)OVUM"; + Lexer lexer(src, true); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {"This is a single-line comment without space", "\\n"}; + std::vector expected_types = {"COMMENT", "NEWLINE"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, MultiLineCommentPositive) { + const std::string src = R"OVUM(/* This is a properly closed +multi-line comment */)OVUM"; + Lexer lexer(src, true); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {" This is a properly closed\nmulti-line comment "}; + std::vector expected_types = {"COMMENT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, MultiLineCommentWithSlashesAndStars) { + const std::string src = R"OVUM(/* This comment contains / and * signs: /* // */ +fun Main())OVUM"; + Lexer lexer(src, true); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = { + " This comment contains / and * signs: /* // ", "\\n", "fun", "Main", "(", ")"}; + std::vector expected_types = {"COMMENT", "NEWLINE", "KEYWORD", "IDENT", "PUNCT", "PUNCT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, IncorrectCommentWithSlashesAndStars) { + const std::string src = R"OVUM(/* This comment contains / and * signs: /* // */ */)OVUM"; + Lexer lexer(src, true); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {" This comment contains / and * signs: /* // ", "*", "/"}; + std::vector expected_types = {"COMMENT", "OPERATOR", "OPERATOR"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +TEST(LexerUnitTestSuite, MultiLineCommentWithMultipleNewlines) { + const std::string src = R"OVUM(/* First line + +Third line after empty line + +Fifth line */)OVUM"; + Lexer lexer(src, true); + auto tokens = lexer.Tokenize(); + auto items = LexerUnitTestSuite::ExtractLexemesAndTypes(tokens); + std::vector expected_lexemes = {" First line\n\nThird line after empty line\n\nFifth line "}; + std::vector expected_types = {"COMMENT"}; + LexerUnitTestSuite::AssertLexemesAndTypesEqual(items, expected_lexemes, expected_types); +} + +// Negative tests + +TEST(LexerUnitTestSuite, InvalidCharacter) { + const std::string src = "fun @main(): Void {}"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, InvalidCharacterInIdentifier) { + const std::string src = "val name$var: int = 0"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, InvalidIdentifierStartingWithDigit) { + const std::string src = "val 1abc: int = 0"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, InvalidHexNumber) { + const std::string src = "val x: int = 0xG"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, InvalidBinaryNumber) { + const std::string src = "val x: int = 0b2"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, InvalidFloatNumber) { + const std::string src = "val x: float = 1.2.3"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, InvalidFloatIncompleteExp) { + const std::string src = "val x: float = 1e"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, UnterminatedString) { + const std::string src = R"(val msg: String = "hello)"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, InvalidEscapeInString) { + const std::string src = R"(val msg: String = "\z")"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, UnterminatedChar) { + const std::string src = "val c: char = 'A"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, EmptyCharLiteral) { + const std::string src = R"(val c: char = '')"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, MultiCharLiteral) { + const std::string src = "val c: char = 'AB'"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, InvalidEscapeInChar) { + const std::string src = "val c: char = '\\z'"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, InvalidPreprocessor) { + const std::string src = "#invalid"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, MultipleErrorsInOneFile) { + const std::string src = R"(fun main@(): Void { val x = "unclosed; val y = 0xG })"; + Lexer lexer(src); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} + +TEST(LexerUnitTestSuite, MultiLineCommentNegativeUnclosed) { + const std::string src = R"OVUM(/* This is an unclosed multi-line comment +fun Main(): Void { + val x: int = 42 +})OVUM"; + Lexer lexer(src, true); + ASSERT_THROW(lexer.Tokenize(), std::runtime_error); +} diff --git a/tests/test_suites/LexerUnitTestSuite.cpp b/tests/test_suites/LexerUnitTestSuite.cpp new file mode 100644 index 0000000..1548e59 --- /dev/null +++ b/tests/test_suites/LexerUnitTestSuite.cpp @@ -0,0 +1,86 @@ +#include "LexerUnitTestSuite.hpp" + +void LexerUnitTestSuite::SetUp() { +} + +void LexerUnitTestSuite::TearDown() { +} + +std::vector> LexerUnitTestSuite::ExtractLexemesAndTypes( + const std::vector& tokens) { + std::vector> out; + if (tokens.empty()) + return out; + // excluding the last token (EOF) + for (size_t i = 0; i + 1 < tokens.size(); ++i) { + out.emplace_back(tokens[i]->GetLexeme(), tokens[i]->GetStringType()); + } + return out; +} + +void LexerUnitTestSuite::PrintLexemesAndTypes(const std::vector>& items) { + std::cout << "Lexemes and Types:" << '\n'; + for (const auto& item : items) { + std::cout << item.first << " (" << item.second << ")" << '\n'; + } + std::cout << "End of output" << '\n'; +} + +void LexerUnitTestSuite::AssertLexemesAndTypesEqual(const std::vector>& actual, + const std::vector& expected_lexemes, + const std::vector& expected_type_substrs) { + bool has_mismatch = false; + + if (actual.size() != expected_lexemes.size() || actual.size() != expected_type_substrs.size()) { + has_mismatch = true; + } else { + for (size_t i = 0; i < actual.size(); ++i) { + if (actual[i].first != expected_lexemes[i]) { + has_mismatch = true; + break; + } + const auto& type_str = actual[i].second; + const auto& need_sub = expected_type_substrs[i]; + if (type_str.find(need_sub) == std::string::npos) { + has_mismatch = true; + break; + } + } + } + + if (has_mismatch) { + std::cout << "Exp lex: "; + for (const auto& lex : expected_lexemes) { + std::cout << lex << " "; + } + std::cout << '\n'; + std::cout << "Act lex: "; + for (const auto& item : actual) { + std::cout << item.first << " "; + } + std::cout << '\n'; + std::cout << "Exp typ: "; + for (const auto& sub : expected_type_substrs) { + std::cout << sub << " "; + } + std::cout << '\n'; + std::cout << "Act typ: "; + for (const auto& item : actual) { + std::cout << item.second << " "; + } + std::cout << '\n'; + } + + ASSERT_EQ(expected_lexemes.size(), expected_type_substrs.size()); + for (size_t i = 0; i < actual.size() && i < expected_lexemes.size() && i < expected_type_substrs.size(); ++i) { + EXPECT_EQ(actual[i].first, expected_lexemes[i]) << "lexeme mismatch at index " << i; + const auto& type_str = actual[i].second; + const auto& need_sub = expected_type_substrs[i]; + EXPECT_NE(type_str.find(need_sub), std::string::npos) + << i << " " << actual[i].first << " type \"" << type_str << "\" does not contain expected substring \"" + << need_sub << "\" for lexeme \"" << actual[i].first << "\""; + } + + ASSERT_EQ(actual.size(), expected_lexemes.size()); + ASSERT_EQ(actual.size(), expected_type_substrs.size()); +} diff --git a/tests/test_suites/LexerUnitTestSuite.hpp b/tests/test_suites/LexerUnitTestSuite.hpp new file mode 100644 index 0000000..ec8a932 --- /dev/null +++ b/tests/test_suites/LexerUnitTestSuite.hpp @@ -0,0 +1,23 @@ +#ifndef OVUMC_LEXERUNITTESTSUITE_HPP +#define OVUMC_LEXERUNITTESTSUITE_HPP + +#include +#include "lib/lexer/handlers/Handler.hpp" + +struct LexerUnitTestSuite : public testing::Test { + const std::string kTemporaryDirectoryName = "./gtest_tmp"; + + static std::vector> ExtractLexemesAndTypes(const std::vector& tokens); + + static void PrintLexemesAndTypes(const std::vector>& items); + + static void AssertLexemesAndTypesEqual(const std::vector>& actual, + const std::vector& expected_lexemes, + const std::vector& expected_type_substrs); + + void SetUp() override; // method that is called at the beginning of every test + + void TearDown() override; // method that is called at the end of every test +}; + +#endif // OVUMC_LEXERUNITTESTSUITE_HPP