Skip to content

Commit

Permalink
#6031: Parse whitespace tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
codereader committed Aug 8, 2022
1 parent e4dd890 commit 82f19cf
Show file tree
Hide file tree
Showing 2 changed files with 177 additions and 11 deletions.
148 changes: 141 additions & 7 deletions libs/parser/DefBlockSyntaxParser.h
@@ -1,8 +1,10 @@
#pragma once

#include <cassert>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "string/tokeniser.h"
#include "string/join.h"

namespace parser
{
Expand All @@ -11,14 +13,93 @@ namespace parser
// Can be of type text, whitespace or comment
struct DefSyntaxToken
{
    // Category of a token
    enum class Type
    {
        Nothing,    // no content; the type a token has after clear()
        Whitespace, // a run of whitespace characters
    };

    // Token type. Default-initialised so that a freshly constructed token
    // never carries an indeterminate type.
    Type type = Type::Nothing;

    // The raw string as parsed from the source text
    std::string value;

    // Resets the token to its empty state: type Nothing and an empty value
    void clear()
    {
        type = Type::Nothing;
        value.clear();
    }
};

struct DefSyntaxNode
// Base class of all syntax tree nodes. A node has a type and an ordered
// list of child nodes; subclasses override getString() to supply their text.
class DefSyntaxNode
{
public:
    using Ptr = std::shared_ptr<DefSyntaxNode>;

    enum class Type
    {
        Root,
        Whitespace,
    };

private:
    // All child nodes of this node
    std::vector<Ptr> _children;

    Type _type;

public:
    DefSyntaxNode(Type type) :
        _type(type)
    {}

    // Polymorphic base class: make destruction through a base pointer well-defined
    virtual ~DefSyntaxNode() = default;

    // Returns the type this node was constructed with
    Type getType() const
    {
        return _type;
    }

    // Returns the child nodes in the order they were appended
    const std::vector<Ptr>& getChildren() const
    {
        return _children;
    }

    // Appends the given node as last child, taking over the caller's reference.
    // The passed pointer is empty afterwards.
    void appendChildNode(Ptr&& node)
    {
        // A named rvalue reference is an lvalue; without std::move the
        // shared_ptr would be copied rather than moved into the vector
        _children.emplace_back(std::move(node));
    }

    // Returns the concatenation of the strings of all children, in order.
    // A node without children yields an empty string.
    virtual std::string getString() const
    {
        std::string value;

        // Rough guess of 10 characters per child to limit reallocations
        value.reserve(getChildren().size() * 10);

        for (const auto& child : getChildren())
        {
            value += child->getString();
        }

        return value;
    }
};

class DefWhitespaceSyntax :
public DefSyntaxNode
{
private:
DefSyntaxToken _token;
public:
DefWhitespaceSyntax(const DefSyntaxToken& token) :
DefSyntaxNode(Type::Whitespace),
_token(token)
{
assert(token.type == DefSyntaxToken::Type::Whitespace);
}

std::string getString() const override
{
return _token.value;
}
};

struct DefSyntaxTree
Expand All @@ -40,7 +121,8 @@ class DefSyntaxTokeniserFunc
// Enumeration of tokeniser states; the current state is held in _state
enum class State
{
// NOTE(review): None and Searching are described identically. None looks like
// a leftover from a rename to Searching - confirm and remove it if unused.
None, // haven't found anything yet
Searching, // haven't found anything yet
Whitespace, // on whitespace
} _state;

// Space, tab, newline, vertical tab and carriage return.
// Presumably the character set checked by IsWhitespace (not visible here) - TODO confirm
constexpr static const char* const Delims = " \t\n\v\r";
Expand All @@ -61,7 +143,7 @@ class DefSyntaxTokeniserFunc

public:
// Starts out in the Searching state, which is also the state operator()
// resets to at the beginning of every call
DefSyntaxTokeniserFunc() :
    _state(State::Searching)
{}

/**
Expand All @@ -74,8 +156,44 @@ class DefSyntaxTokeniserFunc
// Extracts the next token from the range [next, end).
//
// next: current read position; advanced past every character that was
//       consumed into the token
// end:  end of the input range
// tok:  receives the token; it is cleared first, so any previous content is lost
//
// Returns true if tok holds a non-empty token, false if no token could be
// produced (input exhausted, or an unrecognised character was encountered).
template<typename InputIterator>
bool operator() (InputIterator& next, const InputIterator& end, DefSyntaxToken& tok)
{
    // Initialise state, no persistence between calls
    _state = State::Searching;

    // Clear out the token, no guarantee that it is empty
    tok.clear();

    while (next != end)
    {
        char ch = *next;

        switch (_state)
        {
        case State::Searching:
            if (!IsWhitespace(ch))
            {
                // Only whitespace tokens are recognised so far. Stop here
                // instead of spinning forever on a character that is never
                // consumed; next is left pointing at that character.
                return false;
            }

            // First whitespace character opens a whitespace token
            _state = State::Whitespace;
            tok.type = DefSyntaxToken::Type::Whitespace;
            tok.value += ch;
            ++next;
            continue;

        case State::Whitespace:
            if (IsWhitespace(ch))
            {
                tok.value += ch;
                ++next;
                continue;
            }

            // Ran out of whitespace, return token
            return true;
        }
    }

    // Input exhausted: return true if we have found a non-empty token
    return !tok.value.empty();
}
};

Expand Down Expand Up @@ -108,7 +226,23 @@ class DefBlockSyntaxParser
// The returned syntax tree reference is never null
DefSyntaxTree::Ptr parse()
{
    // Create the tree and give it a root node that collects one child per token
    auto syntaxTree = std::make_shared<DefSyntaxTree>();

    syntaxTree->root = std::make_shared<DefSyntaxNode>(DefSyntaxNode::Type::Root);

    while (!_tokIter.isExhausted())
    {
        auto token = *_tokIter++;

        switch (token.type)
        {
        case DefSyntaxToken::Type::Whitespace:
            syntaxTree->root->appendChildNode(std::make_shared<DefWhitespaceSyntax>(token));
            break;

        case DefSyntaxToken::Type::Nothing:
            // A token without content produces no node
            break;
        }
    }

    return syntaxTree;
}
};

Expand Down
40 changes: 36 additions & 4 deletions test/DefBlockSyntaxParser.cpp
Expand Up @@ -5,14 +5,46 @@
namespace test
{


namespace
{

// Runs a DefBlockSyntaxParser over the given text and returns the tree it produces
inline parser::DefSyntaxTree::Ptr parseText(const std::string& text)
{
    parser::DefBlockSyntaxParser<std::string> syntaxParser(text);

    auto tree = syntaxParser.parse();
    return tree;
}

}

TEST(DefBlockSyntaxParser, EmptyText)
{
    // Parsing empty input must still hand back a syntax tree object
    auto syntaxTree = parseText("");

    EXPECT_TRUE(syntaxTree) << "Syntax Root must not be null";
}

TEST(DefBlockSyntaxParser, Whitespace)
{
    // Each input consists of whitespace only and must produce exactly one
    // whitespace node whose string reproduces the input unchanged.
    // The child count is checked with ASSERT so that front() is never
    // called on an empty child list.

    // Single space
    auto syntaxTree = parseText(" ");
    ASSERT_EQ(syntaxTree->root->getChildren().size(), 1u) << "Expected 1 whitespace node";
    EXPECT_EQ(syntaxTree->root->getChildren().front()->getType(), parser::DefSyntaxNode::Type::Whitespace);
    EXPECT_EQ(syntaxTree->root->getChildren().front()->getString(), " ");

    // Line feeds
    syntaxTree = parseText("\n\n");
    ASSERT_EQ(syntaxTree->root->getChildren().size(), 1u) << "Expected 1 whitespace node";
    EXPECT_EQ(syntaxTree->root->getChildren().front()->getType(), parser::DefSyntaxNode::Type::Whitespace);
    EXPECT_EQ(syntaxTree->root->getChildren().front()->getString(), "\n\n");

    // Tabs mixed with a space
    syntaxTree = parseText("\t \t");
    ASSERT_EQ(syntaxTree->root->getChildren().size(), 1u) << "Expected 1 whitespace node";
    EXPECT_EQ(syntaxTree->root->getChildren().front()->getType(), parser::DefSyntaxNode::Type::Whitespace);
    EXPECT_EQ(syntaxTree->root->getChildren().front()->getString(), "\t \t");

    // CRLF line endings around a space
    syntaxTree = parseText("\r\n \r\n");
    ASSERT_EQ(syntaxTree->root->getChildren().size(), 1u) << "Expected 1 whitespace node";
    EXPECT_EQ(syntaxTree->root->getChildren().front()->getType(), parser::DefSyntaxNode::Type::Whitespace);
    EXPECT_EQ(syntaxTree->root->getChildren().front()->getString(), "\r\n \r\n");
}

}

0 comments on commit 82f19cf

Please sign in to comment.