Skip to content

Commit

Permalink
Working parser.
Browse files Browse the repository at this point in the history
  • Loading branch information
abellgithub committed Jun 9, 2020
1 parent a9b4db2 commit e06a383
Show file tree
Hide file tree
Showing 6 changed files with 384 additions and 274 deletions.
318 changes: 177 additions & 141 deletions filters/private/expr/Lexer.cpp
@@ -1,3 +1,5 @@
//ABELL
#include <iostream>

#include "Lexer.hpp"

Expand All @@ -6,183 +8,217 @@ namespace pdal
namespace expr
{

Token Lexer::get(TokenClass cls)
namespace
{

// Empty tag type with internal linkage (unnamed namespace).
// NOTE(review): nothing in the visible part of this file throws or catches
// it -- confirm whether it is still needed.
struct EofException {};

} // unnamed namespace

if (m_pos == m_buf.size())
return Token(TokenType::Eof);
// Fetch the character at the current read position and advance the position.
// Returns 0 when the position is at or beyond the end of the buffer; the
// position is advanced in that case as well.
char Lexer::getChar()
{
    const char next = (m_pos < m_buf.size()) ? m_buf[m_pos] : 0;
    ++m_pos;
    return next;
}

// Step the read position back by one character.
// Throws std::runtime_error when the position is already zero.
void Lexer::putChar()
{
    //ABELL
    if (m_pos != 0)
    {
        --m_pos;
        return;
    }
    throw std::runtime_error("Put back a bad character.");
}

// Return the next token from the buffer, skipping any leading whitespace.
//
// An Eof token (positioned at the buffer size, with empty text) is returned
// once the read position reaches the end of the buffer. The end-of-input test
// is made on every pass of the loop so that whitespace at the end of the
// expression produces Eof rather than being handed to top() as a NUL
// character and reported as a syntax error.
Token Lexer::get()
{
    while (true)
    {
        if (m_pos >= m_buf.size())
            return Token(TokenType::Eof, m_buf.size(), m_buf.size(), "");

        // Record where the candidate token begins before consuming its
        // first character.
        m_tokPos = m_pos;
        const char c = getChar();

        // The <cctype> classifiers are undefined for negative values, so go
        // through unsigned char.
        if (std::isspace(static_cast<unsigned char>(c)))
            continue;

        return top(c);
    }
}

// Set the read position to the start offset recorded in token `t`
// (t.m_start), so that lexing resumes from the beginning of that token.
void Lexer::put(Token t)
{
m_pos = t.m_start;
}

Token Lexer::misc()
// Set the read position to the end offset recorded in token `t` (t.m_end),
// so that lexing resumes immediately after that token.
void Lexer::putEnd(Token t)
{
    m_pos = t.m_end;
}

Token Lexer::arithmeticOperator()
// Dispatch on the first character of a token.
//
// `c` is the character that was just consumed. Characters that may start a
// multi-character token are forwarded to a dedicated helper; single-character
// tokens are built here, spanning m_tokPos to m_pos. Any other character
// produces an Error token.
Token Lexer::top(char c)
{
    switch (c)
    {
    // Characters that may begin a two-character operator.
    case '&':
        return ampersand();
    case '|':
        return bar();
    case '!':
        return exclamation();
    case '-':
        return dash();
    case '<':
        return less();
    case '>':
        return greater();
    case '=':
        return equal();

    // Single-character tokens.
    case '+':
        return Token(TokenType::Plus, m_tokPos, m_pos, "+");
    case '*':
        return Token(TokenType::Asterisk, m_tokPos, m_pos, "*");
    case '/':
        return Token(TokenType::Slash, m_tokPos, m_pos, "/");
    case '(':
        return Token(TokenType::Lparen, m_tokPos, m_pos, "(");
    case ')':
        return Token(TokenType::Rparen, m_tokPos, m_pos, ")");

    // Numeric literal.
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        return number();

    // Identifier.
    case 'a': case 'b': case 'c': case 'd': case 'e':
    case 'f': case 'g': case 'h': case 'i': case 'j':
    case 'k': case 'l': case 'm': case 'n': case 'o':
    case 'p': case 'q': case 'r': case 's': case 't':
    case 'u': case 'v': case 'w': case 'x': case 'y':
    case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E':
    case 'F': case 'G': case 'H': case 'I': case 'J':
    case 'K': case 'L': case 'M': case 'N': case 'O':
    case 'P': case 'Q': case 'R': case 'S': case 'T':
    case 'U': case 'V': case 'W': case 'X': case 'Y':
    case 'Z':
        return letter();

    default:
        break;
    }
    return Token(TokenType::Error, m_tokPos, m_pos, "Syntax error.");
}

Token Lexer::comparisonOperator()
// Lex a token that began with '&'.
// Returns an And token for "&&". Otherwise the character read after the '&'
// is put back and an Error token is returned, since a lone '&' is not
// accepted.
Token Lexer::ampersand()
{
    if (getChar() == '&')
        return Token(TokenType::And, m_tokPos, m_pos, "&&");
    putChar();
    return Token(TokenType::Error, m_tokPos, m_pos,
        "'&' invalid in this context.");
}

char c = m_buf[m_pos];
if (!c)
return Token(TokenType::Error);
// Lex a token that began with '|'.
// Returns an Or token for "||". Otherwise the character read after the '|'
// is put back and an Error token is returned, since a lone '|' is not
// accepted.
Token Lexer::bar()
{
    if (getChar() == '|')
        return Token(TokenType::Or, m_tokPos, m_pos, "||");
    putChar();
    // The message names the character that was actually rejected ('|').
    return Token(TokenType::Error, m_tokPos, m_pos,
        "'|' invalid in this context.");
}

char d = m_buf[m_pos + 1];
if (d)
{
if (c == '=' && d == '=')
return Token(TokenType::Equal, m_pos, m_pos + 2);
if (c == '!' && d == '=')
return Token(TokenType::NotEqual, m_pos, m_pos + 2);
if (c == '>' && d == '=')
return Token(TokenType::GreaterEqual, m_pos, m_pos + 2);
if (c == '<' && d == '=')
return Token(TokenType::LessEqual, m_pos, m_pos + 2);
}
if (c == '>')
return Token(TokenType::Greater, m_pos, m_pos + 1);
if (c == '<')
return Token(TokenType::Less, m_pos, m_pos + 1);
return Token(TokenType::Error);
// Lex a token that began with '!'.
// "!=" yields NotEqual; otherwise the following character is put back and a
// Not token is returned for the lone '!'.
Token Lexer::exclamation()
{
    const bool followedByEquals = (getChar() == '=');
    if (!followedByEquals)
    {
        putChar();
        return Token(TokenType::Not, m_tokPos, m_pos, "!");
    }
    return Token(TokenType::NotEqual, m_tokPos, m_pos, "!=");
}

Token Lexer::logicalOperator()
// Lex a token that began with '-'.
// The following character is inspected but never consumed. A second '-'
// produces an Error token; anything else produces a Dash token.
Token Lexer::dash()
{
    // Read the next character and immediately put it back.
    const char next = getChar();
    putChar();

    if (next == '-')
    {
        return Token(TokenType::Error, m_tokPos, m_pos,
            "Found disallowed consecutive dashes: '--'");
    }
    return Token(TokenType::Dash, m_tokPos, m_pos, "-");
}

// Lex a token that began with '='.
// "==" yields Equal. Otherwise the following character is put back and an
// Error token is returned, since a lone '=' is not accepted.
Token Lexer::equal()
{
    const char next = getChar();
    if (next != '=')
    {
        putChar();
        return Token(TokenType::Error, m_tokPos, m_pos,
            "'=' invalid in this context");
    }
    return Token(TokenType::Equal, m_tokPos, m_pos, "==");
}

// Lex a token that began with '<'.
// "<=" yields LessEqual; otherwise the following character is put back and a
// Less token is returned.
Token Lexer::less()
{
    const bool orEqual = (getChar() == '=');
    if (!orEqual)
        putChar();  // Not part of this token; leave it for the next get().

    if (orEqual)
        return Token(TokenType::LessEqual, m_tokPos, m_pos, "<=");
    return Token(TokenType::Less, m_tokPos, m_pos, "<");
}

// Lex a token that began with '>'.
// ">=" yields GreaterEqual; otherwise the following character is put back
// and a Greater token is returned.
Token Lexer::greater()
{
    switch (getChar())
    {
    case '=':
        return Token(TokenType::GreaterEqual, m_tokPos, m_pos, ">=");
    default:
        putChar();
        return Token(TokenType::Greater, m_tokPos, m_pos, ">");
    }
}

// Lex a numeric literal that starts at m_tokPos.
//
// strtod() determines how many characters form the number. On success the
// read position is moved just past the literal and a Number token carrying
// both the literal text and its parsed value is returned. If strtod()
// consumes nothing, an Error token is returned and the read position is left
// unchanged.
Token Lexer::number()
{
    const char *start = m_buf.data() + m_tokPos;
    char *end = nullptr;

    const double v = strtod(start, &end);
    if (start == end)
        return Token(TokenType::Error);

    m_pos = end - m_buf.data();
    // substr() takes (offset, length), not (offset, end offset); passing the
    // end offset would drag trailing input into the token text whenever the
    // number does not start at position 0.
    return Token(TokenType::Number, m_tokPos, m_pos,
        m_buf.substr(m_tokPos, m_pos - m_tokPos), v);
}


Token Lexer::dimension()
// Lex an identifier that starts at m_tokPos.
//
// Characters are consumed while they are alphabetic or '_'. The first
// character that does not qualify is put back, and an Identifier token
// holding the consumed text is returned.
Token Lexer::letter()
{
    /**
    size_t end = Dimension::extractName(m_buf, m_pos);
    return Token(TokenType::Dimension, m_pos, m_pos + end,
        m_buf.substr(m_pos, end));
    **/
    char c;
    do
    {
        c = getChar();
        // The <cctype> classifiers are undefined for negative values, so go
        // through unsigned char.
    } while (std::isalpha(static_cast<unsigned char>(c)) || c == '_');

    // The terminating character is not part of the identifier.
    putChar();
    return Token(TokenType::Identifier, m_tokPos, m_pos,
        m_buf.substr(m_tokPos, m_pos - m_tokPos));
}

} // namespace expr
Expand Down

0 comments on commit e06a383

Please sign in to comment.