#ifndef HONGMINI_HPP
#define HONGMINI_HPP
#include <string>
#include <sstream>
#include <iterator>
#include <vector>
namespace hongmini {
/** \brief Constant characters for tokenization.
 *
 * Every constant is spelled once as a narrow literal and converted
 * character by character to \c Char, so the same definitions serve
 * \c char as well as wider character types such as \c wchar_t.
 *
 * \tparam Char Character type. Usually char or wchar_t.
 */
template<typename Char>
struct token_chars {
  typedef Char char_type;
  typedef std::basic_string<Char> string_type;

  /** \brief Null-terminated array of the characters treated as whitespace. */
  static const Char* whitespaces() {
    // Built from individual characters so that the array has element type
    // Char; a plain narrow string literal only converts to const char*.
    static const Char chars[] = {
      static_cast<Char>(' '), static_cast<Char>('\t'),
      static_cast<Char>('\r'), static_cast<Char>('\n'),
      static_cast<Char>('\0')
    };
    return chars;
  }

  /** \brief Definition operator, "<-". */
  static const string_type definition() { return from_literal("<-"); }
  /** \brief Comma operator, ",". */
  static const string_type comma() { return from_literal(","); }
  /** \brief Attribute operator, ".". */
  static const string_type attribute() { return from_literal("."); }
  /** \brief Colon operator, ":". */
  static const string_type colon() { return from_literal(":"); }
  /** \brief Terminator operator, ";". */
  static const string_type terminator() { return from_literal(";"); }
  /** \brief Opening parenthesis, "(". */
  static const string_type parenthesis_begin() { return from_literal("("); }
  /** \brief Closing parenthesis, ")". */
  static const string_type parenthesis_end() { return from_literal(")"); }
  /** \brief Opening curly bracket, "{". */
  static const string_type curlybracket_begin() { return from_literal("{"); }
  /** \brief Closing curly bracket, "}". */
  static const string_type curlybracket_end() { return from_literal("}"); }
  /** \brief Opening bracket of a list literal, "[". */
  static const string_type list_literal_begin() { return from_literal("["); }
  /** \brief Closing bracket of a list literal, "]". */
  static const string_type list_literal_end() { return from_literal("]"); }
  /** \brief Delimiter that opens and closes a string literal, '"'. */
  static const string_type string_literal() { return from_literal("\""); }

  /** \brief Returns all eleven operator strings.
   *
   * The string literal delimiter is not part of this list.
   *
   * \return The operators, starting with definition() and ending with
   *         list_literal_end().
   */
  static const std::vector<string_type> operators() {
    std::vector<string_type> ops(11);
    ops[0] = definition();
    ops[1] = comma();
    ops[2] = attribute();
    ops[3] = colon();
    ops[4] = terminator();
    ops[5] = parenthesis_begin();
    ops[6] = parenthesis_end();
    ops[7] = curlybracket_begin();
    ops[8] = curlybracket_end();
    ops[9] = list_literal_begin();
    ops[10] = list_literal_end();
    return ops;
  }

private:
  /** \brief Converts a null-terminated narrow literal into string_type.
   *
   * \param narrow Null-terminated narrow character sequence.
   * \return A string holding each character of \c narrow cast to \c Char.
   */
  static string_type from_literal(const char* narrow) {
    string_type converted;
    for (; *narrow; ++narrow) {
      converted.push_back(static_cast<Char>(*narrow));
    }
    return converted;
  }
};
/** \brief Hongmini internals.
*
* Anonymous namespace holding the helper functions used by the tokenizer.
*/
namespace {
/** \brief Emits the stream's buffered text as one token.
 *
 * When the stream holds a non-empty string, that string is assigned
 * through the output iterator, the iterator is advanced by one, and the
 * stream is reset (state flags cleared, contents replaced by an empty
 * string). When the stream holds nothing, the call has no effect.
 *
 * \tparam Stream String stream type exposing \c char_type, \c str() and
 *         \c clear(), e.g. std::ostringstream or std::wostringstream.
 * \tparam OutputIter Output iterator accepting std::basic_string values.
 * \param ss Stream holding the pending token text.
 * \param out Output iterator that receives the token.
 */
template<typename Stream, typename OutputIter>
inline void _push_token(Stream& ss, OutputIter& out) {
  typedef typename Stream::char_type CharT;
  typedef std::basic_string<CharT> Token;

  const Token pending(ss.str());
  if (!pending.empty()) {
    *out = pending;
    ++out;
    // Reset the stream so the next token starts from scratch.
    ss.clear();
    ss.str(Token());
  }
}
/** \brief Tells whether \c super ends with \c sub.
 *
 * Both strings are walked backwards in lockstep. An empty \c sub is a
 * suffix of every string.
 *
 * \param super String whose tail is examined.
 * \param sub Candidate suffix.
 * \return true if \c sub is a suffix of \c super, false otherwise.
 */
template<typename String>
inline bool _endswith(const String& super, const String& sub) {
  typedef typename String::const_reverse_iterator RevIter;
  RevIter tail(super.rbegin());
  RevIter wanted(sub.rbegin());
  while (wanted != sub.rend()) {
    // Running out of super first means sub is longer than super.
    if (tail == super.rend()) return false;
    if (*tail != *wanted) return false;
    ++tail;
    ++wanted;
  }
  return true;
}
}
/** \brief Tokenizes a source code.
 *
 * Reads the characters in [begin, end) and writes one std::basic_string
 * per token through \c out:
 *  - whitespace characters separate tokens and are discarded;
 *  - each operator listed by token_chars::operators() becomes a token of
 *    its own, splitting off whatever text preceded it;
 *  - a string literal runs from one token_chars::string_literal()
 *    delimiter to the next and is emitted as a single token, delimiters
 *    included, with whitespace and operators inside it kept verbatim.
 *
 * Text still pending at the end of input (including an unterminated
 * string literal) is emitted as a final token.
 *
 * \tparam InputIter Input iterator over characters.
 * \tparam OutputIter Output iterator accepting std::basic_string values.
 * \param begin Start of the source characters.
 * \param end End of the source characters.
 * \param out Output iterator that receives the tokens. It is taken by
 *        value, so the caller's iterator object is not advanced.
 */
template<typename InputIter, typename OutputIter>
inline void tokenize(InputIter begin, InputIter end, OutputIter out) {
  typedef typename std::iterator_traits<InputIter>::value_type Char;
  typedef std::basic_string<Char> String;
  typedef token_chars<Char> TkChars;
  typedef typename std::vector<String>::const_iterator OpIter;

  std::basic_ostringstream<Char> ss;
  const String str_literal(TkChars::string_literal());
  // The operator table never changes; build it once, not per character.
  const std::vector<String> ops(TkChars::operators());
  bool literal(false);
  for (InputIter i(begin); i != end; ++i) {
    // string literals (end)
    if (literal) {
      ss << *i;
      const String buf(ss.str());
      // The size check keeps the opening delimiter from being taken for
      // the closing one.
      if (buf.size() > str_literal.size() && _endswith(buf, str_literal)) {
        _push_token(ss, out);
        // Leave literal mode so the rest of the input is tokenized
        // normally again.
        literal = false;
      }
      continue;
    }
    // whitespaces
    bool whitespace(false);
    for (const Char* wi(TkChars::whitespaces()); *wi; ++wi) {
      if (*i == *wi) {
        whitespace = true;
        break;
      }
    }
    if (whitespace) {
      _push_token(ss, out);
      continue;
    }
    ss << *i;
    const String buffer(ss.str());
    // string literals (begin)
    if (_endswith(buffer, str_literal)) {
      // Flush the text before the delimiter as its own token.
      // _push_token leaves the stream empty, so the delimiter written
      // next starts a fresh buffer.
      ss.str(buffer.substr(0, buffer.size() - str_literal.size()));
      _push_token(ss, out);
      ss << str_literal;
      literal = true;
      continue;
    }
    // operators
    for (OpIter opit(ops.begin()); opit != ops.end(); ++opit) {
      const String& op(*opit);
      if (!_endswith(buffer, op)) continue;
      // Emit the text before the operator, then the operator itself.
      ss.str(buffer.substr(0, buffer.size() - op.size()));
      _push_token(ss, out);
      ss << op;
      _push_token(ss, out);
      break;
    }
  }
  // Flush whatever is still buffered when the input ends.
  _push_token(ss, out);
}
}
#endif