// BasicScanner.h
/**************************************************************************
BasicScanner - abstract base class for a reader/lexer for parsing
Credits:
- By Paul-Michael Agapow, 2003, Dept. Biology, University College
London, London WC1E 6BT, UK.
- <mail://p.agapow@ucl.ac.uk> <http://www.bio.ic.ac.uk/evolve/>
About:
- Aka reader, lexer, scanner, source iterator. Enables a source to be
broken up into tokens or logical units.
- Derive from this to make scanners that handle various input sources,
like streams, fstreams, arrays, buffers etc.
- Actually we differentiate between this and a lexer, because a lexer
understands something of the syntax of the parsed language.
**************************************************************************/
#ifndef BASICSCANNER_H
#define BASICSCANNER_H
// *** INCLUDES
#include "Sbl.h"
#include "StringUtils.h"
#include <string>
#include <vector>
SBL_NAMESPACE_START
// *** CONSTANTS & DEFINES
// End-of-line convention of a source. The enumerator values are spelled out
// explicitly; they are the same values the implicit numbering would give.
enum eoln_t
{
	kEoln_Unknown = 0,   // not determined (the state a new BasicScanner starts in)
	kEoln_Dos     = 1,   // DOS-style line endings (presumably CR LF)
	kEoln_Mac     = 2,   // Mac-style line endings (presumably CR only)
	kEoln_Unix    = 3    // Unix-style line endings (presumably LF only)
};
// Position within a source, as accepted and returned by BasicScanner::Goto()
// and BasicScanner::GetPosn().
typedef int posn_t;

// Format codes: one-character codes, presumably the alphabet of the ikFormat
// strings passed to BasicScanner::ReadFormat() - confirm in the implementation.
const char   kFormatTab           = 't';
const char   kFormatNotTab        = 'T';
const char   kFormatNumber        = 'n';
const char   kFormatEoln          = 'l';

// Readable names for the "eat the token / delimiter" flags (iEatToken,
// iEatDelim) taken by the Consume and Read functions.
const bool   kScan_Eat            = true;
const bool   kScan_DontEat        = !kScan_Eat;

// Readable names for "skip leading whitespace" flags (presumably for
// parameters such as iSkipSpace of ReadCharThrow()).
const bool   kScan_SkipSpace      = true;
const bool   kScan_DontSkipSpace  = !kScan_SkipSpace;

// Special positions understood by Goto(): 0 is the start of the source and
// -1 is its end.
const posn_t kScan_SrcBegin       = 0;
const posn_t kScan_SrcEnd         = -1;
// *** CLASS DECLARATION *************************************************/
// Abstract base class for scanners that break a source into tokens or
// logical units. A derived class supplies the raw source access by overriding
// the pure virtual primitives (GetChar, Goto, GetPosn, operator bool and
// UnreadChar); the remaining services are declared here and defined outside
// this header.
class BasicScanner
{
public:
// LIFECYCLE
// Default constructor: installs "/*" and "*/" as the block comment tokens,
// "//" as the line comment token, "#" as the meta token, and space, CR, LF
// and tab as the whitespace set. The end-of-line type starts as
// kEoln_Unknown and the mCommentsAreSpace flag starts as true.
BasicScanner ()
: mStartComment ("/*"),
mStopComment ("*/"),
mLineComment ("//"),
mMeta ("#"),
mSpace (" \r\n\t"),
mEolnType (kEoln_Unknown),
mCommentsAreSpace (true)
{}
// Virtual, so a derived scanner can be destroyed through a base pointer.
virtual ~BasicScanner () {};
// ACCESS
// Each setter below assigns its C-string argument straight to a std::string
// member, so the argument must not be a null pointer (assigning a null
// const char* to a std::string is undefined behaviour).
// NOTE(review): whether an empty token is acceptable depends on the
// implementation, which is not visible here - confirm before passing "".
// Set the tokens that open and close a block comment.
void SetComments
( const char* iStartToken, const char* iStopToken )
{ mStartComment = iStartToken; mStopComment = iStopToken; }
// Set the token that starts a line comment.
void SetLineComment ( const char* iLineToken )
{ mLineComment = iLineToken; }
// Set the meta token.
void SetMeta ( const char* iMetaToken )
{ mMeta = iMetaToken; }
// Set the characters that count as whitespace.
void SetSpace ( const char* iCharSet )
{ mSpace = iCharSet; }
// Presumably the index of the line currently being read - confirm in the
// implementation. Virtual, so derived scanners may override it.
virtual UInt GetLineIndex ();
// SERVICES
// !! Primitives - low level, must be overridden in derived class.
// !! Do not access these directly.
// Get Char: get next raw char from source, return success. (The original
// note here also mentions CountLines(), presumably meaning an override is
// expected to keep the line count up to date - confirm.)
virtual bool GetChar (char& ch) = 0;
// Go To: move to given posn, 0=start, -1=end, adjust line count
// (see kScan_SrcBegin and kScan_SrcEnd).
virtual posn_t Goto (posn_t iPosn) = 0;
// Get Posn: grab current position for later use in GoTo()
virtual posn_t GetPosn () = 0;
// Cast as bool: is there anything left in the source?
// NOTE(review): the conversion is not explicit, so a scanner also converts
// implicitly to bool (and from there to int) in arithmetic contexts.
virtual operator bool () const = 0;
// !! Low Level Services - for infrequent but direct access
// Declared here, defined elsewhere. PeekChar, Rewind, Wind and DetectEoln
// are virtual and may be overridden; ReadChar is not.
bool ReadChar (char& oCurrChar);
virtual bool PeekChar (char& ch);
virtual posn_t Rewind ();
virtual posn_t Wind ();
virtual eoln_t DetectEoln ();
// !! High Level Services
// Consume functions read characters until they encounter one that is
// not in their set, then rollback to just before it
// iEatToken defaults to kScan_DontEat (false).
char ConsumeWhile (const char *iCharSet);
void ConsumeUntil (const char *iCharSet);
void ConsumeUntilToken (const char *iToken, bool iEatToken = kScan_DontEat);
char ConsumeSpace ();
void ConsumeLine ();
// Read functions. Parameters prefixed "o" or "io" are strings (or vectors of
// strings) passed by non-const reference to receive the result. Defaults:
// iSkipSpace = true, iDelimiters = "" (ReadToken only), iEatDelim =
// kScan_DontEat, iEatSpace = false, iEatDelimiter = false.
// NOTE(review): names such as ReadCharThrow and ReadExpected suggest that
// failure is reported by exception - confirm in the implementation.
void ReadCharThrow (char& ch, bool iSkipSpace = true);
bool ReadCharSkipSpace (char& ch);
void ReadFormat (std::string& oToken, const char* ikFormat);
void ReadFormat (std::vector<std::string>& oTokenVector, const char* ikFormat);
void ReadOne (std::string& ioToken, const char* iCharSet);
void ReadOneOrNone (std::string& ioToken, const char* iCharSet);
char Read (std::string& ioToken, const char* iDelimiters, bool iEatDelim = kScan_DontEat);
void ReadExpected (const char* iExpectedCStr);
char ReadToken (std::string& ioToken, const char* iDelimiters = "",
bool iEatDelim = kScan_DontEat);
void ReadLine (std::string& ioLine, bool iEatSpace = false);
void ReadLine (std::string& ioLine, const char* iDelimiters, bool iEatSpace = false);
void ReadWhile (std::string& ioToken, const char* iCharSet);
char ReadUntil (std::string& ioToken, const char* iCharSet, bool iEatDelimiter = false);
void ReadNumberToken (std::string& ioToken);
void ReadIntToken (std::string& ioToken);
void UnreadToken (std::string& iToken);
void PeekToken (std::string& oToken);
int ReadInt ();
double ReadDbl ();
// DEPRECATED & DEBUG
// Pure virtual, so every derived scanner must still provide it.
virtual void UnreadChar (char ch) = 0;
// INTERNALS
private:
// format parameters
// Comment, meta and whitespace tokens; set by the constructor and the
// Set... functions above.
std::string mStartComment, mStopComment, mLineComment, mMeta, mSpace;
// Starts as kEoln_Unknown; presumably updated by DetectEoln() - confirm.
eoln_t mEolnType;
// Set to true by the constructor; no setter is declared in this header.
bool mCommentsAreSpace;
// helper functions
// Declared here, defined elsewhere.
bool isMemberOf (const char* ikCharSet, char iTestChar);
void SkipWhileComment ();
bool IsCommentDelim (std::string& theDelim);
};
SBL_NAMESPACE_STOP
#endif
// *** END ***************************************************************/