-
Notifications
You must be signed in to change notification settings - Fork 82
/
Copy pathhaskell.x
175 lines (142 loc) · 4.28 KB
/
haskell.x
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
--
-- Lexical syntax for Haskell 98.
--
-- (c) Simon Marlow 2003, with the caveat that much of this is
-- translated directly from the syntax in the Haskell 98 report.
--
-- This isn't a complete Haskell 98 lexer - it doesn't handle layout
-- for one thing. However, it could be adapted with a small
-- amount of effort.
--
{
module Main (main) where
import Data.Char (chr)
}
%wrapper "monad"
$whitechar = [ \t\n\r\f\v]
$special = [\(\)\,\;\[\]\`\{\}]
$ascdigit = 0-9
$unidigit = [] -- TODO
$digit = [$ascdigit $unidigit]
$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
$unisymbol = [] -- TODO
$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\']
$large = [A-Z \xc0-\xd6 \xd8-\xde]
$small = [a-z \xdf-\xf6 \xf8-\xff \_]
$alpha = [$small $large]
$graphic = [$small $large $symbol $digit $special \:\"\']
$octit = 0-7
$hexit = [0-9 A-F a-f]
$idchar = [$alpha $digit \']
$symchar = [$symbol \:]
$nl = [\n\r]
@reservedid =
as|case|class|data|default|deriving|do|else|hiding|if|
import|in|infix|infixl|infixr|instance|let|module|newtype|
of|qualified|then|type|where
@reservedop =
".." | ":" | "::" | "=" | \\ | "|" | "<-" | "->" | "@" | "~" | "=>"
@varid = $small $idchar*
@conid = $large $idchar*
@varsym = $symbol $symchar*
@consym = \: $symchar*
@decimal = $digit+
@octal = $octit+
@hexadecimal = $hexit+
@exponent = [eE] [\-\+] @decimal
$cntrl = [$large \@\[\\\]\^\_]
@ascii = \^ $cntrl | NUL | SOH | STX | ETX | EOT | ENQ | ACK
| BEL | BS | HT | LF | VT | FF | CR | SO | SI | DLE
| DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN | EM
| SUB | ESC | FS | GS | RS | US | SP | DEL
$charesc = [abfnrtv\\\"\'\&]
@escape = \\ ($charesc | @ascii | @decimal | o @octal | x @hexadecimal)
@gap = \\ $whitechar+ \\
@string = $graphic # [\"\\] | " " | @escape | @gap
haskell :-
<0> $white+ { skip }
<0> "--"\-*[^$symbol].* { skip }
"{-" { nested_comment }
<0> $special { mkL LSpecial }
<0> @reservedid { mkL LReservedId }
<0> @conid \. @varid { mkL LQVarId }
<0> @conid \. @conid { mkL LQConId }
<0> @varid { mkL LVarId }
<0> @conid { mkL LConId }
<0> @reservedop { mkL LReservedOp }
<0> @conid \. @varsym { mkL LVarSym }
<0> @conid \. @consym { mkL LConSym }
<0> @varsym { mkL LVarSym }
<0> @consym { mkL LConSym }
<0> @decimal
| 0[oO] @octal
| 0[xX] @hexadecimal { mkL LInteger }
<0> @decimal \. @decimal @exponent?
| @decimal @exponent { mkL LFloat }
<0> \' ($graphic # [\'\\] | " " | @escape) \'
{ mkL LChar }
<0> \" @string* \" { mkL LString }
{
data Lexeme = L AlexPosn LexemeClass String
data LexemeClass
= LInteger
| LFloat
| LChar
| LString
| LSpecial
| LReservedId
| LReservedOp
| LVarId
| LQVarId
| LConId
| LQConId
| LVarSym
| LQVarSym
| LConSym
| LQConSym
| LEOF
deriving Eq
mkL :: LexemeClass -> AlexInput -> Int -> Alex Lexeme
mkL c (p,_,_,str) len = return (L p c (take len str))
nested_comment :: AlexInput -> Int -> Alex Lexeme
nested_comment _ _ = do
input <- alexGetInput
go 1 input
where go 0 input = do alexSetInput input; alexMonadScan
go n input = do
case alexGetByte input of
Nothing -> err input
Just (c,input) -> do
case chr (fromIntegral c) of
'-' -> do
let temp = input
case alexGetByte input of
Nothing -> err input
Just (125,input) -> go (n-1) input
Just (45, input) -> go n temp
Just (c,input) -> go n input
'\123' -> do
case alexGetByte input of
Nothing -> err input
Just (c,input) | c == fromIntegral (ord '-') -> go (n+1) input
Just (c,input) -> go n input
c -> go n input
err input = do alexSetInput input; lexError "error in nested comment"
lexError s = do
(p,c,_,input) <- alexGetInput
alexError (showPosn p ++ ": " ++ s ++
(if (not (null input))
then " before " ++ show (head input)
else " at end of file"))
scanner str = runAlex str $ do
let loop i = do tok@(L _ cl _) <- alexMonadScan;
if cl == LEOF
then return i
else do loop $! (i+1)
loop 0
alexEOF = return (L undefined LEOF "")
showPosn (AlexPn _ line col) = show line ++ ':': show col
main = do
s <- getContents
print (scanner s)
}