-
Notifications
You must be signed in to change notification settings - Fork 463
/
Lexer.hs
161 lines (143 loc) · 5.15 KB
/
Lexer.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeFamilies #-}
module PlutusCore.Parser.Lexer where
import PlutusCore qualified as PLC
import PlutusCore.Parser.ParserCommon as PLC (Parser)
import PlutusCore.Parser.Type as LT
import PlutusPrelude (NonEmpty ((:|)), Pretty (pretty), Render (render))
import Data.List qualified as DL
import Data.List.NonEmpty qualified as NE
import Data.Proxy (Proxy (..))
import Data.Set qualified as Set
import PlutusCore.Parser.Type (Keyword (KwIFix, KwLam))
import Text.Megaparsec
import Text.Megaparsec.Byte
import Text.Megaparsec.Char
import Text.Megaparsec.Char.Lexer qualified as L
{- Note [Keywords] This version of the lexer relaxes the syntax so that keywords
(con, lam, ...) and built-in names can be re-used as variable names, reducing
the risk of textual source code being unparsable due to illegal names (produced
by a compiler, for example). To achieve this, we use Alex's "start codes" which
allow you to put the lexer into modes in which only certain actions are valid.
In the PLC grammar, keywords like `abs`, `con` and so on can only occur after a
`(`, so when we see one of these we put the lexer into a special mode where
these are interpreted as keywords and converted into elements of the LexKeyword
type; having done this, we return to the initial lexer state, denoted by 0,
where we can use keywords as variable names. A similar strategy is used for
built-in type names. -}
{- Note [Precedence of regular expression matches]
For reference, Section 3.2.2 of the Alex manual says "When the input stream
matches more than one rule, the rule which matches the longest prefix of the
input stream wins. If there are still several rules which match an equal
number of characters, then the rule which appears earliest in the file wins."
-}
-- | Parse a PLC keyword.  See Note [Keywords] for why keywords are only
-- recognised in certain positions; megaparsec's 'string' backtracks on
-- failure, so the alternatives below may share prefixes safely.
--
-- NOTE(review): the binding was originally named @pScheme@, which left the
-- @pKeyword@ signature without a matching definition (a compile error);
-- renamed the binding to match the signature.
pKeyword :: Parser Keyword
pKeyword = choice
    [ KwAbs <$ string "abs"
    , KwLam <$ string "lam"
    , KwIFix <$ string "ifix"
    , KwFun <$ string "fun"
    , KwAll <$ string "all"
    , KwType <$ string "type"
    , KwProgram <$ string "program"
    , KwCon <$ string "con"
    , KwIWrap <$ string "iwrap"
    , KwBuiltin <$ string "builtin"
    , KwUnwrap <$ string "unwrap"
    , KwError <$ string "error"
    , KwForce <$ string "force"
    , KwDelay <$ string "delay"]
-- | A value decorated with its source span: start and end positions plus
-- the token's length, which is used to advance through the raw input text
-- in the 'Stream' and 'TraversableStream' instances below.
data WithPos a = WithPos
    { startPos :: SourcePos
    , endPos :: SourcePos
    , tokenLength :: Int -- ^ number of input characters the token covers
    , tokenVal :: a
    } deriving (Eq, Ord, Show)
-- | A megaparsec input stream of positioned tokens.  The original source
-- text is carried alongside the token list so offending lines can be
-- reproduced in error messages (see 'reachOffset').
data TkStream = TkStream
    { tkStreamInput :: String -- for showing offending lines
    , unTkStream :: [WithPos LT.Token]
    }
-- data KwStream = KwStream
-- { kwStreamInput :: String
-- , unKwStream :: [WithPos Keyword ]
-- }
-- | Make 'TkStream' usable as a megaparsec input stream.  Every consuming
-- operation drops the same number of characters from the raw input string
-- as the consumed tokens cover, keeping the two views in sync.
instance Stream TkStream where
    type Token TkStream = WithPos LT.Token
    type Tokens TkStream = [WithPos LT.Token]
    -- Chunks are plain lists of positioned tokens.
    tokenToChunk Proxy x = [x]
    tokensToChunk Proxy xs = xs
    chunkToTokens Proxy = id
    chunkLength Proxy = length
    chunkEmpty Proxy = null
    -- Take a single token, trimming the raw input by that token's length.
    take1_ (TkStream _ []) = Nothing
    take1_ (TkStream str (t:ts)) = Just
        ( t
        , TkStream (drop (tokensLength pxy (t:|[])) str) ts
        )
    -- Take up to n tokens; n <= 0 succeeds with an empty chunk, and an
    -- exhausted stream fails (per the Stream laws).
    takeN_ n (TkStream str s)
        | n <= 0 = Just ([], TkStream str s)
        | null s = Nothing
        | otherwise =
            let (x, s') = splitAt n s
            in case NE.nonEmpty x of
                Nothing -> Just (x, TkStream str s')
                Just nex -> Just (x, TkStream (drop (tokensLength pxy nex) str) s')
    -- Take the longest prefix satisfying f, trimming the raw input to match.
    takeWhile_ f (TkStream str s) =
        let (x, s') = DL.span f s
        in case NE.nonEmpty x of
            Nothing -> (x, TkStream str s')
            Just nex -> (x, TkStream (drop (tokensLength pxy nex) str) s')
-- | Render a token for display in parse error messages.
showToken :: LT.Token -> String
showToken tok = render (pretty tok)
-- | How token chunks are rendered in error messages: pretty-print each
-- token and join with spaces.
instance VisualStream TkStream where
    showTokens Proxy = unwords
        . NE.toList
        . fmap (showToken . tokenVal)
    -- A chunk's width is the sum of the individual token lengths.
    tokensLength Proxy xs = sum (tokenLength <$> xs)
-- | Translate an absolute offset into a source position and the text of
-- the offending line, for megaparsec error reporting.  Splits the token
-- list at the requested offset, drops the matching number of characters
-- from the raw input, and rebuilds the line prefix.
instance TraversableStream TkStream where
    reachOffset o PosState {..} =
        ( Just (prefix ++ restOfLine)
        , PosState
            { pstateInput = TkStream
                { tkStreamInput = postStr
                , unTkStream = post
                }
            , pstateOffset = max pstateOffset o
            , pstateSourcePos = newSourcePos
            , pstateTabWidth = pstateTabWidth
            , pstateLinePrefix = prefix
            }
        )
      where
        -- Keep accumulating the prefix only while we stay on the same line.
        prefix =
            if sameLine
            then pstateLinePrefix ++ preStr
            else preStr
        sameLine = sourceLine newSourcePos == sourceLine pstateSourcePos
        -- Position of the first unconsumed token; if the stream is
        -- exhausted, stay at the previous position.
        newSourcePos =
            case post of
                [] -> pstateSourcePos
                (x:_) -> startPos x
        (pre, post) = splitAt (o - pstateOffset) (unTkStream pstateInput)
        (preStr, postStr) = splitAt tokensConsumed (tkStreamInput pstateInput)
        -- Characters covered by the consumed tokens (0 when none consumed).
        tokensConsumed =
            case NE.nonEmpty pre of
                Nothing -> 0
                Just nePre -> tokensLength pxy nePre
        restOfLine = takeWhile (/= '\n') postStr
-- | Proxy pinning the stream type for the 'tokensLength' calls above.
pxy :: Proxy TkStream
pxy = Proxy
-- | Parser over the custom token stream.
-- NOTE(review): this alias shadows the 'Parser' imported from
-- "PlutusCore.Parser.ParserCommon" above — confirm which of the two the
-- keyword parser is meant to use.
type Parser = Parsec (PLC.ParseError SourcePos) TkStream
-- | Promote a bare token to a 'WithPos' with dummy coordinates: both ends
-- at the initial position of an unnamed file, with zero length.  Used only
-- to build expected-token sets for error messages.
liftToken :: LT.Token -> WithPos LT.Token
liftToken tok = WithPos dummyPos dummyPos 0 tok
  where
    dummyPos = initialPos ""
-- | Match exactly the given token and return it.  The expected-token set
-- reported on failure is the target token wrapped with dummy positions.
pToken :: LT.Token -> Parser LT.Token
pToken c = token matchTok expected
  where
    -- Succeed only when the positioned token carries exactly c.
    matchTok wp
        | tokenVal wp == c = Just (tokenVal wp)
        | otherwise = Nothing
    expected = Set.singleton (Tokens (liftToken c :| []))