Skip to content

Commit

Permalink
refactor(lexer): ♻️ Change lexer to distinguish between manual and automatic semicolon tokens
Browse files Browse the repository at this point in the history

The lexer now emits a `TokenLineSeparator` whenever it auto-inserts a "semicolon", instead of the normal `TokenSemicolon` used for explicit semicolons.
  • Loading branch information
bristermitten committed Jun 2, 2024
1 parent bda543e commit 613783b
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 21 deletions.
8 changes: 8 additions & 0 deletions src/Elara/Lexer/Token.hs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ data Token
| TokenUnderscore
| TokenIndent
| TokenDedent
| TokenLineSeparator
| TokenEOF
deriving (Show, Eq, Ord)

Expand Down Expand Up @@ -153,6 +154,7 @@ tokenRepr = \case
TokenUnderscore -> "_"
TokenIndent -> "<INDENT>"
TokenDedent -> "<DEDENT>"
TokenLineSeparator -> "<LINESEP>"
TokenEOF -> "<EOF>"

unsafeTokenText :: Token -> Text
Expand All @@ -171,3 +173,9 @@ unsafeTokenFloat :: Token -> Double
-- Partial: extracts the payload of a 'TokenFloat' and calls 'error' on every
-- other constructor. Only use where the token kind has already been checked.
unsafeTokenFloat = \case
TokenFloat f -> f
t -> error ("unsafeTokenFloat: " <> show t)

-- | True for the layout tokens the lexer synthesises from whitespace:
-- indents, dedents, and auto-inserted line separators.
isIndent :: Token -> Bool
isIndent = \case
    TokenIndent -> True
    TokenDedent -> True
    TokenLineSeparator -> True
    _ -> False
6 changes: 3 additions & 3 deletions src/Elara/Lexer/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import Elara.AST.Name (ModuleName (..))
import Elara.AST.Region (Located (Located), RealPosition (..), RealSourceRegion (..), SourceRegion (GeneratedRegion), column, line, positionToDiagnosePosition)
import Elara.Error
import Elara.Error.Codes qualified as Codes
import Elara.Lexer.Token (Lexeme, TokPosition, Token (TokenDedent, TokenIndent, TokenSemicolon))
import Elara.Lexer.Token (Lexeme, TokPosition, Token (..))
import Error.Diagnose (Marker (..), Note (..), Report (Err))
import Polysemy
import Polysemy.Error
Expand Down Expand Up @@ -149,7 +149,7 @@ startWhite _ str = do
case span (view (indent % to (> indentation))) indents of
(pre, top : xs) -> do
-- pre is all the levels that need to be closed, top is the level that we need to match
fakeClosings <- sequenceA [fake TokenDedent, fake TokenSemicolon]
fakeClosings <- sequenceA [fake TokenDedent, fake TokenLineSeparator]
if top ^. indent == indentation
then
put
Expand All @@ -160,7 +160,7 @@ startWhite _ str = do
else throw (TooMuchIndentation top (viaNonEmpty last $ init indents) indentation s)
(_, []) -> error (" Indent stack contains nothing greater than " <> show indentation)
pure Nothing
EQ -> Just <$> fake TokenSemicolon
EQ -> Just <$> fake TokenLineSeparator

-- Insert dedent for any leftover unclosed indents
cleanIndentation :: LexMonad [Lexeme]
Expand Down
2 changes: 1 addition & 1 deletion src/Elara/Parse.hs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ parse p path = fromEither . first WParseErrorBundle . runParser p path
type ParsePipelineEffects = '[Error (WParseErrorBundle TokenStream ElaraParseError)]

createTokenStream :: String -> [Lexeme] -> TokenStream
createTokenStream = TokenStream
createTokenStream i tokens = TokenStream i tokens False

parsePipeline ::
Members ParsePipelineEffects r =>
Expand Down
4 changes: 2 additions & 2 deletions src/Elara/Parse/Expression.hs
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,10 @@ ifElse :: Parser FrontendExpr
ifElse = locatedExpr $ do
token_ TokenIf
condition <- exprParser
_ <- optional (token_ TokenSemicolon)
_ <- optional lineSeparator
token_ TokenThen
thenBranch <- exprBlock element
_ <- optional (token_ TokenSemicolon)
_ <- optional lineSeparator
token_ TokenElse
elseBranch <- exprBlock element
pure (If condition thenBranch elseBranch)
Expand Down
6 changes: 5 additions & 1 deletion src/Elara/Parse/Indents.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ import Elara.Parse.Combinators (sepEndBy1')
import Elara.Parse.Primitives (Parser, token_)

import Text.Megaparsec (try)
import Text.Megaparsec.Debug

-- | A statement separator: either a 'TokenLineSeparator' auto-inserted by the
-- lexer's layout algorithm, or an explicit 'TokenSemicolon' written by the user.
lineSeparator :: Parser ()
lineSeparator = token_ TokenLineSeparator <|> token_ TokenSemicolon

-- | Start of a block: either a layout-inserted 'TokenIndent' or an explicit
-- opening brace.
indentToken :: Parser ()
indentToken = token_ TokenIndent <|> token_ TokenLeftBrace
Expand All @@ -23,7 +27,7 @@ block mergeFunction single exprParser = try singleBlock <|> wholeBlock
singleBlock = single <$> exprParser
wholeBlock = do
indentToken
exprs <- sepEndBy1' exprParser (token_ TokenSemicolon)
exprs <- sepEndBy1' exprParser lineSeparator
dedentToken
pure $ mergeFunction exprs

Expand Down
14 changes: 8 additions & 6 deletions src/Elara/Parse/Module.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,18 @@ import Elara.Parse.Names (opName, varName)
import Elara.Parse.Names qualified as Parse (moduleName)
import Elara.Parse.Primitives

import Elara.Parse.Indents (lineSeparator)
import Text.Megaparsec (MonadParsec (..), PosState (pstateSourcePos), SourcePos (sourceName), State (statePosState), sepEndBy)
import Text.Megaparsec.Debug

module' :: Parser (Module 'Frontend)
module' = fmapLocated Module $ do
mHeader <- optional (header <* optional (token_ TokenSemicolon))
mHeader <- optional (header <* optional lineSeparator)
thisFile <- sourceName . pstateSourcePos . statePosState <$> getParserState
let _name = maybe (Located (GeneratedRegion thisFile) (ModuleName ("Main" :| []))) fst mHeader
imports <- sepEndBy import' (token_ TokenSemicolon)
_ <- optional (token_ TokenSemicolon)
declarations <- sepEndBy (declaration _name) (token_ TokenSemicolon)
imports <- sepEndBy import' lineSeparator

declarations <- sepEndBy (declaration _name) lineSeparator

pure $
Module'
Expand Down Expand Up @@ -58,9 +60,9 @@ exposition = exposedValue <|> exposedOp

import' :: Parser (Import 'Frontend)
import' = fmapLocated Import $ do
token_ TokenImport
dbg "import'" $ token_ TokenImport

moduleName' <- located Parse.moduleName
moduleName' <- dbg "mn" $ located Parse.moduleName
isQualified <- isJust <$> optional (token_ TokenQualified)
as <- optional . located $ do
token_ TokenAs
Expand Down
19 changes: 11 additions & 8 deletions src/Elara/Parse/Stream.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import Text.Megaparsec
-- | The token stream consumed by megaparsec: the raw source text paired with
-- its lexemes, plus a flag controlling whether layout tokens are visible.
data TokenStream = TokenStream
{ tokenStreamInput :: !String -- ^ the original source text the lexemes were produced from
, tokenStreamTokens :: ![Lexeme] -- ^ the remaining lexemes to be consumed
, skipIndents :: Bool -- ^ when True, 'take1_' silently drops layout tokens (see 'isIndent'); NOTE(review): this field is lazy while the others are strict — confirm that is intentional
}
deriving (Show, Eq)

Expand All @@ -27,23 +28,25 @@ instance Stream TokenStream where
chunkLength Proxy = length
chunkEmpty Proxy = null
take1_ :: TokenStream -> Maybe (Text.Megaparsec.Token TokenStream, TokenStream)
take1_ (TokenStream _ []) = Nothing
take1_ (TokenStream str (t : ts)) = Just (t, TokenStream (drop (tokensLength (Proxy @TokenStream) (t :| [])) str) ts)
takeN_ n (TokenStream str s)
| n <= 0 = Just ([], TokenStream str s)
take1_ (TokenStream _ [] _) = Nothing
take1_ (TokenStream str (Located _ t : ts) skipIndents@True) | isIndent t = take1_ (TokenStream str ts skipIndents)
take1_ (TokenStream str (t : ts) skipIndents) =
Just (t, TokenStream (drop (tokensLength (Proxy @TokenStream) (t :| [])) str) ts skipIndents)
takeN_ n (TokenStream str s skipIndents)
| n <= 0 = Just ([], TokenStream str s skipIndents)
| null s = Nothing
| otherwise -- repeatedly call take1_ until it returns Nothing
=
let (x, s') = takeWhile_ (const True) (TokenStream str s)
let (x, s') = takeWhile_ (const True) (TokenStream str s skipIndents)
in case takeN_ (n - length x) s' of
Nothing -> Nothing
Just (xs, s'') -> Just (x ++ xs, s'')

takeWhile_ f (TokenStream str s) =
takeWhile_ f (TokenStream str s skipIndents) =
let (x, s') = span f s
in case nonEmpty x of
Nothing -> (x, TokenStream str s')
Just nex -> (x, TokenStream (drop (tokensLength (Proxy @TokenStream) nex) str) s')
Nothing -> (x, TokenStream str s' skipIndents)
Just nex -> (x, TokenStream (drop (tokensLength (Proxy @TokenStream) nex) str) s' skipIndents)

instance VisualStream TokenStream where
showTokens Proxy =
Expand Down

0 comments on commit 613783b

Please sign in to comment.