Skip to content

Commit

Permalink
Refactor Parser
Browse files Browse the repository at this point in the history
Only the internal implementation is refactored.  Neither the exported
API nor the behavior is changed.
  • Loading branch information
TravisCardwell committed Jun 20, 2024
1 parent 7b8cbae commit 270006e
Showing 1 changed file with 58 additions and 133 deletions.
191 changes: 58 additions & 133 deletions src/LiterateX/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,13 @@
------------------------------------------------------------------------------

{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedStrings #-}

module LiterateX.Parser
( -- * API
parse
) where

-- https://hackage.haskell.org/package/base
import Control.Monad (guard)
import Data.Maybe (fromMaybe)

-- https://hackage.haskell.org/package/conduit
import qualified Data.Conduit as C
import Data.Conduit (ConduitT)
Expand All @@ -46,148 +42,77 @@ parse
:: Monad m
=> SourceFormat
-> ConduitT Text SourceLine m ()
parse = parseSourceLines . parserFunctionsFor
parse sourceFormat = do
let parseLine' = parseLine sourceFormat
mLine <- C.await
case mLine of
Just line -> do
C.yield $ if "#!" `T.isPrefixOf` line
then SourceLine.Shebang line
else parseLine' line
C.awaitForever $ C.yield . parseLine'
Nothing -> return ()

------------------------------------------------------------------------------
-- $Internal

-- | Parser functions that determine how input is parsed
data ParserFunctions
= ParserFunctions
{ isCodeBlank :: !(Text -> Bool)
, isDocBlank :: !(Text -> Bool)
, isRule :: !(Text -> Bool)
, getDoc :: !(Text -> Maybe Text)
, getCode :: !(Text -> Text)
}

------------------------------------------------------------------------------

-- | Get the parser functions for the specified source format
parserFunctionsFor :: SourceFormat -> ParserFunctions
parserFunctionsFor = \case
SourceFormat.DoubleDash -> lineCommentParserFunctions '-' 2
SourceFormat.DoubleSlash -> lineCommentParserFunctions '/' 2
SourceFormat.Hash -> lineCommentParserFunctions '#' 1
SourceFormat.LiterateHaskell -> literateHaskellParserFunctions
SourceFormat.Percent -> lineCommentParserFunctions '%' 1
SourceFormat.LispSemicolons -> lispCommentParserFunctions
-- | Parse a source line according to the source format
parseLine :: SourceFormat -> Text -> SourceLine
parseLine = \case
SourceFormat.DoubleDash -> parseLineCommentLine '-' 2
SourceFormat.DoubleSlash -> parseLineCommentLine '/' 2
SourceFormat.Hash -> parseLineCommentLine '#' 1
SourceFormat.LiterateHaskell -> parseLiterateHaskellLine
SourceFormat.Percent -> parseLineCommentLine '%' 1
SourceFormat.LispSemicolons -> parseLispCommentLine

------------------------------------------------------------------------------

-- | Get parser functions for source with line-based comments
lineCommentParserFunctions
-- | Parse a source line using line-based comments
parseLineCommentLine
:: Char -- ^ comment character
-> Int -- ^ number of comment characters to create line comment
-> ParserFunctions
lineCommentParserFunctions char count = ParserFunctions{..}
where
docBlank :: Text
docBlank = T.pack $ replicate count char

prefixLen :: Int
prefixLen = count + 1

prefix :: Text
prefix = T.pack $ replicate count char ++ " "

isCodeBlank :: Text -> Bool
isCodeBlank = T.null

isDocBlank :: Text -> Bool
isDocBlank = (== docBlank)

isRule :: Text -> Bool
isRule line = T.length line > count && T.all (== char) line

getDoc :: Text -> Maybe Text
getDoc line = do
let (linePrefix, lineSuffix) = T.splitAt prefixLen line
guard $ linePrefix == prefix
pure lineSuffix

getCode :: Text -> Text
getCode = id
-> Text -- ^ source line
-> SourceLine
parseLineCommentLine char count line
| T.null line = SourceLine.CodeBlank
| otherwise = case T.uncons <$> T.span (== char) line of
("", _) -> SourceLine.Code line
(_, Nothing) -> case T.compareLength line count of
EQ -> SourceLine.DocBlank
GT -> SourceLine.Rule
LT -> SourceLine.Code line
(l, Just (' ', r)) | T.compareLength l count == EQ -> SourceLine.Doc r
_otherwise -> SourceLine.Code line

------------------------------------------------------------------------------

-- | Get parser functions for source with Lisp-style comments
-- | Parse a source line using Lisp-style comments
--
-- Lisp-style comments begin with one or more semicolons.
lispCommentParserFunctions :: ParserFunctions
lispCommentParserFunctions = ParserFunctions{..}
where
isCodeBlank :: Text -> Bool
isCodeBlank = T.null

isDocBlank :: Text -> Bool
isDocBlank line =
let len = T.length line
in len >= 1 && len <= 4 && T.all (== ';') line

isRule :: Text -> Bool
isRule line = T.length line > 4 && T.all (== ';') line

getDoc :: Text -> Maybe Text
getDoc line = do
let (linePrefix, (sep, lineSuffix)) = T.splitAt 1 <$> T.breakOn " " line
guard $ not (T.null linePrefix) && T.all (== ';') linePrefix
guard $ sep == " "
pure lineSuffix

getCode :: Text -> Text
getCode = id

------------------------------------------------------------------------------

-- | Get parser functions for parsing literate Haskell
literateHaskellParserFunctions :: ParserFunctions
literateHaskellParserFunctions = ParserFunctions{..}
where
isCodeBlank :: Text -> Bool
isCodeBlank = (== ">")

isDocBlank :: Text -> Bool
isDocBlank = T.null

isRule :: Text -> Bool
isRule = const False

getDoc :: Text -> Maybe Text
getDoc line
| "> " `T.isPrefixOf` line = Nothing
| otherwise = Just line

getCode :: Text -> Text
getCode line = fromMaybe line $ T.stripPrefix "> " line
parseLispCommentLine
:: Text -- ^ source line
-> SourceLine
parseLispCommentLine line
| T.null line = SourceLine.CodeBlank
| otherwise = case T.uncons <$> T.span (== ';') line of
("", _) -> SourceLine.Code line
(_, Nothing)
| T.compareLength line 4 == GT -> SourceLine.Rule
| otherwise -> SourceLine.DocBlank
(_, Just (' ', r)) -> SourceLine.Doc r
_otherwise -> SourceLine.Code line

------------------------------------------------------------------------------

-- | Create a "Conduit" transformer for the specified parser functions
--
-- This function produces a 'SourceLine' for each line of input. A
-- 'SourceLine.Shebang' can only be produced on the first line. Note that the
-- order that the parser functions are used is significant; the parser
-- functions are written for this order.
parseSourceLines
:: Monad m
=> ParserFunctions
-> ConduitT Text SourceLine m ()
parseSourceLines ParserFunctions{..} = do
mLine <- C.await
case mLine of
Just line -> do
C.yield $ if "#!" `T.isPrefixOf` line
then SourceLine.Shebang line
else parse' line
C.awaitForever $ C.yield . parse'
Nothing -> return ()
where
parse' :: Text -> SourceLine
parse' line
| isCodeBlank line = SourceLine.CodeBlank
| isDocBlank line = SourceLine.DocBlank
| isRule line = SourceLine.Rule
| otherwise = case (getDoc line, getCode line) of
(Just doc, _code) -> SourceLine.Doc doc
(Nothing, code) -> SourceLine.Code code
-- | Parse a Literate Haskell source line
parseLiterateHaskellLine
:: Text -- ^ source line
-> SourceLine
parseLiterateHaskellLine line = case T.uncons line of
Nothing -> SourceLine.DocBlank
Just ('>', r1) -> case T.uncons r1 of
Nothing -> SourceLine.CodeBlank
Just (' ', r2) -> SourceLine.Code r2
_otherwise -> SourceLine.Doc line
_otherwise -> SourceLine.Doc line

0 comments on commit 270006e

Please sign in to comment.