Refactor Parser

Only the internal implementation is refactored. Neither the exported API nor the behavior is changed.
ExtremaIS · Jun 20, 2024 · 270006e · 270006e
1 parent 7b8cbae
commit 270006e
Showing 1 changed file with 58 additions and 133 deletions.
diff --git a/src/LiterateX/Parser.hs b/src/LiterateX/Parser.hs
@@ -9,17 +9,13 @@
 ------------------------------------------------------------------------------
 
 {-# LANGUAGE LambdaCase #-}
-{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE OverloadedStrings #-}
 
 module LiterateX.Parser
   ( -- * API
     parse
   ) where
 
--- https://hackage.haskell.org/package/base
-import Control.Monad (guard)
-import Data.Maybe (fromMaybe)
-
 -- https://hackage.haskell.org/package/conduit
 import qualified Data.Conduit as C
 import Data.Conduit (ConduitT)
@@ -46,148 +42,77 @@ parse
   :: Monad m
   => SourceFormat
   -> ConduitT Text SourceLine m ()
-parse = parseSourceLines . parserFunctionsFor
+parse sourceFormat = do
+    let parseLine' = parseLine sourceFormat
+    mLine <- C.await
+    case mLine of
+      Just line -> do
+        C.yield $ if "#!" `T.isPrefixOf` line
+          then SourceLine.Shebang line
+          else parseLine' line
+        C.awaitForever $ C.yield . parseLine'
+      Nothing -> return ()
 
 ------------------------------------------------------------------------------
 -- $Internal
 
--- | Parser functions that determine how input is parsed
-data ParserFunctions
-  = ParserFunctions
-    { isCodeBlank :: !(Text -> Bool)
-    , isDocBlank  :: !(Text -> Bool)
-    , isRule      :: !(Text -> Bool)
-    , getDoc      :: !(Text -> Maybe Text)
-    , getCode     :: !(Text -> Text)
-    }
-
-------------------------------------------------------------------------------
-
--- | Get the parser functions for the specified source format
-parserFunctionsFor :: SourceFormat -> ParserFunctions
-parserFunctionsFor = \case
-    SourceFormat.DoubleDash      -> lineCommentParserFunctions '-' 2
-    SourceFormat.DoubleSlash     -> lineCommentParserFunctions '/' 2
-    SourceFormat.Hash            -> lineCommentParserFunctions '#' 1
-    SourceFormat.LiterateHaskell -> literateHaskellParserFunctions
-    SourceFormat.Percent         -> lineCommentParserFunctions '%' 1
-    SourceFormat.LispSemicolons  -> lispCommentParserFunctions
+-- | Parse a source line according to the source format
+parseLine :: SourceFormat -> Text -> SourceLine
+parseLine = \case
+    SourceFormat.DoubleDash      -> parseLineCommentLine '-' 2
+    SourceFormat.DoubleSlash     -> parseLineCommentLine '/' 2
+    SourceFormat.Hash            -> parseLineCommentLine '#' 1
+    SourceFormat.LiterateHaskell -> parseLiterateHaskellLine
+    SourceFormat.Percent         -> parseLineCommentLine '%' 1
+    SourceFormat.LispSemicolons  -> parseLispCommentLine
 
 ------------------------------------------------------------------------------
 
--- | Get parser functions for source with line-based comments
-lineCommentParserFunctions
+-- | Parse a source line using line-based comments
+parseLineCommentLine
   :: Char  -- ^ comment character
   -> Int   -- ^ number of comment characters to create line comment
-  -> ParserFunctions
-lineCommentParserFunctions char count = ParserFunctions{..}
-  where
-    docBlank :: Text
-    docBlank = T.pack $ replicate count char
-
-    prefixLen :: Int
-    prefixLen = count + 1
-
-    prefix :: Text
-    prefix = T.pack $ replicate count char ++ " "
-
-    isCodeBlank :: Text -> Bool
-    isCodeBlank = T.null
-
-    isDocBlank :: Text -> Bool
-    isDocBlank = (== docBlank)
-
-    isRule :: Text -> Bool
-    isRule line = T.length line > count && T.all (== char) line
-
-    getDoc :: Text -> Maybe Text
-    getDoc line = do
-      let (linePrefix, lineSuffix) = T.splitAt prefixLen line
-      guard $ linePrefix == prefix
-      pure lineSuffix
-
-    getCode :: Text -> Text
-    getCode = id
+  -> Text  -- ^ source line
+  -> SourceLine
+parseLineCommentLine char count line
+    | T.null line = SourceLine.CodeBlank
+    | otherwise = case T.uncons <$> T.span (== char) line of
+        ("", _) -> SourceLine.Code line
+        (_, Nothing) -> case T.compareLength line count of
+          EQ -> SourceLine.DocBlank
+          GT -> SourceLine.Rule
+          LT -> SourceLine.Code line
+        (l, Just (' ', r)) | T.compareLength l count == EQ -> SourceLine.Doc r
+        _otherwise -> SourceLine.Code line
 
 ------------------------------------------------------------------------------
 
--- | Get parser functions for source with Lisp-style comments
+-- | Parse a source line using Lisp-style comments
 --
 -- Lisp-style comments begin with one or more semicolons.
-lispCommentParserFunctions :: ParserFunctions
-lispCommentParserFunctions = ParserFunctions{..}
-  where
-    isCodeBlank :: Text -> Bool
-    isCodeBlank = T.null
-
-    isDocBlank :: Text -> Bool
-    isDocBlank line =
-      let len = T.length line
-      in  len >= 1 && len <= 4 && T.all (== ';') line
-
-    isRule :: Text -> Bool
-    isRule line = T.length line > 4 && T.all (== ';') line
-
-    getDoc :: Text -> Maybe Text
-    getDoc line = do
-      let (linePrefix, (sep, lineSuffix)) = T.splitAt 1 <$> T.breakOn " " line
-      guard $ not (T.null linePrefix) && T.all (== ';') linePrefix
-      guard $ sep == " "
-      pure lineSuffix
-
-    getCode :: Text -> Text
-    getCode = id
-
-------------------------------------------------------------------------------
-
--- | Get parser functions for parsing literate Haskell
-literateHaskellParserFunctions :: ParserFunctions
-literateHaskellParserFunctions = ParserFunctions{..}
-  where
-    isCodeBlank :: Text -> Bool
-    isCodeBlank = (== ">")
-
-    isDocBlank :: Text -> Bool
-    isDocBlank = T.null
-
-    isRule :: Text -> Bool
-    isRule = const False
-
-    getDoc :: Text -> Maybe Text
-    getDoc line
-      | "> " `T.isPrefixOf` line = Nothing
-      | otherwise                = Just line
-
-    getCode :: Text -> Text
-    getCode line = fromMaybe line $ T.stripPrefix "> " line
+parseLispCommentLine
+  :: Text  -- ^ source line
+  -> SourceLine
+parseLispCommentLine line
+    | T.null line = SourceLine.CodeBlank
+    | otherwise = case T.uncons <$> T.span (== ';') line of
+        ("", _) -> SourceLine.Code line
+        (_, Nothing)
+          | T.compareLength line 4 == GT -> SourceLine.Rule
+          | otherwise -> SourceLine.DocBlank
+        (_, Just (' ', r)) -> SourceLine.Doc r
+        _otherwise -> SourceLine.Code line
 
 ------------------------------------------------------------------------------
 
--- | Create a "Conduit" transformer for the specified parser functions
---
--- This function produces a 'SourceLine' for each line of input.  A
--- 'SourceLine.Shebang' can only be produced on the first line.  Note that the
--- order that the parser functions are used is significant; the parser
--- functions are written for this order.
-parseSourceLines
-  :: Monad m
-  => ParserFunctions
-  -> ConduitT Text SourceLine m ()
-parseSourceLines ParserFunctions{..} = do
-    mLine <- C.await
-    case mLine of
-      Just line -> do
-        C.yield $ if "#!" `T.isPrefixOf` line
-          then SourceLine.Shebang line
-          else parse' line
-        C.awaitForever $ C.yield . parse'
-      Nothing -> return ()
-  where
-    parse' :: Text -> SourceLine
-    parse' line
-      | isCodeBlank line = SourceLine.CodeBlank
-      | isDocBlank line  = SourceLine.DocBlank
-      | isRule line      = SourceLine.Rule
-      | otherwise        = case (getDoc line, getCode line) of
-          (Just doc, _code) -> SourceLine.Doc doc
-          (Nothing,  code)  -> SourceLine.Code code
+-- | Parse a Literate Haskell source line
+parseLiterateHaskellLine
+  :: Text  -- ^ source line
+  -> SourceLine
+parseLiterateHaskellLine line = case T.uncons line of
+    Nothing -> SourceLine.DocBlank
+    Just ('>', r1) -> case T.uncons r1 of
+      Nothing -> SourceLine.CodeBlank
+      Just (' ', r2) -> SourceLine.Code r2
+      _otherwise -> SourceLine.Doc line
+    _otherwise -> SourceLine.Doc line