From 8afaa68c7b856114c01330f9a66d74c1372fd641 Mon Sep 17 00:00:00 2001
From: Ben Orchard <thefirstmuffinman@gmail.com>
Date: Thu, 20 Oct 2022 11:38:34 +0100
Subject: [PATCH 01/11] partial work supporting GHC 9.4

---
 TODO.md                                      |    1 +
 flatparse.cabal                              |   20 +-
 src/FlatParse/Basic.hs                       | 1230 +++------------
 src/FlatParse/Basic/Chars.hs                 |  171 +++
 src/FlatParse/Basic/Integers.hs              |  340 ++++
 src/FlatParse/Basic/Internal.hs              |  197 +++
 src/FlatParse/Basic/Parser.hs                |  135 ++
 src/FlatParse/Basic/Position.hs              |   71 +
 src/FlatParse/Common/Assorted.hs             |  135 ++
 src/FlatParse/Common/Numbers.hs              |   70 +
 src/FlatParse/Common/Position.hs             |   42 +
 src/FlatParse/Common/Trie.hs                 |   85 +
 src/FlatParse/Examples/BasicLambda/Lexer.hs  |   10 +-
 src/FlatParse/Examples/BasicLambda/Parser.hs |    2 +-
 src/FlatParse/Internal.hs                    |  277 ----
 src/FlatParse/Internal/UnboxedNumerics.hs    |  115 --
 src/FlatParse/Stateful.hs                    | 1453 ------------------
 test/Test.hs                                 |  272 ++--
 18 files changed, 1636 insertions(+), 2990 deletions(-)
 create mode 100644 TODO.md
 create mode 100644 src/FlatParse/Basic/Chars.hs
 create mode 100644 src/FlatParse/Basic/Integers.hs
 create mode 100644 src/FlatParse/Basic/Internal.hs
 create mode 100644 src/FlatParse/Basic/Parser.hs
 create mode 100644 src/FlatParse/Basic/Position.hs
 create mode 100644 src/FlatParse/Common/Assorted.hs
 create mode 100644 src/FlatParse/Common/Numbers.hs
 create mode 100644 src/FlatParse/Common/Position.hs
 create mode 100644 src/FlatParse/Common/Trie.hs
 delete mode 100644 src/FlatParse/Internal.hs
 delete mode 100644 src/FlatParse/Internal/UnboxedNumerics.hs
 delete mode 100644 src/FlatParse/Stateful.hs

diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..4501b18
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1 @@
+  * add unsafeTakeBs# which doesn't assert >=0. can cause hangs so bad
diff --git a/flatparse.cabal b/flatparse.cabal
index b7f0032..4ea5335 100644
--- a/flatparse.cabal
+++ b/flatparse.cabal
@@ -1,6 +1,6 @@
 cabal-version: 1.12
 
--- This file has been generated from package.yaml by hpack version 0.34.4.
+-- This file has been generated from package.yaml by hpack version 0.35.0.
 --
 -- see: https://github.com/sol/hpack
 
@@ -44,11 +44,17 @@ flag llvm
 library
   exposed-modules:
       FlatParse.Basic
+      FlatParse.Basic.Chars
+      FlatParse.Basic.Integers
+      FlatParse.Basic.Internal
+      FlatParse.Basic.Parser
+      FlatParse.Basic.Position
+      FlatParse.Common.Assorted
+      FlatParse.Common.Numbers
+      FlatParse.Common.Position
+      FlatParse.Common.Trie
       FlatParse.Examples.BasicLambda.Lexer
       FlatParse.Examples.BasicLambda.Parser
-      FlatParse.Internal
-      FlatParse.Internal.UnboxedNumerics
-      FlatParse.Stateful
   other-modules:
       Paths_flatparse
   hs-source-dirs:
@@ -71,11 +77,11 @@ library
     , containers
     , integer-gmp
     , template-haskell
+  default-language: Haskell2010
   if flag(dump)
     ghc-options: -ddump-simpl -ddump-stg-final -ddump-cmm -dsuppress-all -dno-suppress-type-signatures -ddump-to-file
   if flag(llvm)
     ghc-options: -fllvm
-  default-language: Haskell2010
 
 test-suite spec
   type: exitcode-stdio-1.0
@@ -105,11 +111,11 @@ test-suite spec
     , flatparse
     , hspec
     , quickcheck-instances
+  default-language: Haskell2010
   if flag(dump)
     ghc-options: -ddump-simpl -ddump-stg-final -ddump-cmm -dsuppress-all -dno-suppress-type-signatures -ddump-to-file
   if flag(llvm)
     ghc-options: -fllvm
-  default-language: Haskell2010
 
 benchmark bench
   type: exitcode-stdio-1.0
@@ -146,8 +152,8 @@ benchmark bench
     , megaparsec
     , parsec
     , primitive
+  default-language: Haskell2010
   if flag(dump)
     ghc-options: -ddump-simpl -ddump-stg-final -ddump-cmm -dsuppress-all -dno-suppress-type-signatures -ddump-to-file
   if flag(llvm)
     ghc-options: -fllvm
-  default-language: Haskell2010
diff --git a/src/FlatParse/Basic.hs b/src/FlatParse/Basic.hs
index cda9867..14858ce 100644
--- a/src/FlatParse/Basic.hs
+++ b/src/FlatParse/Basic.hs
@@ -24,12 +24,12 @@ module FlatParse.Basic (
 
   -- * Errors and failures
   , failed
-  , Base.empty
+  , Control.Applicative.empty
   , err
   , lookahead
   , fails
   , try
-  , optional
+  , Control.Applicative.optional
   , optional_
   , withOption
   , cut
@@ -37,72 +37,46 @@ module FlatParse.Basic (
 
   -- * Basic lexing and parsing
   , eof
-  , takeBs
-  , takeRestBs
+  , take
+  , takeRest
   , skip
-  , char
-  , byte
-  , bytes
-  , byteString
-  , string
   , switch
   , switchWithPost
   , rawSwitchWithPost
+
+  , getCharOf
+  , getBytesOf
+  , getByteStringOf
+  , getStringOf
+
+  , getChar
+  , getChar_
+  , getCharASCII
+  , getCharASCII_
+  , getAsciiDecimalInt
+  , getAsciiDecimalInteger
+  , getAsciiHexInt
+  , getCString
+
+  , Common.isDigit
+  , Common.isGreekLetter
+  , Common.isLatinLetter
+
   , satisfy
   , satisfy_
   , satisfyASCII
   , satisfyASCII_
   , fusedSatisfy
   , fusedSatisfy_
-  , anyWord8
-  , anyWord8_
-  , anyWord16
-  , anyWord16_
-  , anyWord32
-  , anyWord32_
-  , anyWord64
-  , anyWord64_
-  , anyWord
-  , anyWord_
-  , anyInt8
-  , anyInt16
-  , anyInt32
-  , anyInt64
-  , anyInt
-  , anyChar
-  , anyChar_
-  , anyCharASCII
-  , anyCharASCII_
-  , FlatParse.Internal.isDigit
-  , FlatParse.Internal.isGreekLetter
-  , FlatParse.Internal.isLatinLetter
-  , FlatParse.Basic.readInt
-  , FlatParse.Basic.readIntHex
-  , FlatParse.Basic.readInteger
-  , anyCString
-
-  -- ** Explicit-endianness machine integers
-  , anyWord16le
-  , anyWord16be
-  , anyWord32le
-  , anyWord32be
-  , anyWord64le
-  , anyWord64be
-  , anyInt16le
-  , anyInt16be
-  , anyInt32le
-  , anyInt32be
-  , anyInt64le
-  , anyInt64be
 
   -- * Combinators
   , (<|>)
   , branch
   , chainl
   , chainr
-  , many
+  , Control.Applicative.many
   , many_
-  , some
+  , Control.Applicative.some
   , some_
   , notFollowedBy
   , isolate
@@ -130,8 +104,8 @@ module FlatParse.Basic (
   -- * Getting the rest of the input as a 'String'
   , takeLine
   , traceLine
-  , takeRest
-  , traceRest
+  , takeRestString
+  , traceRestString
 
   -- * `String` conversions
   , packUTF8
@@ -144,15 +118,6 @@ module FlatParse.Basic (
   , takeBs#
   , atSkip#
 
-  -- *** Machine integer continuation parsers
-  , withAnyWord8#
-  , withAnyWord16#
-  , withAnyWord32#
-  , withAnyWord64#
-  , withAnyInt8#
-  , withAnyInt16#
-  , withAnyInt32#
-  , withAnyInt64#
 
   -- ** Location & address primitives
   , setBack#
@@ -162,27 +127,24 @@ module FlatParse.Basic (
   , atAddr#
 
   -- ** Unsafe
-  , anyCStringUnsafe
-  , scan8#
-  , scan16#
-  , scan32#
-  , scan64#
-  , scanAny8#
-  , scanBytes#
+  , getCStringUnsafe
+
+  , module FlatParse.Basic.Integers
 
   ) where
 
-import qualified Control.Applicative as Base
+import Prelude hiding ( take, getChar )
+
+import Control.Applicative
+
 import Control.Monad
 import Data.Foldable
 import Data.List (sortBy)
 import Data.Map (Map)
 import Data.Ord (comparing)
-import Data.Word
 import GHC.Exts
 import GHC.Word
-import GHC.Int
-import GHC.ForeignPtr
+import GHC.ForeignPtr ( ForeignPtr(..) )
 import Language.Haskell.TH
 import System.IO.Unsafe
 
@@ -191,76 +153,17 @@ import qualified Data.ByteString.Unsafe as B
 import qualified Data.ByteString.Internal as B
 import qualified Data.Map.Strict as M
 
-import FlatParse.Internal
-import FlatParse.Internal.UnboxedNumerics
-
---------------------------------------------------------------------------------
-
--- | Primitive result of a parser. Possible results are given by `OK#`, `Err#` and `Fail#`
---   pattern synonyms.
-type Res# e a =
-  (#
-    (# a, Addr# #)
-  | (# #)
-  | (# e #)
-  #)
-
--- | Contains return value and a pointer to the rest of the input buffer.
-pattern OK# :: a -> Addr# -> Res# e a
-pattern OK# a s = (# (# a, s #) | | #)
-
--- | Constructor for errors which are by default non-recoverable.
-pattern Err# :: e -> Res# e a
-pattern Err# e = (# | | (# e #) #)
-
--- | Constructor for recoverable failure.
-pattern Fail# :: Res# e a
-pattern Fail# = (# | (# #) | #)
-{-# complete OK#, Err#, Fail# #-}
-
--- | @Parser e a@ has an error type @e@ and a return type @a@.
-newtype Parser e a = Parser {runParser# :: ForeignPtrContents -> Addr# -> Addr# -> Res# e a}
-
-instance Functor (Parser e) where
-  fmap f (Parser g) = Parser \fp eob s -> case g fp eob s of
-    OK# a s -> let !b = f a in OK# b s
-    x       -> unsafeCoerce# x
-  {-# inline fmap #-}
-
-  (<$) a' (Parser g) = Parser \fp eob s -> case g fp eob s of
-    OK# a s -> OK# a' s
-    x       -> unsafeCoerce# x
-  {-# inline (<$) #-}
+import qualified FlatParse.Common.Numbers as Common
+import qualified FlatParse.Common.Assorted as Common
+import FlatParse.Common.Position
+import FlatParse.Common.Trie
+import FlatParse.Common.Assorted ( packBytes, splitBytes, strToBytes, packUTF8 )
 
-instance Applicative (Parser e) where
-  pure a = Parser \fp eob s -> OK# a s
-  {-# inline pure #-}
-  Parser ff <*> Parser fa = Parser \fp eob s -> case ff fp eob s of
-    OK# f s -> case fa fp eob s of
-      OK# a s  -> let !b = f a in OK# b s
-      x        -> unsafeCoerce# x
-    x -> unsafeCoerce# x
-  {-# inline (<*>) #-}
-  Parser fa <* Parser fb = Parser \fp eob s -> case fa fp eob s of
-    OK# a s   -> case fb fp eob s of
-      OK# b s -> OK# a s
-      x -> unsafeCoerce# x
-    x -> unsafeCoerce# x
-  {-# inline (<*) #-}
-  Parser fa *> Parser fb = Parser \fp eob s -> case fa fp eob s of
-    OK# a s -> fb fp eob s
-    x       -> unsafeCoerce# x
-  {-# inline (*>) #-}
-
-instance Monad (Parser e) where
-  return = pure
-  {-# inline return #-}
-  Parser fa >>= f = Parser \fp eob s -> case fa fp eob s of
-    OK# a s -> runParser# (f a) fp eob s
-    x       -> unsafeCoerce# x
-  {-# inline (>>=) #-}
-  (>>) = (*>)
-  {-# inline (>>) #-}
+import FlatParse.Basic.Parser
+import FlatParse.Basic.Integers
+import FlatParse.Basic.Internal
+import FlatParse.Basic.Chars
+import FlatParse.Basic.Position
 
 -- | Higher-level boxed data type for parsing results.
 data Result e a =
@@ -295,35 +198,15 @@ runParser (Parser f) b@(B.PS (ForeignPtr _ fp) _ (I# len)) = unsafeDupablePerfor
         pure Fail
 {-# inlinable runParser #-}
 
--- | Run a parser on a `String` input. Reminder: @OverloadedStrings@ for `B.ByteString` does not
---   yield a valid UTF-8 encoding! For non-ASCII `B.ByteString` literal input, use `runParserS` or
---   `packUTF8` for testing.
-runParserS :: Parser e a -> String -> Result e a
-runParserS pa s = runParser pa (packUTF8 s)
-
 
 --------------------------------------------------------------------------------
 
--- | The failing parser. By default, parser choice `(<|>)` arbitrarily backtracks
---   on parser failure.
-failed :: Parser e a
-failed = Parser \fp eob s -> Fail#
-{-# inline failed #-}
-
 -- | Throw a parsing error. By default, parser choice `(<|>)` can't backtrack
 --   on parser error. Use `try` to convert an error to a recoverable failure.
 err :: e -> Parser e a
 err e = Parser \fp eob s -> Err# e
 {-# inline err #-}
 
--- | Save the parsing state, then run a parser, then restore the state.
-lookahead :: Parser e a -> Parser e a
-lookahead (Parser f) = Parser \fp eob s ->
-  case f fp eob s of
-    OK# a _ -> OK# a s
-    x       -> x
-{-# inline lookahead #-}
-
 -- | Convert a parsing failure to a success.
 fails :: Parser e a -> Parser e ()
 fails (Parser f) = Parser \fp eob s ->
@@ -340,19 +223,14 @@ try (Parser f) = Parser \fp eob s -> case f fp eob s of
   x      -> x
 {-# inline try #-}
 
--- | Convert a parsing failure to a `Maybe`. If possible, use `withOption` instead.
-optional :: Parser e a -> Parser e (Maybe a)
-optional p = (Just <$> p) <|> pure Nothing
-{-# inline optional #-}
-
 -- | Convert a parsing failure to a `()`.
 optional_ :: Parser e a -> Parser e ()
 optional_ p = (() <$ p) <|> pure ()
 {-# inline optional_ #-}
 
--- | CPS'd version of `optional`. This is usually more efficient, since it gets rid of the
---   extra `Maybe` allocation.
-withOption :: Parser e a -> (a -> Parser e b) -> Parser e b -> Parser e b
+-- | CPS'd version of `optional`. This is usually more efficient, since it gets
+--   rid of the extra `Maybe` allocation.
+withOption :: Parser e a -> (a -> Parser e r) -> Parser e r -> Parser e r
 withOption (Parser f) just (Parser nothing) = Parser \fp eob s -> case f fp eob s of
   OK# a s -> runParser# (just a) fp eob s
   Fail#   -> nothing fp eob s
@@ -389,16 +267,16 @@ eof = Parser \fp eob s -> case eqAddr# eob s of
 -- | Read the given number of bytes as a 'ByteString'.
 --
 -- Throws a runtime error if given a negative integer.
-takeBs :: Int -> Parser e B.ByteString
-takeBs (I# n#) = takeBs# n#
-{-# inline takeBs #-}
+take :: Int -> Parser e B.ByteString
+take (I# n#) = takeBs# n#
+{-# inline take #-}
 
 -- | Consume the rest of the input. May return the empty bytestring.
-takeRestBs :: Parser e B.ByteString
-takeRestBs = Parser \fp eob s ->
+takeRest :: Parser e B.ByteString
+takeRest = Parser \fp eob s ->
   let n# = minusAddr# eob s
   in  OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) eob
-{-# inline takeRestBs #-}
+{-# inline takeRest #-}
 
 -- | Skip forward @n@ bytes. Fails if fewer than @n@ bytes are available.
 --
@@ -409,52 +287,13 @@ skip (I# os#) = atSkip# os# (pure ())
 
 -- | Parse a UTF-8 character literal. This is a template function, you can use it as
 --   @$(char \'x\')@, for example, and the splice in this case has type @Parser e ()@.
-char :: Char -> Q Exp
-char c = string [c]
-
--- | Read a `Word8`.
-byte :: Word8 -> Parser e ()
-byte w = ensureBytes# 1 >> scan8# w
-{-# inline byte #-}
-
--- | Read a sequence of bytes. This is a template function, you can use it as @$(bytes [3, 4, 5])@,
---   for example, and the splice has type @Parser e ()@. For a non-TH variant see 'byteString'.
-bytes :: [Word] -> Q Exp
-bytes bytes = do
-  let !len = length bytes
-  [| ensureBytes# len >> $(scanBytes# bytes) |]
-
--- | Parse a given `B.ByteString`. If the bytestring is statically known, consider using 'bytes' instead.
-byteString :: B.ByteString -> Parser e ()
-byteString (B.PS (ForeignPtr bs fcontent) _ (I# len)) =
-
-  let go64 :: Addr# -> Addr# -> Addr# -> State# RealWorld -> (# Res# e (), State# RealWorld #)
-      go64 bs bsend s w =
-        let bs' = plusAddr# bs 8# in
-        case gtAddr# bs' bsend of
-          1# -> go8 bs bsend s w
-          _  -> case eqWord# (indexWord64OffAddr# bs 0#) (indexWord64OffAddr# s 0#) of
-            1# -> go64 bs' bsend (plusAddr# s 8#) w
-            _  -> (# Fail#, w #)
-
-      go8 :: Addr# -> Addr# -> Addr# -> State# RealWorld -> (# Res# e (), State# RealWorld #)
-      go8 bs bsend s w = case ltAddr# bs bsend of
-        1# -> case eqWord8'# (indexWord8OffAddr# bs 0#) (indexWord8OffAddr# s 0#) of
-          1# -> go8 (plusAddr# bs 1#) bsend (plusAddr# s 1#) w
-          _  -> (# Fail#, w #)
-        _  -> (# OK# () s, w #)
-
-  in Parser \fp eob s -> case len <=# minusAddr# eob s of
-       1# -> runRW# \w -> case go64 bs (plusAddr# bs len) s w of
-               (# res, w #) -> case touch# fcontent w of
-                 w -> res
-       _  -> Fail#
-{-# inline byteString #-}
+getCharOf :: Char -> Q Exp
+getCharOf c = getStringOf [c]
 
 -- | Parse a UTF-8 string literal. This is a template function, you can use it as @$(string "foo")@,
 --   for example, and the splice has type @Parser e ()@.
-string :: String -> Q Exp
-string str = bytes (strToBytes str)
+getStringOf :: String -> Q Exp
+getStringOf str = getBytesOf (strToBytes str)
 
 {-|
 This is a template function which makes it possible to branch on a collection of string literals in
@@ -523,233 +362,32 @@ rawSwitchWithPost postAction cases fallback = do
   !fallback <- sequence fallback
   genTrie $! genSwitchTrie' postAction cases fallback
 
--- | Parse a UTF-8 `Char` for which a predicate holds.
-satisfy :: (Char -> Bool) -> Parser e Char
-satisfy f = Parser \fp eob s -> case runParser# anyChar fp eob s of
-  OK# c s | f c -> OK# c s
-  _             -> Fail#
-{-#  inline satisfy #-}
-
--- | Skip a UTF-8 `Char` for which a predicate holds.
-satisfy_ :: (Char -> Bool) -> Parser e ()
-satisfy_ f = Parser \fp eob s -> case runParser# anyChar fp eob s of
-  OK# c s | f c -> OK# () s
-  _             -> Fail#
-{-#  inline satisfy_ #-}
-
--- | Parse an ASCII `Char` for which a predicate holds. Assumption: the predicate must only return
---   `True` for ASCII-range characters. Otherwise this function might read a 128-255 range byte,
---   thereby breaking UTF-8 decoding.
-satisfyASCII :: (Char -> Bool) -> Parser e Char
-satisfyASCII f = Parser \fp eob s -> case eqAddr# eob s of
-  1# -> Fail#
-  _  -> case derefChar8# s of
-    c1 | f (C# c1) -> OK# (C# c1) (plusAddr# s 1#)
-       | otherwise -> Fail#
-{-#  inline satisfyASCII #-}
-
--- | Skip an ASCII `Char` for which a predicate holds. Assumption: the predicate
---   must only return `True` for ASCII-range characters.
-satisfyASCII_ :: (Char -> Bool) -> Parser e ()
-satisfyASCII_ f = Parser \fp eob s -> case eqAddr# eob s of
-  1# -> Fail#
-  _  -> case derefChar8# s of
-    c1 | f (C# c1) -> OK# () (plusAddr# s 1#)
-       | otherwise -> Fail#
-{-#  inline satisfyASCII_ #-}
-
--- | This is a variant of `satisfy` which allows more optimization. We can pick four testing
---   functions for the four cases for the possible number of bytes in the UTF-8 character. So in
---   @fusedSatisfy f1 f2 f3 f4@, if we read a one-byte character, the result is scrutinized with
---   @f1@, for two-bytes, with @f2@, and so on. This can result in dramatic lexing speedups.
---
---   For example, if we want to accept any letter, the naive solution would be to use
---   `Data.Char.isLetter`, but this accesses a large lookup table of Unicode character classes. We
---   can do better with @fusedSatisfy isLatinLetter isLetter isLetter isLetter@, since here the
---   `isLatinLetter` is inlined into the UTF-8 decoding, and it probably handles a great majority of
---   all cases without accessing the character table.
-fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser e Char
-fusedSatisfy f1 f2 f3 f4 = Parser \fp eob buf -> case eqAddr# eob buf of
-  1# -> Fail#
-  _  -> case derefChar8# buf of
-    c1 -> case c1 `leChar#` '\x7F'# of
-      1# | f1 (C# c1) -> OK# (C# c1) (plusAddr# buf 1#)
-         | otherwise  -> Fail#
-      _  -> case eqAddr# eob (plusAddr# buf 1#) of
-        1# -> Fail#
-        _ -> case indexCharOffAddr# buf 1# of
-          c2 -> case c1 `leChar#` '\xDF'# of
-            1# ->
-              let resc = C# (chr# (((ord# c1 -# 0xC0#) `uncheckedIShiftL#` 6#) `orI#`
-                                   (ord# c2 -# 0x80#)))
-              in case f2 resc of
-                   True -> OK# resc (plusAddr# buf 2#)
-                   _    -> Fail#
-            _ -> case eqAddr# eob (plusAddr# buf 2#) of
-              1# -> Fail#
-              _  -> case indexCharOffAddr# buf 2# of
-                c3 -> case c1 `leChar#` '\xEF'# of
-                  1# ->
-                    let resc = C# (chr# (((ord# c1 -# 0xE0#) `uncheckedIShiftL#` 12#) `orI#`
-                                         ((ord# c2 -# 0x80#) `uncheckedIShiftL#`  6#) `orI#`
-                                         (ord# c3 -# 0x80#)))
-                    in case f3 resc of
-                         True -> OK# resc (plusAddr# buf 3#)
-                         _    -> Fail#
-                  _ -> case eqAddr# eob (plusAddr# buf 3#) of
-                    1# -> Fail#
-                    _  -> case indexCharOffAddr# buf 3# of
-                      c4 ->
-                        let resc = C# (chr# (((ord# c1 -# 0xF0#) `uncheckedIShiftL#` 18#) `orI#`
-                                             ((ord# c2 -# 0x80#) `uncheckedIShiftL#` 12#) `orI#`
-                                             ((ord# c3 -# 0x80#) `uncheckedIShiftL#`  6#) `orI#`
-                                              (ord# c4 -# 0x80#)))
-                        in case f4 resc of
-                             True -> OK# resc (plusAddr# buf 4#)
-                             _    -> Fail#
-{-# inline fusedSatisfy #-}
-
--- | Skipping variant of `fusedSatisfy`.
-fusedSatisfy_ :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser e ()
-fusedSatisfy_ f1 f2 f3 f4 = () <$ fusedSatisfy f1 f2 f3 f4
-{-# inline fusedSatisfy_ #-}
-
--- | Parse any UTF-8-encoded `Char`.
-anyChar :: Parser e Char
-anyChar = Parser \fp eob buf -> case eqAddr# eob buf of
-  1# -> Fail#
-  _  -> case derefChar8# buf of
-    c1 -> case c1 `leChar#` '\x7F'# of
-      1# -> OK# (C# c1) (plusAddr# buf 1#)
-      _  -> case eqAddr# eob (plusAddr# buf 1#) of
-        1# -> Fail#
-        _ -> case indexCharOffAddr# buf 1# of
-          c2 -> case c1 `leChar#` '\xDF'# of
-            1# ->
-              let resc = ((ord# c1 -# 0xC0#) `uncheckedIShiftL#` 6#) `orI#`
-                          (ord# c2 -# 0x80#)
-              in OK# (C# (chr# resc)) (plusAddr# buf 2#)
-            _ -> case eqAddr# eob (plusAddr# buf 2#) of
-              1# -> Fail#
-              _  -> case indexCharOffAddr# buf 2# of
-                c3 -> case c1 `leChar#` '\xEF'# of
-                  1# ->
-                    let resc = ((ord# c1 -# 0xE0#) `uncheckedIShiftL#` 12#) `orI#`
-                               ((ord# c2 -# 0x80#) `uncheckedIShiftL#`  6#) `orI#`
-                                (ord# c3 -# 0x80#)
-                    in OK# (C# (chr# resc)) (plusAddr# buf 3#)
-                  _ -> case eqAddr# eob (plusAddr# buf 3#) of
-                    1# -> Fail#
-                    _  -> case indexCharOffAddr# buf 3# of
-                      c4 ->
-                        let resc = ((ord# c1 -# 0xF0#) `uncheckedIShiftL#` 18#) `orI#`
-                                   ((ord# c2 -# 0x80#) `uncheckedIShiftL#` 12#) `orI#`
-                                   ((ord# c3 -# 0x80#) `uncheckedIShiftL#`  6#) `orI#`
-                                    (ord# c4 -# 0x80#)
-                        in OK# (C# (chr# resc)) (plusAddr# buf 4#)
-{-# inline anyChar #-}
-
--- | Skip any UTF-8-encoded `Char`.
-anyChar_ :: Parser e ()
-anyChar_ = Parser \fp eob buf -> case eqAddr# eob buf of
-  1# -> Fail#
-  _  -> case derefChar8# buf of
-    c1 -> case c1 `leChar#` '\x7F'# of
-      1# -> OK# () (plusAddr# buf 1#)
-      _  ->
-        let buf' =
-              case c1 `leChar#` '\xDF'# of
-                1# -> plusAddr# buf 2#
-                _  -> case c1 `leChar#` '\xEF'# of
-                    1# -> plusAddr# buf 3#
-                    _ ->  plusAddr# buf 4#
-        in case leAddr# buf' eob of
-             1# -> OK# () buf'
-             _  -> Fail#
-{-# inline anyChar_ #-}
-
-
--- | Parse any `Char` in the ASCII range, fail if the next input character is not in the range.
---   This is more efficient than `anyChar` if we are only working with ASCII.
-anyCharASCII :: Parser e Char
-anyCharASCII = Parser \fp eob buf -> case eqAddr# eob buf of
-  1# -> Fail#
-  _  -> case derefChar8# buf of
-    c1 -> case c1 `leChar#` '\x7F'# of
-      1# -> OK# (C# c1) (plusAddr# buf 1#)
-      _  -> Fail#
-{-# inline anyCharASCII #-}
-
--- | Skip any `Char` in the ASCII range. More efficient than `anyChar_` if we're working only with
---   ASCII.
-anyCharASCII_ :: Parser e ()
-anyCharASCII_ = () <$ anyCharASCII
-{-# inline anyCharASCII_ #-}
-
 -- | Read a non-negative `Int` from the input, as a non-empty digit sequence.
 -- The `Int` may overflow in the result.
-readInt :: Parser e Int
-readInt = Parser \fp eob s -> case FlatParse.Internal.readInt eob s of
+getAsciiDecimalInt :: Parser e Int
+getAsciiDecimalInt = Parser \fp eob s -> case Common.readInt eob s of
   (# (##) | #)        -> Fail#
   (# | (# n, s' #) #) -> OK# (I# n) s'
-{-# inline readInt #-}
+{-# inline getAsciiDecimalInt #-}
 
 -- | Read an `Int` from the input, as a non-empty case-insensitive ASCII
 --   hexadecimal digit sequence. The `Int` may overflow in the result.
-readIntHex :: Parser e Int
-readIntHex = Parser \fp eob s -> case FlatParse.Internal.readIntHex eob s of
+getAsciiHexInt :: Parser e Int
+getAsciiHexInt = Parser \fp eob s -> case Common.readIntHex eob s of
   (# (##) | #)        -> Fail#
   (# | (# n, s' #) #) -> OK# (I# n) s'
-{-# inline readIntHex #-}
+{-# inline getAsciiHexInt #-}
 
 -- | Read a non-negative `Integer` from the input, as a non-empty digit
 -- sequence.
-readInteger :: Parser e Integer
-readInteger = Parser \fp eob s -> case FlatParse.Internal.readInteger fp eob s of
+getAsciiDecimalInteger :: Parser e Integer
+getAsciiDecimalInteger = Parser \fp eob s -> case Common.readInteger fp eob s of
   (# (##) | #)        -> Fail#
   (# | (# i, s' #) #) -> OK# i s'
-{-# inline readInteger #-}
+{-# inline getAsciiDecimalInteger #-}
 
 --------------------------------------------------------------------------------
 
--- | Choose between two parsers. If the first parser fails, try the second one, but if the first one
---   throws an error, propagate the error.
-infixr 6 <|>
-(<|>) :: Parser e a -> Parser e a -> Parser e a
-(<|>) (Parser f) (Parser g) = Parser \fp eob s ->
-  case f fp eob s of
-    Fail# -> g fp eob s
-    x     -> x
-{-# inline[1] (<|>) #-}
-
-instance Base.Alternative (Parser e) where
-  empty = failed
-  {-# inline empty #-}
-  (<|>) = (<|>)
-  {-# inline (Base.<|>) #-}
-
-instance MonadPlus (Parser e) where
-  mzero = failed
-  {-# inline mzero #-}
-  mplus = (<|>)
-  {-# inline mplus #-}
-
-{-# RULES
-
-"flatparse/reassoc-alt" forall l m r. (l <|> m) <|> r = l <|> (m <|> r)
-
-#-}
-
--- | Branch on a parser: if the first argument succeeds, continue with the second, else with the third.
---   This can produce slightly more efficient code than `(<|>)`. Moreover, `ḃranch` does not
---   backtrack from the true/false cases.
-branch :: Parser e a -> Parser e b -> Parser e b -> Parser e b
-branch pa pt pf = Parser \fp eob s -> case runParser# pa fp eob s of
-  OK# _ s -> runParser# pt fp eob s
-  Fail#   -> runParser# pf fp eob s
-  Err# e  -> Err# e
-{-# inline branch #-}
-
 -- | An analogue of the list `foldl` function: first parse a @b@, then parse zero or more @a@-s,
 --   and combine the results in a left-nested way by the @b -> a -> b@ function. Note: this is not
 --   the usual `chainl` function from the parsec libraries!
@@ -771,19 +409,6 @@ chainr f (Parser elem) (Parser end) = Parser go where
     Err# e -> Err# e
 {-# inline chainr #-}
 
--- | Run a parser zero or more times, collect the results in a list. Note: for optimal performance,
---   try to avoid this. Often it is possible to get rid of the intermediate list by using a
---   combinator or a custom parser.
-many :: Parser e a -> Parser e [a]
-many (Parser f) = Parser go where
-  go fp eob s = case f fp eob s of
-    OK# a s -> case go fp eob s of
-                 OK# as s -> OK# (a:as) s
-                 x        -> x
-    Fail#  -> OK# [] s
-    Err# e -> Err# e
-{-# inline many #-}
-
 -- | Skip a parser zero or more times.
 many_ :: Parser e a -> Parser e ()
 many_ (Parser f) = Parser go where
@@ -793,13 +418,6 @@ many_ (Parser f) = Parser go where
     Err# e  -> Err# e
 {-# inline many_ #-}
 
--- | Run a parser one or more times, collect the results in a list. Note: for optimal performance,
---   try to avoid this. Often it is possible to get rid of the intermediate list by using a
---   combinator or a custom parser.
-some :: Parser e a -> Parser e [a]
-some p = (:) <$> p <*> many p
-{-# inline some #-}
-
 -- | Skip a parser one or more times.
 some_ :: Parser e a -> Parser e ()
 some_ pa = pa >> many_ pa
@@ -810,260 +428,44 @@ notFollowedBy :: Parser e a -> Parser e b -> Parser e a
 notFollowedBy p1 p2 = p1 <* fails p2
 {-# inline notFollowedBy #-}
 
--- | @isolate n p@ runs the parser @p@ isolated to the next @n@ bytes. All
---   isolated bytes must be consumed.
---
--- Throws a runtime error if given a negative integer.
-isolate :: Int -> Parser e a -> Parser e a
-isolate (I# n#) p = Parser \fp eob s ->
-  let s' = plusAddr# s n#
-  in  case n# <=# minusAddr# eob s of
-        1# -> case n# >=# 0# of
-          1# -> case runParser# p fp s' s of
-            OK# a s'' -> case eqAddr# s' s'' of
-              1# -> OK# a s''
-              _  -> Fail# -- isolated segment wasn't fully consumed
-            Fail#     -> Fail#
-            Err# e    -> Err# e
-          _  -> error "FlatParse.Basic.isolate: negative integer"
-        _  -> Fail# -- you tried to isolate more than we have left
-{-# inline isolate #-}
-
-
---------------------------------------------------------------------------------
-
--- | Get the current position in the input.
-getPos :: Parser e Pos
-getPos = Parser \fp eob s -> OK# (addrToPos# eob s) s
-{-# inline getPos #-}
-
--- | Set the input position. Warning: this can result in crashes if the position points outside the
---   current buffer. It is always safe to `setPos` values which came from `getPos` with the current
---   input.
-setPos :: Pos -> Parser e ()
-setPos s = Parser \fp eob _ -> OK# () (posToAddr# eob s)
-{-# inline setPos #-}
-
--- | The end of the input.
-endPos :: Pos
-endPos = Pos 0
-{-# inline endPos #-}
-
--- | Return the consumed span of a parser.
-spanOf :: Parser e a -> Parser e Span
-spanOf (Parser f) = Parser \fp eob s -> case f fp eob s of
-  OK# a s' -> OK# (Span (addrToPos# eob s) (addrToPos# eob s')) s'
-  x        -> unsafeCoerce# x
-{-# inline spanOf #-}
-
--- | Bind the result together with the span of the result. CPS'd version of `spanOf`
---   for better unboxing.
-withSpan :: Parser e a -> (a -> Span -> Parser e b) -> Parser e b
-withSpan (Parser f) g = Parser \fp eob s -> case f fp eob s of
-  OK# a s' -> runParser# (g a (Span (addrToPos# eob s) (addrToPos# eob s'))) fp eob s'
-  x        -> unsafeCoerce# x
-{-# inline withSpan #-}
-
--- | Return the `B.ByteString` consumed by a parser. Note: it's more efficient to use `spanOf` and
---   `withSpan` instead.
-byteStringOf :: Parser e a -> Parser e B.ByteString
-byteStringOf (Parser f) = Parser \fp eob s -> case f fp eob s of
-  OK# a s' -> OK# (B.PS (ForeignPtr s fp) 0 (I# (minusAddr# s' s))) s'
-  x        -> unsafeCoerce# x
-{-# inline byteStringOf #-}
-
--- | CPS'd version of `byteStringOf`. Can be more efficient, because the result is more eagerly unboxed
---   by GHC. It's more efficient to use `spanOf` or `withSpan` instead.
-withByteString :: Parser e a -> (a -> B.ByteString -> Parser e b) -> Parser e b
-withByteString (Parser f) g = Parser \fp eob s -> case f fp eob s of
-  OK# a s' -> runParser# (g a (B.PS (ForeignPtr s fp) 0 (I# (minusAddr# s' s)))) fp eob s'
-  x        -> unsafeCoerce# x
-{-# inline withByteString #-}
-
--- | Run a parser in a given input span. The input position and the `Int` state is restored after
---   the parser is finished, so `inSpan` does not consume input and has no side effect.  Warning:
---   this operation may crash if the given span points outside the current parsing buffer. It's
---   always safe to use `inSpan` if the span comes from a previous `withSpan` or `spanOf` call on
---   the current input.
-inSpan :: Span -> Parser e a -> Parser e a
-inSpan (Span s eob) (Parser f) = Parser \fp eob' s' ->
-  case f fp (posToAddr# eob' eob) (posToAddr# eob' s) of
-    OK# a _ -> OK# a s'
-    x       -> unsafeCoerce# x
-{-# inline inSpan #-}
-
---------------------------------------------------------------------------------
-
--- | Check whether a `Pos` points into a `B.ByteString`.
-validPos :: B.ByteString -> Pos -> Bool
-validPos str pos =
-  let go = do
-        start <- getPos
-        pure (start <= pos && pos <= endPos)
-  in case runParser go str of
-    OK b _ -> b
-    _      -> error "impossible"
-{-# inline validPos #-}
-
--- | Compute corresponding line and column numbers for each `Pos` in a list. Throw an error
---   on invalid positions. Note: computing lines and columns may traverse the `B.ByteString`,
---   but it traverses it only once regardless of the length of the position list.
-posLineCols :: B.ByteString -> [Pos] -> [(Int, Int)]
-posLineCols str poss =
-  let go !line !col [] = pure []
-      go line col ((i, pos):poss) = do
-        p <- getPos
-        if pos == p then
-          ((i, (line, col)):) <$> go line col poss
-        else do
-          c <- anyChar
-          if '\n' == c then
-            go (line + 1) 0 ((i, pos):poss)
-          else
-            go line (col + 1) ((i, pos):poss)
-
-      sorted :: [(Int, Pos)]
-      sorted = sortBy (comparing snd) (zip [0..] poss)
-
-  in case runParser (go 0 0 sorted) str of
-       OK res _ -> snd <$> sortBy (comparing fst) res
-       _        -> error "invalid position"
-
--- | Create a `B.ByteString` from a `Span`. The result is invalid if the `Span` points
---   outside the current buffer, or if the `Span` start is greater than the end position.
-unsafeSpanToByteString :: Span -> Parser e B.ByteString
-unsafeSpanToByteString (Span l r) =
-  lookahead (setPos l >> byteStringOf (setPos r))
-{-# inline unsafeSpanToByteString #-}
-
--- | Create a `Pos` from a line and column number. Throws an error on out-of-bounds
---   line and column numbers.
-mkPos :: B.ByteString -> (Int, Int) -> Pos
-mkPos str (line', col') =
-  let go line col | line == line' && col == col' = getPos
-      go line col = (do
-        c <- anyChar
-        if c == '\n' then go (line + 1) 0
-                     else go line (col + 1)) <|> error "mkPos: invalid position"
-  in case runParser (go 0 0) str of
-    OK res _ -> res
-    _        -> error "impossible"
-
--- | Break an UTF-8-coded `B.ByteString` to lines. Throws an error on invalid input.
---   This is mostly useful for grabbing specific source lines for displaying error
---   messages.
-lines :: B.ByteString -> [String]
-lines str =
-  let go = ([] <$ eof) <|> ((:) <$> takeLine <*> go)
-  in case runParser go str of
-    OK ls _ -> ls
-    _       -> error "linesUTF8: invalid input"
-
 --------------------------------------------------------------------------------
 
--- | Parse the rest of the current line as a `String`. Assumes UTF-8 encoding,
---   throws an error if the encoding is invalid.
-takeLine :: Parser e String
-takeLine = branch eof (pure "") do
-  c <- anyChar
-  case c of
-    '\n' -> pure ""
-    _    -> (c:) <$> takeLine
-
--- | Parse the rest of the current line as a `String`, but restore the parsing state.
---   Assumes UTF-8 encoding. This can be used for debugging.
-traceLine :: Parser e String
-traceLine = lookahead takeLine
-
--- | Take the rest of the input as a `String`. Assumes UTF-8 encoding.
-takeRest :: Parser e String
-takeRest = branch eof (pure "") do
-  c <- anyChar
-  cs <- takeRest
-  pure (c:cs)
+-- | Parse a given `B.ByteString`. If the bytestring is statically known,
+--   consider using 'bytes' instead.
+getByteStringOf :: B.ByteString -> Parser e ()
+getByteStringOf (B.PS (ForeignPtr bs fcontent) _ (I# len)) =
 
--- | Get the rest of the input as a `String`, but restore the parsing state. Assumes UTF-8 encoding.
---   This can be used for debugging.
-traceRest :: Parser e String
-traceRest = lookahead takeRest
+  let go64 :: Addr# -> Addr# -> Addr# -> State# RealWorld -> (# Res# e (), State# RealWorld #)
+      go64 bs bsend s w =
+        let bs' = plusAddr# bs 8# in
+        case gtAddr# bs' bsend of
+          1# -> go8 bs bsend s w
+          _  -> if   W64# (indexWord64OffAddr# bs 0#) == W64# (indexWord64OffAddr# s 0#)
+                then go64 bs' bsend (plusAddr# s 8#) w
+                else (# Fail#, w #)
 
---------------------------------------------------------------------------------
+      go8 :: Addr# -> Addr# -> Addr# -> State# RealWorld -> (# Res# e (), State# RealWorld #)
+      go8 bs bsend s w =
+        case ltAddr# bs bsend of
+          1# -> if   W8# (indexWord8OffAddr# bs 0#) == W8# (indexWord8OffAddr# s 0#)
+                then go8 (plusAddr# bs 1#) bsend (plusAddr# s 1#) w
+                else (# Fail#, w #)
+          _  -> (# OK# () s, w #)
 
--- | Convert an UTF-8-coded `B.ByteString` to a `String`.
-unpackUTF8 :: B.ByteString -> String
-unpackUTF8 str = case runParser takeRest str of
-  OK a _ -> a
-  _      -> error "unpackUTF8: invalid encoding"
+  in Parser \fp eob s -> case len <=# minusAddr# eob s of
+       1# -> runRW# \w -> case go64 bs (plusAddr# bs len) s w of
+               (# res, w #) -> case touch# fcontent w of
+                 w -> res
+       _  -> Fail#
+{-# inline getByteStringOf #-}
 
--- | Check that the input has at least the given number of bytes.
-ensureBytes# :: Int -> Parser e ()
-ensureBytes# (I# len) = Parser \fp eob s ->
-  case len  <=# minusAddr# eob s of
-    1# -> OK# () s
-    _  -> Fail#
-{-# inline ensureBytes# #-}
-
--- | Unsafely read a concrete byte from the input. It's not checked that the input has
---   enough bytes.
-scan8# :: Word8 -> Parser e ()
-scan8# (W8# c) = Parser \fp eob s ->
-  case indexWord8OffAddr# s 0# of
-    c' -> case eqWord8'# c c' of
-      1# -> OK# () (plusAddr# s 1#)
-      _  -> Fail#
-{-# inline scan8# #-}
-
--- | Unsafely read two concrete bytes from the input. It's not checked that the input has
---   enough bytes.
-scan16# :: Word16 -> Parser e ()
-scan16# (W16# c) = Parser \fp eob s ->
-  case indexWord16OffAddr# s 0# of
-    c' -> case eqWord16'# c c' of
-      1# -> OK# () (plusAddr# s 2#)
-      _  -> Fail#
-{-# inline scan16# #-}
-
--- | Unsafely read four concrete bytes from the input. It's not checked that the input has
---   enough bytes.
-scan32# :: Word32 -> Parser e ()
-scan32# (W32# c) = Parser \fp eob s ->
-  case indexWord32OffAddr# s 0# of
-    c' -> case eqWord32'# c c' of
-      1# -> OK# () (plusAddr# s 4#)
-      _  -> Fail#
-{-# inline scan32# #-}
-
--- | Unsafely read eight concrete bytes from the input. It's not checked that the input has
---   enough bytes.
-scan64# :: Word -> Parser e ()
-scan64# (W# c) = Parser \fp eob s ->
-  case indexWord64OffAddr# s 0# of
-    c' -> case eqWord# c c' of
-      1# -> OK# () (plusAddr# s 8#)
-      _  -> Fail#
-{-# inline scan64# #-}
-
--- | Unsafely read and return a byte from the input. It's not checked that the input is non-empty.
-scanAny8# :: Parser e Word8
-scanAny8# = Parser \fp eob s -> OK# (W8# (indexWord8OffAddr# s 0#)) (plusAddr# s 1#)
-{-# inline scanAny8# #-}
-
-scanPartial64# :: Int -> Word -> Parser e ()
-scanPartial64# (I# len) (W# w) = Parser \fp eob s ->
-  case indexWordOffAddr# s 0# of
-    w' -> case uncheckedIShiftL# (8# -# len) 3# of
-      sh -> case uncheckedShiftL# w' sh of
-        w' -> case uncheckedShiftRL# w' sh of
-          w' -> case eqWord# w w' of
-            1# -> OK# () (plusAddr# s len)
-            _  -> Fail#
-{-# inline scanPartial64# #-}
-
--- | Decrease the current input position by the given number of bytes.
-setBack# :: Int -> Parser e ()
-setBack# (I# i) = Parser \fp eob s ->
-  OK# () (plusAddr# s (negateInt# i))
-{-# inline setBack# #-}
+-- | Read a sequence of bytes. This is a template function, you can use it as
+--   @$(getBytesOf [3, 4, 5])@, for example, and the splice has type @Parser e
+--   ()@.
+getBytesOf :: [Word] -> Q Exp
+getBytesOf bytes = do
+  let !len = length bytes
+  [| ensureBytes# len >> $(scanBytes# bytes) |]
 
 -- | Template function, creates a @Parser e ()@ which unsafely scans a given
 --   sequence of bytes.
@@ -1071,24 +473,24 @@ scanBytes# :: [Word] -> Q Exp
 scanBytes# bytes = do
   let !(leading, w8s) = splitBytes bytes
       !scanw8s        = go w8s where
-                         go (w8:[] ) = [| scan64# w8 |]
-                         go (w8:w8s) = [| scan64# w8 >> $(go w8s) |]
+                         go (w8:[] ) = [| getWord64OfUnsafe w8 |]
+                         go (w8:w8s) = [| getWord64OfUnsafe w8 >> $(go w8s) |]
                          go []       = [| pure () |]
   case w8s of
     [] -> go leading
           where
-            go (a:b:c:d:[]) = let !w = packBytes [a, b, c, d] in [| scan32# w |]
-            go (a:b:c:d:ws) = let !w = packBytes [a, b, c, d] in [| scan32# w >> $(go ws) |]
-            go (a:b:[])     = let !w = packBytes [a, b]       in [| scan16# w |]
-            go (a:b:ws)     = let !w = packBytes [a, b]       in [| scan16# w >> $(go ws) |]
-            go (a:[])       = [| scan8# a |]
+            go (a:b:c:d:[]) = let !w = packBytes [a, b, c, d] in [| getWord32OfUnsafe w |]
+            go (a:b:c:d:ws) = let !w = packBytes [a, b, c, d] in [| getWord32OfUnsafe w >> $(go ws) |]
+            go (a:b:[])     = let !w = packBytes [a, b]       in [| getWord16OfUnsafe w |]
+            go (a:b:ws)     = let !w = packBytes [a, b]       in [| getWord16OfUnsafe w >> $(go ws) |]
+            go (a:[])       = [| getWord8OfUnsafe a |]
             go []           = [| pure () |]
     _  -> case leading of
 
       []              -> scanw8s
-      [a]             -> [| scan8# a >> $scanw8s |]
-      ws@[a, b]       -> let !w = packBytes ws in [| scan16# w >> $scanw8s |]
-      ws@[a, b, c, d] -> let !w = packBytes ws in [| scan32# w >> $scanw8s |]
+      [a]             -> [| getWord8OfUnsafe a >> $scanw8s |]
+      ws@[a, b]       -> let !w = packBytes ws in [| getWord16OfUnsafe w >> $scanw8s |]
+      ws@[a, b, c, d] -> let !w = packBytes ws in [| getWord32OfUnsafe w >> $scanw8s |]
       ws              -> let !w = packBytes ws
                              !l = length ws
                          in [| scanPartial64# l w >> $scanw8s |]
@@ -1129,7 +531,7 @@ genTrie (rules, t) = do
               !defaultCase  <- fallback r (n + 1)
 
               let cases = mkDoE $
-                    [BindS (VarP (mkName "c")) (VarE 'scanAny8#),
+                    [BindS (VarP (mkName "c")) (VarE 'getWord8Unsafe),
                       NoBindS (CaseE (VarE (mkName "c"))
                          (map (\(w, t) ->
                                  Match (LitP (IntegerL (fromIntegral w)))
@@ -1182,313 +584,145 @@ genSwitchTrie' postAction cases fallback =
 
 --------------------------------------------------------------------------------
 
-withAnyWord8# :: (Word8'# -> Parser e a) -> Parser e a
-withAnyWord8# p = Parser \fp eob buf -> case eqAddr# eob buf of
-  1# -> Fail#
-  _  -> case indexWord8OffAddr# buf 0# of
-    w# -> runParser# (p w#) fp eob (plusAddr# buf 1#)
-{-# inline withAnyWord8# #-}
-
-withAnyWord16# :: (Word16'# -> Parser e a) -> Parser e a
-withAnyWord16# p = Parser \fp eob buf -> case 2# <=# minusAddr# eob buf of
-  0# -> Fail#
-  _  -> case indexWord16OffAddr# buf 0# of
-    w# -> runParser# (p w#) fp eob (plusAddr# buf 2#)
-{-# inline withAnyWord16# #-}
-
-withAnyWord32# :: (Word32'# -> Parser e a) -> Parser e a
-withAnyWord32# p = Parser \fp eob buf -> case 4# <=# minusAddr# eob buf of
-  0# -> Fail#
-  _  -> case indexWord32OffAddr# buf 0# of
-    w# -> runParser# (p w#) fp eob (plusAddr# buf 4#)
-{-# inline withAnyWord32# #-}
-
-withAnyWord64# :: (Word# -> Parser e a) -> Parser e a
-withAnyWord64# p = Parser \fp eob buf -> case 8# <=# minusAddr# eob buf of
-  0# -> Fail#
-  _  -> case indexWordOffAddr# buf 0# of
-    w# -> runParser# (p w#) fp eob (plusAddr# buf 8#)
-{-# inline withAnyWord64# #-}
-
-withAnyInt8# :: (Int8'# -> Parser e a) -> Parser e a
-withAnyInt8# p = Parser \fp eob buf -> case eqAddr# eob buf of
-  1# -> Fail#
-  _  -> case indexInt8OffAddr# buf 0# of
-    i# -> runParser# (p i#) fp eob (plusAddr# buf 1#)
-{-# inline withAnyInt8# #-}
-
-withAnyInt16# :: (Int16'# -> Parser e a) -> Parser e a
-withAnyInt16# p = Parser \fp eob buf -> case 2# <=# minusAddr# eob buf of
-  0# -> Fail#
-  _  -> case indexInt16OffAddr# buf 0# of
-    i# -> runParser# (p i#) fp eob (plusAddr# buf 2#)
-{-# inline withAnyInt16# #-}
-
-withAnyInt32# :: (Int32'# -> Parser e a) -> Parser e a
-withAnyInt32# p = Parser \fp eob buf -> case 4# <=# minusAddr# eob buf of
-  0# -> Fail#
-  _  -> case indexInt32OffAddr# buf 0# of
-    i# -> runParser# (p i#) fp eob (plusAddr# buf 4#)
-{-# inline withAnyInt32# #-}
-
-withAnyInt64# :: (Int# -> Parser e a) -> Parser e a
-withAnyInt64# p = Parser \fp eob buf -> case 8# <=# minusAddr# eob buf of
-  0# -> Fail#
-  _  -> case indexInt64OffAddr# buf 0# of
-    i# -> runParser# (p i#) fp eob (plusAddr# buf 8#)
-{-# inline withAnyInt64# #-}
-
---------------------------------------------------------------------------------
-
--- | Parse any 'Word8' (byte).
-anyWord8 :: Parser e Word8
-anyWord8 = withAnyWord8# (\w# -> pure (W8# w#))
-{-# inline anyWord8 #-}
-
--- | Skip any 'Word8' (byte).
-anyWord8_ :: Parser e ()
-anyWord8_ = () <$ anyWord8
-{-# inline anyWord8_ #-}
-
--- | Parse any 'Word16'.
-anyWord16 :: Parser e Word16
-anyWord16 = withAnyWord16# (\w# -> pure (W16# w#))
-{-# inline anyWord16 #-}
-
--- | Skip any 'Word16'.
-anyWord16_ :: Parser e ()
-anyWord16_ = () <$ anyWord16
-{-# inline anyWord16_ #-}
-
--- | Parse any 'Word32'.
-anyWord32 :: Parser e Word32
-anyWord32 = withAnyWord32# (\w# -> pure (W32# w#))
-{-# inline anyWord32 #-}
-
--- | Skip any 'Word32'.
-anyWord32_ :: Parser e ()
-anyWord32_ = () <$ anyWord32
-{-# inline anyWord32_ #-}
-
--- | Parse any 'Word64'.
-anyWord64 :: Parser e Word64
-anyWord64 = withAnyWord64# (\w# -> pure (W64# w#))
-{-# inline anyWord64 #-}
-
--- | Skip any 'Word64'.
-anyWord64_ :: Parser e ()
-anyWord64_ = () <$ anyWord64
-{-# inline anyWord64_ #-}
-
--- | Parse any 'Word'.
-anyWord :: Parser e Word
-anyWord = withAnyWord64# (\w# -> pure (W# w#))
-{-# inline anyWord #-}
-
--- | Skip any 'Word'.
-anyWord_ :: Parser e ()
-anyWord_ = () <$ anyWord
-{-# inline anyWord_ #-}
-
---------------------------------------------------------------------------------
-
--- | Parse any 'Int8'.
-anyInt8 :: Parser e Int8
-anyInt8 = withAnyInt8# (\i# -> pure (I8# i#))
-{-# inline anyInt8 #-}
-
--- | Parse any 'Int16'.
-anyInt16 :: Parser e Int16
-anyInt16 = withAnyInt16# (\i# -> pure (I16# i#))
-{-# inline anyInt16 #-}
-
--- | Parse any 'Int32'.
-anyInt32 :: Parser e Int32
-anyInt32 = withAnyInt32# (\i# -> pure (I32# i#))
-{-# inline anyInt32 #-}
+-- | Branch on a parser: if the first argument succeeds, continue with the second, else with the third.
+--   This can produce slightly more efficient code than `(<|>)`. Moreover, `ḃranch` does not
+--   backtrack from the true/false cases.
+branch :: Parser e a -> Parser e b -> Parser e b -> Parser e b
+branch pa pt pf = Parser \fp eob s -> case runParser# pa fp eob s of
+  OK# _ s -> runParser# pt fp eob s
+  Fail#   -> runParser# pf fp eob s
+  Err# e  -> Err# e
+{-# inline branch #-}
 
--- | Parse any 'Int64'.
-anyInt64 :: Parser e Int64
-anyInt64 = withAnyInt64# (\i# -> pure (I64# i#))
-{-# inline anyInt64 #-}
+-- | @isolate n p@ runs the parser @p@ isolated to the next @n@ bytes. All
+--   isolated bytes must be consumed.
+--
+-- Throws a runtime error if given a negative integer.
+isolate :: Int -> Parser e a -> Parser e a
+isolate (I# n#) p = withPosInt# n# (\n'# -> isolateUnsafe# n'# p)
+{-# inline isolate #-}
 
--- | Parse any 'Int'.
-anyInt :: Parser e Int
-anyInt = withAnyInt64# (\i# -> pure (I# i#))
-{-# inline anyInt #-}
+-- | @isolate n p@ runs the parser @p@ isolated to the next @n@ bytes. All
+--   isolated bytes must be consumed.
+--
+-- Undefined behaviour if given a negative integer.
+isolateUnsafe# :: Int# -> Parser e a -> Parser e a
+isolateUnsafe# n# p = Parser \fp eob s ->
+  let s' = plusAddr# s n#
+  in  case n# <=# minusAddr# eob s of
+        1# -> case runParser# p fp s' s of
+          OK# a s'' -> case eqAddr# s' s'' of
+            1# -> OK# a s''
+            _  -> Fail# -- isolated segment wasn't fully consumed
+          Fail#     -> Fail#
+          Err# e    -> Err# e
+        _  -> Fail# -- you tried to isolate more than we have left
+{-# inline isolateUnsafe# #-}
 
 --------------------------------------------------------------------------------
 
--- | Parse any 'Word16' (little-endian).
-anyWord16le :: Parser e Word16
-anyWord16le = anyWord16
-{-# inline anyWord16le #-}
-
--- | Parse any 'Word16' (big-endian).
-anyWord16be :: Parser e Word16
-anyWord16be = withAnyWord16# (\w# -> pure (W16# (byteSwap16'# w#)))
-{-# inline anyWord16be #-}
-
--- | Parse any 'Word32' (little-endian).
-anyWord32le :: Parser e Word32
-anyWord32le = anyWord32
-{-# inline anyWord32le #-}
-
--- | Parse any 'Word32' (big-endian).
-anyWord32be :: Parser e Word32
-anyWord32be = withAnyWord32# (\w# -> pure (W32# (byteSwap32'# w#)))
-{-# inline anyWord32be #-}
+-- | Convert an UTF-8-coded `B.ByteString` to a `String`.
+unpackUTF8 :: B.ByteString -> String
+unpackUTF8 str = case runParser takeRestString str of
+  OK a _ -> a
+  _      -> error "unpackUTF8: invalid encoding"
 
--- | Parse any 'Word64' (little-endian).
-anyWord64le :: Parser e Word64
-anyWord64le = anyWord64
-{-# inline anyWord64le #-}
+-- | Take the rest of the input as a `String`. Assumes UTF-8 encoding.
+takeRestString :: Parser e String
+takeRestString = branch eof (pure "") do
+  c <- getChar
+  cs <- takeRestString
+  pure (c:cs)
 
--- | Parse any 'Word64' (big-endian).
-anyWord64be :: Parser e Word64
-anyWord64be = withAnyWord64# (\w# -> pure (W64# (byteSwap# w#)))
-{-# inline anyWord64be #-}
+-- | Get the rest of the input as a `String`, but restore the parsing state. Assumes UTF-8 encoding.
+--   This can be used for debugging.
+traceRestString :: Parser e String
+traceRestString = lookahead takeRestString
 
 --------------------------------------------------------------------------------
 
--- | Parse any 'Int16' (little-endian).
-anyInt16le :: Parser e Int16
-anyInt16le = anyInt16
-{-# inline anyInt16le #-}
-
--- | Parse any 'Int16' (big-endian).
-anyInt16be :: Parser e Int16
-anyInt16be = withAnyWord16# (\w# -> pure (I16# (word16ToInt16# (byteSwap16'# w#))))
-{-# inline anyInt16be #-}
-
--- | Parse any 'Int32' (little-endian).
-anyInt32le :: Parser e Int32
-anyInt32le = anyInt32
-{-# inline anyInt32le #-}
-
--- | Parse any 'Int32' (big-endian).
-anyInt32be :: Parser e Int32
-anyInt32be = withAnyWord32# (\w# -> pure (I32# (word32ToInt32# (byteSwap32'# w#))))
-{-# inline anyInt32be #-}
+-- | Parse the rest of the current line as a `String`. Assumes UTF-8 encoding,
+--   throws an error if the encoding is invalid.
+takeLine :: Parser e String
+takeLine = branch eof (pure "") do
+  c <- getChar
+  case c of
+    '\n' -> pure ""
+    _    -> (c:) <$> takeLine
 
--- | Parse any 'Int64' (little-endian).
-anyInt64le :: Parser e Int64
-anyInt64le = anyInt64
-{-# inline anyInt64le #-}
+-- | Parse the rest of the current line as a `String`, but restore the parsing state.
+--   Assumes UTF-8 encoding. This can be used for debugging.
+traceLine :: Parser e String
+traceLine = lookahead takeLine
 
--- | Parse any 'Int64' (big-endian).
-anyInt64be :: Parser e Int64
-anyInt64be = withAnyWord64# (\w# -> pure (I64# (word2Int# (byteSwap# w#))))
-{-# inline anyInt64be #-}
+-- | Run a parser on a `String` input. Reminder: @OverloadedStrings@ for `B.ByteString` does not
+--   yield a valid UTF-8 encoding! For non-ASCII `B.ByteString` literal input, use `runParserS` or
+--   `packUTF8` for testing.
+runParserS :: Parser e a -> String -> Result e a
+runParserS pa s = runParser pa (packUTF8 s)
 
---------------------------------------------------------------------------------
+-- | Create a `Pos` from a line and column number. Throws an error on out-of-bounds
+--   line and column numbers.
+mkPos :: B.ByteString -> (Int, Int) -> Pos
+mkPos str (line', col') =
+  let go line col | line == line' && col == col' = getPos
+      go line col = (do
+        c <- getChar
+        if c == '\n' then go (line + 1) 0
+                     else go line (col + 1)) <|> error "mkPos: invalid position"
+  in case runParser (go 0 0) str of
+    OK res _ -> res
+    _        -> error "impossible"
 
--- | Skip forward @n@ bytes and run the given parser. Fails if fewer than @n@
---   bytes are available.
---
--- Throws a runtime error if given a negative integer.
-atSkip# :: Int# -> Parser e a -> Parser e a
-atSkip# os# (Parser p) = Parser \fp eob s -> case os# <=# minusAddr# eob s of
-  1# -> case os# >=# 0# of
-    1# -> p fp eob (plusAddr# s os#)
-    _  -> error "FlatParse.Basic.atSkip#: negative integer"
-  _  -> Fail#
-{-# inline atSkip# #-}
+-- | Break an UTF-8-coded `B.ByteString` to lines. Throws an error on invalid
+--   input. This is mostly useful for grabbing specific source lines for
+--   displaying error messages.
+lines :: B.ByteString -> [String]
+lines str =
+  let go = ([] <$ eof) <|> ((:) <$> takeLine <*> go)
+  in case runParser go str of
+    OK ls _ -> ls
+    _       -> error "linesUTF8: invalid input"
 
--- | Read the given number of bytes as a 'ByteString'.
---
--- Throws a runtime error if given a negative integer.
-takeBs# :: Int# -> Parser e B.ByteString
-takeBs# n# = Parser \fp eob s -> case n# <=# minusAddr# eob s of
-  1# -> -- have to runtime check for negative values, because they cause a hang
-    case n# >=# 0# of
-      1# -> OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) (plusAddr# s n#)
-      _  -> error "FlatParse.Basic.takeBs: negative integer"
-  _  -> Fail#
-{-# inline takeBs# #-}
 
---------------------------------------------------------------------------------
+-- | Check whether a `Pos` points into a `B.ByteString`.
+validPos :: B.ByteString -> Pos -> Bool
+validPos str pos =
+  let go = do
+        start <- getPos
+        pure (start <= pos && pos <= endPos)
+  in case runParser go str of
+    OK b _ -> b
+    _      -> error "impossible"
+{-# inline validPos #-}
 
--- | Run a parser, passing it the current address the parser is at.
---
--- Useful for parsing offset-based data tables. For example, you may use this to
--- save the base address to use together with various 0-indexed offsets.
-withAddr# :: (Addr# -> Parser e a) -> Parser e a
-withAddr# p = Parser \fp eob s -> runParser# (p s) fp eob s
-{-# inline withAddr# #-}
-
--- | @takeBsOffAddr# addr# offset# len#@ moves to @addr#@, skips @offset#@
---   bytes, reads @len#@ bytes into a 'ByteString', and restores the original
---   address.
---
--- The 'Addr#' should be from 'withAddr#'.
---
--- Useful for parsing offset-based data tables. For example, you may use this
--- together with 'withAddr#' to jump to an offset in your input and read some
--- data.
-takeBsOffAddr# :: Addr# -> Int# -> Int# -> Parser e B.ByteString
-takeBsOffAddr# addr# offset# len# =
-    lookaheadFromAddr# addr# $ atSkip# offset# $ takeBs# len#
-{-# inline takeBsOffAddr# #-}
-
--- | 'lookahead', but specify the address to lookahead from.
---
--- The 'Addr#' should be from 'withAddr#'.
-lookaheadFromAddr# :: Addr# -> Parser e a -> Parser e a
-lookaheadFromAddr# s = lookahead . atAddr# s
-{-# inline lookaheadFromAddr# #-}
+-- | Compute corresponding line and column numbers for each `Pos` in a list. Throw an error
+--   on invalid positions. Note: computing lines and columns may traverse the `B.ByteString`,
+--   but it traverses it only once regardless of the length of the position list.
+posLineCols :: B.ByteString -> [Pos] -> [(Int, Int)]
+posLineCols str poss =
+  let go !line !col [] = pure []
+      go line col ((i, pos):poss) = do
+        p <- getPos
+        if pos == p then
+          ((i, (line, col)):) <$> go line col poss
+        else do
+          c <- getChar
+          if '\n' == c then
+            go (line + 1) 0 ((i, pos):poss)
+          else
+            go line (col + 1) ((i, pos):poss)
 
--- | Run a parser at the given address.
---
--- The 'Addr#' should be from 'withAddr#'.
---
--- This is a highly internal function -- you likely want 'lookaheadFromAddr#',
--- which will reset the address after running the parser.
-atAddr# :: Addr# -> Parser e a -> Parser e a
-atAddr# s (Parser p) = Parser \fp eob _ -> p fp eob s
-{-# inline atAddr# #-}
+      sorted :: [(Int, Pos)]
+      sorted = sortBy (comparing snd) (zip [0..] poss)
 
---------------------------------------------------------------------------------
+  in case runParser (go 0 0 sorted) str of
+       OK res _ -> snd <$> sortBy (comparing fst) res
+       _        -> error "invalid position"
 
--- | Read a null-terminated bytestring (a C-style string).
---
--- Consumes the null terminator.
-anyCString :: Parser e B.ByteString
-anyCString = Parser \fp eob s -> go' fp eob s
-  where
-    go' fp eob s0 = go 0# s0
-      where
-        go n# s = case eqAddr# eob s of
-          1# -> Fail#
-          _  ->
-            let s' = plusAddr# s 1#
-            -- TODO below is a candidate for improving with ExtendedLiterals!
-            in  case eqWord8# (indexWord8OffAddr''# s 0#) (wordToWord8''# 0##) of
-                  1# -> OK# (B.PS (ForeignPtr s0 fp) 0 (I# n#)) s'
-                  _  -> go (n# +# 1#) s'
-{-# inline anyCString #-}
-
--- | Read a null-terminated bytestring (a C-style string), where the bytestring
---   is known to be null-terminated somewhere in the input.
---
--- Highly unsafe. Unless you have a guarantee that the string will be null
--- terminated before the input ends, use 'anyCString' instead. Honestly, I'm not
--- sure if this is a good function to define. But here it is.
---
--- Fails on GHC versions older than 9.0, since we make use of the
--- 'cstringLength#' primop introduced in GHC 9.0, and we aren't very useful
--- without it.
---
--- Consumes the null terminator.
-anyCStringUnsafe :: Parser e B.ByteString
-{-# inline anyCStringUnsafe #-}
-#if MIN_VERSION_base(4,15,0)
-anyCStringUnsafe = Parser \fp eob s ->
-  case eqAddr# eob s of
-    1# -> Fail#
-    _  -> let n#  = cstringLength# s
-              s'# = plusAddr# s (n# +# 1#)
-           in OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) s'#
-#else
-anyCStringUnsafe = error "Flatparse.Basic.anyCStringUnsafe: requires GHC 9.0 / base-4.15, not available on this compiler"
-#endif
+-- | Create a `B.ByteString` from a `Span`. The result is invalid if the `Span` points
+--   outside the current buffer, or if the `Span` start is greater than the end position.
+unsafeSpanToByteString :: Span -> Parser e B.ByteString
+unsafeSpanToByteString (Span l r) =
+  lookahead (setPos l >> byteStringOf (setPos r))
+{-# inline unsafeSpanToByteString #-}
diff --git a/src/FlatParse/Basic/Chars.hs b/src/FlatParse/Basic/Chars.hs
new file mode 100644
index 0000000..de44048
--- /dev/null
+++ b/src/FlatParse/Basic/Chars.hs
@@ -0,0 +1,171 @@
+module FlatParse.Basic.Chars where
+
+import Prelude hiding ( getChar )
+
+import FlatParse.Basic.Parser
+
+import FlatParse.Common.Assorted ( derefChar8# )
+
+import GHC.Exts
+
+-- | Parse a UTF-8 `Char` for which a predicate holds.
+satisfy :: (Char -> Bool) -> Parser e Char
+satisfy f = Parser \fp eob s -> case runParser# getChar fp eob s of
+  OK# c s | f c -> OK# c s
+  _             -> Fail#
+{-#  inline satisfy #-}
+
+-- | Skip a UTF-8 `Char` for which a predicate holds.
+satisfy_ :: (Char -> Bool) -> Parser e ()
+satisfy_ f = Parser \fp eob s -> case runParser# getChar fp eob s of
+  OK# c s | f c -> OK# () s
+  _             -> Fail#
+{-#  inline satisfy_ #-}
+
+-- | Parse an ASCII `Char` for which a predicate holds. Assumption: the predicate must only return
+--   `True` for ASCII-range characters. Otherwise this function might read a 128-255 range byte,
+--   thereby breaking UTF-8 decoding.
+satisfyASCII :: (Char -> Bool) -> Parser e Char
+satisfyASCII f = Parser \fp eob s -> case eqAddr# eob s of
+  1# -> Fail#
+  _  -> case derefChar8# s of
+    c1 | f (C# c1) -> OK# (C# c1) (plusAddr# s 1#)
+       | otherwise -> Fail#
+{-#  inline satisfyASCII #-}
+
+-- | Skip an ASCII `Char` for which a predicate holds. Assumption: the predicate
+--   must only return `True` for ASCII-range characters.
+satisfyASCII_ :: (Char -> Bool) -> Parser e ()
+satisfyASCII_ f = Parser \fp eob s -> case eqAddr# eob s of
+  1# -> Fail#
+  _  -> case derefChar8# s of
+    c1 | f (C# c1) -> OK# () (plusAddr# s 1#)
+       | otherwise -> Fail#
+{-#  inline satisfyASCII_ #-}
+
+-- | This is a variant of `satisfy` which allows more optimization. We can pick four testing
+--   functions for the four cases for the possible number of bytes in the UTF-8 character. So in
+--   @fusedSatisfy f1 f2 f3 f4@, if we read a one-byte character, the result is scrutinized with
+--   @f1@, for two-bytes, with @f2@, and so on. This can result in dramatic lexing speedups.
+--
+--   For example, if we want to accept any letter, the naive solution would be to use
+--   `Data.Char.isLetter`, but this accesses a large lookup table of Unicode character classes. We
+--   can do better with @fusedSatisfy isLatinLetter isLetter isLetter isLetter@, since here the
+--   `isLatinLetter` is inlined into the UTF-8 decoding, and it probably handles a great majority of
+--   all cases without accessing the character table.
+fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser e Char
+fusedSatisfy f1 f2 f3 f4 = Parser \fp eob buf -> case eqAddr# eob buf of
+  1# -> Fail#
+  _  -> case derefChar8# buf of
+    c1 -> case c1 `leChar#` '\x7F'# of
+      1# | f1 (C# c1) -> OK# (C# c1) (plusAddr# buf 1#)
+         | otherwise  -> Fail#
+      _  -> case eqAddr# eob (plusAddr# buf 1#) of
+        1# -> Fail#
+        _ -> case indexCharOffAddr# buf 1# of
+          c2 -> case c1 `leChar#` '\xDF'# of
+            1# ->
+              let resc = C# (chr# (((ord# c1 -# 0xC0#) `uncheckedIShiftL#` 6#) `orI#`
+                                   (ord# c2 -# 0x80#)))
+              in case f2 resc of
+                   True -> OK# resc (plusAddr# buf 2#)
+                   _    -> Fail#
+            _ -> case eqAddr# eob (plusAddr# buf 2#) of
+              1# -> Fail#
+              _  -> case indexCharOffAddr# buf 2# of
+                c3 -> case c1 `leChar#` '\xEF'# of
+                  1# ->
+                    let resc = C# (chr# (((ord# c1 -# 0xE0#) `uncheckedIShiftL#` 12#) `orI#`
+                                         ((ord# c2 -# 0x80#) `uncheckedIShiftL#`  6#) `orI#`
+                                         (ord# c3 -# 0x80#)))
+                    in case f3 resc of
+                         True -> OK# resc (plusAddr# buf 3#)
+                         _    -> Fail#
+                  _ -> case eqAddr# eob (plusAddr# buf 3#) of
+                    1# -> Fail#
+                    _  -> case indexCharOffAddr# buf 3# of
+                      c4 ->
+                        let resc = C# (chr# (((ord# c1 -# 0xF0#) `uncheckedIShiftL#` 18#) `orI#`
+                                             ((ord# c2 -# 0x80#) `uncheckedIShiftL#` 12#) `orI#`
+                                             ((ord# c3 -# 0x80#) `uncheckedIShiftL#`  6#) `orI#`
+                                              (ord# c4 -# 0x80#)))
+                        in case f4 resc of
+                             True -> OK# resc (plusAddr# buf 4#)
+                             _    -> Fail#
+{-# inline fusedSatisfy #-}
+
+-- | Skipping variant of `fusedSatisfy`.
+fusedSatisfy_ :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser e ()
+fusedSatisfy_ f1 f2 f3 f4 = () <$ fusedSatisfy f1 f2 f3 f4
+{-# inline fusedSatisfy_ #-}
+
+-- | Parse any UTF-8-encoded `Char`.
+getChar :: Parser e Char
+getChar = Parser \fp eob buf -> case eqAddr# eob buf of
+  1# -> Fail#
+  _  -> case derefChar8# buf of
+    c1 -> case c1 `leChar#` '\x7F'# of
+      1# -> OK# (C# c1) (plusAddr# buf 1#)
+      _  -> case eqAddr# eob (plusAddr# buf 1#) of
+        1# -> Fail#
+        _ -> case indexCharOffAddr# buf 1# of
+          c2 -> case c1 `leChar#` '\xDF'# of
+            1# ->
+              let resc = ((ord# c1 -# 0xC0#) `uncheckedIShiftL#` 6#) `orI#`
+                          (ord# c2 -# 0x80#)
+              in OK# (C# (chr# resc)) (plusAddr# buf 2#)
+            _ -> case eqAddr# eob (plusAddr# buf 2#) of
+              1# -> Fail#
+              _  -> case indexCharOffAddr# buf 2# of
+                c3 -> case c1 `leChar#` '\xEF'# of
+                  1# ->
+                    let resc = ((ord# c1 -# 0xE0#) `uncheckedIShiftL#` 12#) `orI#`
+                               ((ord# c2 -# 0x80#) `uncheckedIShiftL#`  6#) `orI#`
+                                (ord# c3 -# 0x80#)
+                    in OK# (C# (chr# resc)) (plusAddr# buf 3#)
+                  _ -> case eqAddr# eob (plusAddr# buf 3#) of
+                    1# -> Fail#
+                    _  -> case indexCharOffAddr# buf 3# of
+                      c4 ->
+                        let resc = ((ord# c1 -# 0xF0#) `uncheckedIShiftL#` 18#) `orI#`
+                                   ((ord# c2 -# 0x80#) `uncheckedIShiftL#` 12#) `orI#`
+                                   ((ord# c3 -# 0x80#) `uncheckedIShiftL#`  6#) `orI#`
+                                    (ord# c4 -# 0x80#)
+                        in OK# (C# (chr# resc)) (plusAddr# buf 4#)
+{-# inline getChar #-}
+
+-- | Skip any UTF-8-encoded `Char`.
+getChar_ :: Parser e ()
+getChar_ = Parser \fp eob buf -> case eqAddr# eob buf of
+  1# -> Fail#
+  _  -> case derefChar8# buf of
+    c1 -> case c1 `leChar#` '\x7F'# of
+      1# -> OK# () (plusAddr# buf 1#)
+      _  ->
+        let buf' =
+              case c1 `leChar#` '\xDF'# of
+                1# -> plusAddr# buf 2#
+                _  -> case c1 `leChar#` '\xEF'# of
+                    1# -> plusAddr# buf 3#
+                    _ ->  plusAddr# buf 4#
+        in case leAddr# buf' eob of
+             1# -> OK# () buf'
+             _  -> Fail#
+{-# inline getChar_ #-}
+
+-- | Parse any `Char` in the ASCII range, fail if the next input character is not in the range.
+--   This is more efficient than `getChar` if we are only working with ASCII.
+getCharASCII :: Parser e Char
+getCharASCII = Parser \fp eob buf -> case eqAddr# eob buf of
+  1# -> Fail#
+  _  -> case derefChar8# buf of
+    c1 -> case c1 `leChar#` '\x7F'# of
+      1# -> OK# (C# c1) (plusAddr# buf 1#)
+      _  -> Fail#
+{-# inline getCharASCII #-}
+
+-- | Skip any `Char` in the ASCII range. More efficient than `getChar_` if we're working only with
+--   ASCII.
+getCharASCII_ :: Parser e ()
+getCharASCII_ = () <$ getCharASCII
+{-# inline getCharASCII_ #-}
diff --git a/src/FlatParse/Basic/Integers.hs b/src/FlatParse/Basic/Integers.hs
new file mode 100644
index 0000000..f5d7516
--- /dev/null
+++ b/src/FlatParse/Basic/Integers.hs
@@ -0,0 +1,340 @@
+{- | Machine integer parsers.
+
+The following naming conventions are followed:
+
+  * @withX@ are continuation passing style (CPS) parsers.
+    * These are sprinkled throughout to enable better reasoning about
+      performance.
+  * @getX@ are regular monadic parsers.
+  * @getXOf@ parse and check equality with a provided value.
+  * Definitions ending with @#@ are called with unboxed values.
+  * Definitions ending with @Unsafe@ are unsafe. Read their documentation before
+    using.
+
+Other points:
+
+  * TODO: The endianness code is currently lying. We blindly assume that our
+    host system is little-endian, and parse in big-endian by parsing normally
+    then "reversing" the resulting integer.
+  * Unless otherwise mentioned, native byte order is utilized.
+-}
+
+module FlatParse.Basic.Integers
+  (
+  -- * Machine integer parsers (native byte order)
+    getWord8, getWord16, getWord32, getWord64
+  , getInt8,  getInt16,  getInt32,  getInt64
+  , getWord, getInt
+
+  -- * Machine integer parsers (explicit endianness)
+  , getWord16le, getWord16be
+  , getWord32le, getWord32be
+  , getWord64le, getWord64be
+  , getInt16le,  getInt16be
+  , getInt32le,  getInt32be
+  , getInt64le,  getInt64be
+
+  -- * Machine integer parsers which assert value
+  , getWord8Of
+
+  -- * Machine integer CPS parsers
+  , withWord8, withWord16, withWord32, withWord64
+  , withInt8,  withInt16,  withInt32,  withInt64
+  , withWord, withInt
+
+  -- * Unsafe machine integer parsers
+  , getWord8Unsafe
+
+  -- * Unsafe machine integer parsers which assert value
+  , getWord8OfUnsafe, getWord16OfUnsafe, getWord32OfUnsafe, getWord64OfUnsafe
+
+  -- * Helper definitions
+  , withSized#, withSizedUnsafe#
+  , getSizedOfUnsafe#
+  ) where
+
+import GHC.Exts
+import GHC.Word
+import GHC.Int
+
+import FlatParse.Basic.Parser
+import FlatParse.Common.Assorted ( word16ToInt16, word32ToInt32, word64ToInt64 )
+
+--------------------------------------------------------------------------------
+
+-- | Helper for defining CPS parsers for types of a constant byte size (i.e.
+--   machine integers).
+--
+-- Call this with an @indexXYZOffAddr@ primop (e.g.
+-- 'GHC.Exts.indexWord8OffAddr') and the size in bytes of the type you're
+-- parsing.
+withSized#
+    :: Int# -> (Addr# -> Int# -> a) -> (a -> Parser e r) -> Parser e r
+withSized# size# indexOffAddr p = Parser \fp eob buf ->
+    case size# <=# minusAddr# eob buf of
+      0# -> Fail#
+      _  -> runParser# (withSizedUnsafe# size# indexOffAddr p) fp eob buf
+{-# inline withSized# #-}
+
+-- | Unsafe helper for defining CPS parsers for types of a constant byte size
+--   (i.e. machine integers).
+--
+-- The caller must guarantee that the input has enough bytes.
+withSizedUnsafe#
+    :: Int# -> (Addr# -> Int# -> a) -> (a -> Parser e r) -> Parser e r
+withSizedUnsafe# size# indexOffAddr p = Parser \fp eob buf ->
+  let a    = indexOffAddr buf 0#
+      buf' = plusAddr# buf size#
+  in  runParser# (p a) fp eob buf'
+{-# inline withSizedUnsafe# #-}
+
+-- | Parse any 'Word8' (CPS).
+withWord8 :: (Word8 -> Parser e r) -> Parser e r
+withWord8 p = Parser \fp eob buf -> case eqAddr# eob buf of
+  1# -> Fail#
+  _  -> let w# = indexWord8OffAddr# buf 0#
+        in  runParser# (p (W8# w#)) fp eob (plusAddr# buf 1#)
+{-# inline withWord8 #-}
+
+-- | Parse any 'Word16' (native byte order) (CPS).
+withWord16 :: (Word16 -> Parser e r) -> Parser e r
+withWord16 = withSized# 2# (\a i -> W16# (indexWord16OffAddr# a i))
+{-# inline withWord16 #-}
+
+-- | Parse any 'Word32' (native byte order) (CPS).
+withWord32 :: (Word32 -> Parser e r) -> Parser e r
+withWord32 = withSized# 4# (\a i -> W32# (indexWord32OffAddr# a i))
+{-# inline withWord32 #-}
+
+-- | Parse any 'Word64' (native byte order) (CPS).
+withWord64 :: (Word64 -> Parser e r) -> Parser e r
+withWord64 = withSized# 8# (\a i -> W64# (indexWord64OffAddr# a i))
+{-# inline withWord64 #-}
+
+-- | Parse any 'Int8' (CPS).
+withInt8 :: (Int8 -> Parser e r) -> Parser e r
+withInt8 p = Parser \fp eob buf -> case eqAddr# eob buf of
+  1# -> Fail#
+  _  -> let i# = indexInt8OffAddr# buf 0#
+        in  runParser# (p (I8# i#)) fp eob (plusAddr# buf 1#)
+{-# inline withInt8 #-}
+
+-- | Parse any 'Int16' (native byte order) (CPS).
+withInt16 :: (Int16 -> Parser e r) -> Parser e r
+withInt16 = withSized# 2# (\a i -> I16# (indexInt16OffAddr# a i))
+{-# inline withInt16 #-}
+
+-- | Parse any 'Int32' (native byte order) (CPS).
+withInt32 :: (Int32 -> Parser e r) -> Parser e r
+withInt32 = withSized# 4# (\a i -> I32# (indexInt32OffAddr# a i))
+{-# inline withInt32 #-}
+
+-- | Parse any 'Int64' (native byte order) (CPS).
+withInt64 :: (Int64 -> Parser e r) -> Parser e r
+withInt64 = withSized# 8# (\a i -> I64# (indexInt64OffAddr# a i))
+{-# inline withInt64 #-}
+
+-- | Parse any 'Word' (CPS).
+--
+-- TODO assumes 64-bit platform
+withWord :: (Word -> Parser e r) -> Parser e r
+withWord p = Parser \fp eob buf -> case 8# <=# minusAddr# eob buf of
+  0# -> Fail#
+  _  -> let w# = indexWordOffAddr# buf 0#
+        in  runParser# (p (W# w#)) fp eob (plusAddr# buf 8#)
+{-# inline withWord #-}
+
+-- | Parse any 'Int' (CPS).
+--
+-- TODO assumes 64-bit platform
+withInt :: (Int -> Parser e r) -> Parser e r
+withInt p = Parser \fp eob buf -> case 8# <=# minusAddr# eob buf of
+  0# -> Fail#
+  _  -> let i# = indexIntOffAddr# buf 0#
+        in  runParser# (p (I# i#)) fp eob (plusAddr# buf 8#)
+{-# inline withInt #-}
+
+--------------------------------------------------------------------------------
+
+-- | Parse any 'Word8'.
+getWord8 :: Parser e Word8
+getWord8 = withWord8 pure
+{-# inline getWord8 #-}
+
+-- | Parse any 'Word16' (native byte order).
+getWord16 :: Parser e Word16
+getWord16 = withWord16 pure
+{-# inline getWord16 #-}
+
+-- | Parse any 'Word32' (native byte order).
+getWord32 :: Parser e Word32
+getWord32 = withWord32 pure
+{-# inline getWord32 #-}
+
+-- | Parse any 'Word64' (native byte order).
+getWord64 :: Parser e Word64
+getWord64 = withWord64 pure
+{-# inline getWord64 #-}
+
+-- | Parse any 'Int8'.
+getInt8 :: Parser e Int8
+getInt8 = withInt8 pure
+{-# inline getInt8 #-}
+
+-- | Parse any 'Int16' (native byte order).
+getInt16 :: Parser e Int16
+getInt16 = withInt16 pure
+{-# inline getInt16 #-}
+
+-- | Parse any 'Int32' (native byte order).
+getInt32 :: Parser e Int32
+getInt32 = withInt32 pure
+{-# inline getInt32 #-}
+
+-- | Parse any 'Int64' (native byte order).
+getInt64 :: Parser e Int64
+getInt64 = withInt64 pure
+{-# inline getInt64 #-}
+
+-- | Parse any 'Word'.
+--
+-- TODO 'withWord' assumes 64-bit platform
+getWord :: Parser e Word
+getWord = withWord pure
+{-# inline getWord #-}
+
+-- | Parse any 'Int'.
+--
+-- TODO 'withInt' assumes 64-bit platform
+getInt :: Parser e Int
+getInt = withInt pure
+{-# inline getInt #-}
+
+--------------------------------------------------------------------------------
+
+-- | Parse any 'Word16' (little-endian).
+getWord16le :: Parser e Word16
+getWord16le = getWord16
+{-# inline getWord16le #-}
+
+-- | Parse any 'Word16' (big-endian).
+getWord16be :: Parser e Word16
+getWord16be = withWord16 (pure . byteSwap16)
+{-# inline getWord16be #-}
+
+-- | Parse any 'Word32' (little-endian).
+getWord32le :: Parser e Word32
+getWord32le = getWord32
+{-# inline getWord32le #-}
+
+-- | Parse any 'Word32' (big-endian).
+getWord32be :: Parser e Word32
+getWord32be = withWord32 (pure . byteSwap32)
+{-# inline getWord32be #-}
+
+-- | Parse any 'Word64' (little-endian).
+getWord64le :: Parser e Word64
+getWord64le = getWord64
+{-# inline getWord64le #-}
+
+-- | Parse any 'Word64' (big-endian).
+getWord64be :: Parser e Word64
+getWord64be = withWord64 (pure . byteSwap64)
+{-# inline getWord64be #-}
+
+-- | Parse any 'Int16' (little-endian).
+getInt16le :: Parser e Int16
+getInt16le = getInt16
+{-# inline getInt16le #-}
+
+-- | Parse any 'Int16' (big-endian).
+getInt16be :: Parser e Int16
+getInt16be = withWord16 (pure . word16ToInt16 . byteSwap16)
+{-# inline getInt16be #-}
+
+-- | Parse any 'Int32' (little-endian).
+getInt32le :: Parser e Int32
+getInt32le = getInt32
+{-# inline getInt32le #-}
+
+-- | Parse any 'Int32' (big-endian).
+getInt32be :: Parser e Int32
+getInt32be = withWord32 (pure . word32ToInt32 . byteSwap32)
+{-# inline getInt32be #-}
+
+-- | Parse any 'Int64' (little-endian).
+getInt64le :: Parser e Int64
+getInt64le = getInt64
+{-# inline getInt64le #-}
+
+-- | Parse any 'Int64' (big-endian).
+getInt64be :: Parser e Int64
+getInt64be = withWord64 (pure . word64ToInt64 . byteSwap64)
+{-# inline getInt64be #-}
+
+--------------------------------------------------------------------------------
+
+-- | Unsafe helper for defining parsers for types of a constant byte size (i.e.
+--   machine integers) which assert the parsed value's... value.
+--
+-- Call this with an @indexXYZOffAddr@ primop (e.g.
+-- 'GHC.Exts.indexWord8OffAddr'), the size in bytes of the type you're parsing,
+-- and the expected value to test the parsed value against.
+--
+-- The caller must guarantee that the input has enough bytes.
+getSizedOfUnsafe# :: Eq a => Int# -> (Addr# -> Int# -> a) -> a -> Parser e ()
+getSizedOfUnsafe# size# indexOffAddr aExpected =
+    withSizedUnsafe# size# indexOffAddr go
+  where
+    go aParsed =
+        if   aParsed == aExpected
+        then pure ()
+        else failed
+{-# inline getSizedOfUnsafe# #-}
+
+-- | Read the next 1 byte and assert its value as a 'Word8'.
+getWord8Of :: Word8 -> Parser e ()
+getWord8Of wExpected = Parser \fp eob buf -> case eqAddr# eob buf of
+  1# -> Fail#
+  _  -> let w# = indexWord8OffAddr# buf 0#
+        in  if   W8# w# == wExpected
+            then OK# () (plusAddr# buf 1#)
+            else Fail#
+{-# inline getWord8Of #-}
+
+-- | Unsafely read the next 1 byte and assert its value as a 'Word8'.
+--
+-- The caller must guarantee that the input has enough bytes.
+getWord8OfUnsafe :: Word8 -> Parser e ()
+getWord8OfUnsafe = getSizedOfUnsafe# 1# (\a i -> W8# (indexWord8OffAddr# a i))
+{-# inline getWord8OfUnsafe #-}
+
+-- | Unsafely read the next 2 bytes and assert their value as a 'Word16'.
+--
+-- The caller must guarantee that the input has enough bytes.
+getWord16OfUnsafe :: Word16 -> Parser e ()
+getWord16OfUnsafe = getSizedOfUnsafe# 2# (\a i -> W16# (indexWord16OffAddr# a i))
+{-# inline getWord16OfUnsafe #-}
+
+-- | Unsafely read the next 4 bytes and assert their value as a 'Word32'.
+--
+-- The caller must guarantee that the input has enough bytes.
+getWord32OfUnsafe :: Word32 -> Parser e ()
+getWord32OfUnsafe = getSizedOfUnsafe# 4# (\a i -> W32# (indexWord32OffAddr# a i))
+{-# inline getWord32OfUnsafe #-}
+
+-- | Unsafely read the next 8 bytes and assert their value as a 'Word64'.
+--
+-- The caller must guarantee that the input has enough bytes.
+getWord64OfUnsafe :: Word64 -> Parser e ()
+getWord64OfUnsafe = getSizedOfUnsafe# 8# (\a i -> W64# (indexWord64OffAddr# a i))
+{-# inline getWord64OfUnsafe #-}
+
+--------------------------------------------------------------------------------
+
+-- | Unsafely parse any 'Word8', without asserting the input is non-empty.
+--
+-- The caller must guarantee that the input has enough bytes.
+getWord8Unsafe :: Parser e Word8
+getWord8Unsafe = withSizedUnsafe# 1# (\a i -> W8# (indexWord8OffAddr# a i)) pure
+{-# inline getWord8Unsafe #-}
diff --git a/src/FlatParse/Basic/Internal.hs b/src/FlatParse/Basic/Internal.hs
new file mode 100644
index 0000000..c8fee49
--- /dev/null
+++ b/src/FlatParse/Basic/Internal.hs
@@ -0,0 +1,197 @@
+{-# language UnboxedTuples #-}
+
+module FlatParse.Basic.Internal where
+
+import FlatParse.Basic.Parser
+
+import GHC.Exts
+import GHC.Word
+import GHC.ForeignPtr
+
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Internal as B
+
+--------------------------------------------------------------------------------
+
+-- | Save the parsing state, then run a parser, then restore the state.
+lookahead :: Parser e a -> Parser e a
+lookahead (Parser f) = Parser \fp eob s ->
+  case f fp eob s of
+    OK# a _ -> OK# a s
+    x       -> x
+{-# inline lookahead #-}
+
+--------------------------------------------------------------------------------
+
+-- | Check that the input has at least the given number of bytes.
+ensureBytes# :: Int -> Parser e ()
+ensureBytes# (I# len) = Parser \fp eob s ->
+  case len  <=# minusAddr# eob s of
+    1# -> OK# () s
+    _  -> Fail#
+{-# inline ensureBytes# #-}
+
+scanPartial64# :: Int -> Word -> Parser e ()
+scanPartial64# (I# len) (W# w) = Parser \fp eob s ->
+  case indexWordOffAddr# s 0# of
+    w' -> case uncheckedIShiftL# (8# -# len) 3# of
+      sh -> case uncheckedShiftL# w' sh of
+        w' -> case uncheckedShiftRL# w' sh of
+          w' -> case eqWord# w w' of
+            1# -> OK# () (plusAddr# s len)
+            _  -> Fail#
+{-# inline scanPartial64# #-}
+
+-- | Decrease the current input position by the given number of bytes.
+setBack# :: Int -> Parser e ()
+setBack# (I# i) = Parser \fp eob s ->
+  OK# () (plusAddr# s (negateInt# i))
+{-# inline setBack# #-}
+
+--------------------------------------------------------------------------------
+-- Helpers for common internal operations
+
+-- | Assert for the given 'Int#' that @n >= 0@, and pass it on to the given
+--   function.
+--
+-- Throws a runtime error if given a negative integer.
+withPosInt# :: Int# -> (Int# -> a) -> a
+withPosInt# n# f = case n# >=# 0# of
+  1# -> f n#
+  _  -> error "FlatParse.Basic.Internal.withPosInt#: negative integer"
+{-# inline withPosInt# #-}
+
+-- | Run the given parser only if we have not yet reached the end of the buffer.
+withNotEob :: Parser e a -> Parser e a
+withNotEob (Parser p) = Parser \fp eob s -> case eqAddr# eob s of
+  1# -> Fail#
+  _  -> p fp eob s
+{-# inline withNotEob #-}
+
+--------------------------------------------------------------------------------
+-- Low level unboxed combinators
+
+-- | Read the given number of bytes as a 'ByteString'.
+--
+-- Throws a runtime error if given a negative integer.
+takeBs# :: Int# -> Parser e B.ByteString
+takeBs# n# = withPosInt# n# takeBsUnsafe#
+{-# inline takeBs# #-}
+
+-- | Read the given number of bytes as a 'ByteString'.
+--
+-- Undefined behaviour if given a negative integer.
+takeBsUnsafe# :: Int# -> Parser e B.ByteString
+takeBsUnsafe# n# = Parser \fp eob s -> case n# <=# minusAddr# eob s of
+  1# -> OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) (plusAddr# s n#)
+  _  -> Fail#
+{-# inline takeBsUnsafe# #-}
+
+-- | Skip forward @n@ bytes and run the given parser. Fails if fewer than @n@
+--   bytes are available.
+--
+-- Throws a runtime error if given a negative integer.
+atSkip# :: Int# -> Parser e a -> Parser e a
+atSkip# os# p = withPosInt# os# (\n# -> atSkipUnsafe# n# p)
+{-# inline atSkip# #-}
+
+-- | Skip forward @n@ bytes and run the given parser. Fails if fewer than @n@
+--   bytes are available.
+--
+-- Undefined behaviour if given a negative integer.
+atSkipUnsafe# :: Int# -> Parser e a -> Parser e a
+atSkipUnsafe# os# (Parser p) = Parser \fp eob s -> case os# <=# minusAddr# eob s of
+  1# -> p fp eob (plusAddr# s os#)
+  _  -> Fail#
+{-# inline atSkipUnsafe# #-}
+
+-- | Skip forward @n@ bytes. Fails if fewer than @n@ bytes are available.
+--
+-- Throws a runtime error if given a negative integer.
+skip# :: Int# -> Parser e ()
+skip# os# = atSkip# os# (pure ())
+{-# inline skip# #-}
+
+-- | Run a parser, passing it the current address the parser is at.
+--
+-- Useful for parsing offset-based data tables. For example, you may use this to
+-- save the base address to use together with various 0-indexed offsets.
+withAddr# :: (Addr# -> Parser e a) -> Parser e a
+withAddr# p = Parser \fp eob s -> runParser# (p s) fp eob s
+{-# inline withAddr# #-}
+
+-- | @takeBsOffAddr# addr# offset# len#@ moves to @addr#@, skips @offset#@
+--   bytes, reads @len#@ bytes into a 'ByteString', and restores the original
+--   address.
+--
+-- The 'Addr#' should be from 'withAddr#'.
+--
+-- Useful for parsing offset-based data tables. For example, you may use this
+-- together with 'withAddr#' to jump to an offset in your input and read some
+-- data.
+takeBsOffAddr# :: Addr# -> Int# -> Int# -> Parser e B.ByteString
+takeBsOffAddr# addr# offset# len# =
+    lookaheadFromAddr# addr# $ atSkip# offset# $ takeBs# len#
+{-# inline takeBsOffAddr# #-}
+
+-- | 'lookahead', but specify the address to lookahead from.
+--
+-- The 'Addr#' should be from 'withAddr#'.
+lookaheadFromAddr# :: Addr# -> Parser e a -> Parser e a
+lookaheadFromAddr# s = lookahead . atAddr# s
+{-# inline lookaheadFromAddr# #-}
+
+-- | Run a parser at the given address.
+--
+-- The 'Addr#' should be from 'withAddr#'.
+--
+-- This is a highly internal function -- you likely want 'lookaheadFromAddr#',
+-- which will reset the address after running the parser.
+atAddr# :: Addr# -> Parser e a -> Parser e a
+atAddr# s (Parser p) = Parser \fp eob _ -> p fp eob s
+{-# inline atAddr# #-}
+
+--------------------------------------------------------------------------------
+-- Low-level boxed combinators
+
+-- | Read a null-terminated bytestring (a C-style string).
+--
+-- Consumes the null terminator.
+getCString :: Parser e B.ByteString
+getCString = Parser \fp eob s -> go' fp eob s
+  where
+    go' fp eob s0 = go 0# s0
+      where
+        go n# s = case eqAddr# eob s of
+          1# -> Fail#
+          _  ->
+            let s' = plusAddr# s 1#
+                w# = indexWord8OffAddr# s 0#
+            in  if   W8# w# == 0x00
+                then OK# (B.PS (ForeignPtr s0 fp) 0 (I# n#)) s'
+                else go (n# +# 1#) s'
+{-# inline getCString #-}
+
+-- | Read a null-terminated bytestring (a C-style string), where the bytestring
+--   is known to be null-terminated somewhere in the input.
+--
+-- Undefined behaviour if your bytestring isn't null-terminated somewhere.
+-- You almost certainly want 'getCString' instead.
+--
+-- Fails on GHC versions older than 9.0, since we make use of the
+-- 'cstringLength#' primop introduced in GHC 9.0, and we aren't very useful
+-- without it.
+--
+-- Consumes the null terminator.
+getCStringUnsafe :: Parser e B.ByteString
+{-# inline getCStringUnsafe #-}
+#if MIN_VERSION_base(4,15,0)
+getCStringUnsafe = Parser \fp eob s ->
+  case eqAddr# eob s of
+    1# -> Fail#
+    _  -> let n#  = cstringLength# s
+              s'# = plusAddr# s (n# +# 1#)
+           in OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) s'#
+#else
+getCStringUnsafe = error "Flatparse.Basic.getCStringUnsafe: requires GHC 9.0 / base-4.15, not available on this compiler"
+#endif
diff --git a/src/FlatParse/Basic/Parser.hs b/src/FlatParse/Basic/Parser.hs
new file mode 100644
index 0000000..a3117cd
--- /dev/null
+++ b/src/FlatParse/Basic/Parser.hs
@@ -0,0 +1,135 @@
+{-# language UnboxedTuples #-}
+
+module FlatParse.Basic.Parser
+  (
+  -- * Parser & result types
+    Parser(..)
+  , Res#
+  , pattern OK#, pattern Err#, pattern Fail#
+
+  -- * Primitive combinators
+  , failed
+  ) where
+
+import GHC.Exts ( Addr#, unsafeCoerce# )
+import GHC.ForeignPtr ( ForeignPtrContents )
+
+import qualified Control.Applicative as Base
+import Control.Monad ( MonadPlus(..) )
+
+-- | @Parser e a@ has an error type @e@ and a return type @a@.
+newtype Parser e a = Parser {runParser# :: ForeignPtrContents -> Addr# -> Addr# -> Res# e a}
+
+instance Functor (Parser e) where
+  fmap f (Parser g) = Parser \fp eob s -> case g fp eob s of
+    OK# a s -> let !b = f a in OK# b s
+    x       -> unsafeCoerce# x
+  {-# inline fmap #-}
+
+  (<$) a' (Parser g) = Parser \fp eob s -> case g fp eob s of
+    OK# a s -> OK# a' s
+    x       -> unsafeCoerce# x
+  {-# inline (<$) #-}
+
+instance Applicative (Parser e) where
+  pure a = Parser \fp eob s -> OK# a s
+  {-# inline pure #-}
+  Parser ff <*> Parser fa = Parser \fp eob s -> case ff fp eob s of
+    OK# f s -> case fa fp eob s of
+      OK# a s  -> let !b = f a in OK# b s
+      x        -> unsafeCoerce# x
+    x -> unsafeCoerce# x
+  {-# inline (<*>) #-}
+  Parser fa <* Parser fb = Parser \fp eob s -> case fa fp eob s of
+    OK# a s   -> case fb fp eob s of
+      OK# b s -> OK# a s
+      x -> unsafeCoerce# x
+    x -> unsafeCoerce# x
+  {-# inline (<*) #-}
+  Parser fa *> Parser fb = Parser \fp eob s -> case fa fp eob s of
+    OK# a s -> fb fp eob s
+    x       -> unsafeCoerce# x
+  {-# inline (*>) #-}
+
+instance Monad (Parser e) where
+  return = pure
+  {-# inline return #-}
+  Parser fa >>= f = Parser \fp eob s -> case fa fp eob s of
+    OK# a s -> runParser# (f a) fp eob s
+    x       -> unsafeCoerce# x
+  {-# inline (>>=) #-}
+  (>>) = (*>)
+  {-# inline (>>) #-}
+
+instance Base.Alternative (Parser e) where
+  empty = failed
+  {-# inline empty #-}
+
+  (<|>) = (<|>)
+  {-# inline (Base.<|>) #-}
+
+  -- TODO more efficient than default? (we do want to inline, so)
+  many (Parser f) = Parser go where
+    go fp eob s = case f fp eob s of
+      OK# a s -> case go fp eob s of
+                   OK# as s -> OK# (a:as) s
+                   x        -> x
+      Fail#  -> OK# [] s
+      Err# e -> Err# e
+  {-# inline many #-}
+
+  some p = (:) <$> p <*> Base.many p
+  {-# inline some #-}
+
+-- | The failing parser. By default, parser choice `(<|>)` arbitrarily backtracks
+--   on parser failure.
+failed :: Parser e a
+failed = Parser \fp eob s -> Fail#
+{-# inline failed #-}
+
+infixr 6 <|>
+(<|>) :: Parser e a -> Parser e a -> Parser e a
+(<|>) (Parser f) (Parser g) = Parser \fp eob s ->
+  case f fp eob s of
+    Fail# -> g fp eob s
+    x     -> x
+{-# inline[1] (<|>) #-}
+
+{-# RULES
+
+"flatparse/reassoc-alt" forall l m r. (l <|> m) <|> r = l <|> (m <|> r)
+
+#-}
+
+instance MonadPlus (Parser e) where
+  mzero = failed
+  {-# inline mzero #-}
+  mplus = (<|>)
+  {-# inline mplus #-}
+
+--------------------------------------------------------------------------------
+
+-- | Primitive result of a parser, stored using an unboxed sum.
+--
+-- Possible results are given by the pattern synonyms 'OK#', 'Fail#' and
+-- 'Error#' (in that order).
+type Res# e a =
+  (#
+    (# a, Addr# #)
+  | (# #)
+  | (# e #)
+  #)
+
+-- | 'Res#' constructor for a successful parse. Contains return value and a
+--   pointer to the rest of the input buffer.
+pattern OK# :: a -> Addr# -> Res# e a
+pattern OK# a s = (# (# a, s #) | | #)
+
+-- | 'Res#' constructor for recoverable failure.
+pattern Fail# :: Res# e a
+pattern Fail# = (# | (# #) | #)
+
+-- | 'Res#' constructor for errors which are by default non-recoverable.
+pattern Err# :: e -> Res# e a
+pattern Err# e = (# | | (# e #) #)
+{-# complete OK#, Fail#, Err# #-}
diff --git a/src/FlatParse/Basic/Position.hs b/src/FlatParse/Basic/Position.hs
new file mode 100644
index 0000000..f566610
--- /dev/null
+++ b/src/FlatParse/Basic/Position.hs
@@ -0,0 +1,71 @@
+module FlatParse.Basic.Position where
+
+import FlatParse.Basic.Parser
+import FlatParse.Common.Position
+
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Internal as B
+
+import GHC.Exts
+import GHC.ForeignPtr
+
+-- | Get the current position in the input.
+getPos :: Parser e Pos
+getPos = Parser \fp eob s -> OK# (addrToPos# eob s) s
+{-# inline getPos #-}
+
+-- | Set the input position. Warning: this can result in crashes if the position points outside the
+--   current buffer. It is always safe to `setPos` values which came from `getPos` with the current
+--   input.
+setPos :: Pos -> Parser e ()
+setPos s = Parser \fp eob _ -> OK# () (posToAddr# eob s)
+{-# inline setPos #-}
+
+-- | The end of the input.
+endPos :: Pos
+endPos = Pos 0
+{-# inline endPos #-}
+
+-- | Return the consumed span of a parser.
+spanOf :: Parser e a -> Parser e Span
+spanOf (Parser f) = Parser \fp eob s -> case f fp eob s of
+  OK# a s' -> OK# (Span (addrToPos# eob s) (addrToPos# eob s')) s'
+  x        -> unsafeCoerce# x
+{-# inline spanOf #-}
+
+-- | Bind the result together with the span of the result. CPS'd version of
+--   `spanOf` for better unboxing.
+withSpan :: Parser e a -> (a -> Span -> Parser e r) -> Parser e r
+withSpan (Parser f) g = Parser \fp eob s -> case f fp eob s of
+  OK# a s' -> runParser# (g a (Span (addrToPos# eob s) (addrToPos# eob s'))) fp eob s'
+  x        -> unsafeCoerce# x
+{-# inline withSpan #-}
+
+-- | Return the `B.ByteString` consumed by a parser. Note: it's more efficient
+--   to use `spanOf` and `withSpan` instead.
+byteStringOf :: Parser e a -> Parser e B.ByteString
+byteStringOf (Parser f) = Parser \fp eob s -> case f fp eob s of
+  OK# a s' -> OK# (B.PS (ForeignPtr s fp) 0 (I# (minusAddr# s' s))) s'
+  x        -> unsafeCoerce# x
+{-# inline byteStringOf #-}
+
+-- | CPS'd version of `byteStringOf`. Can be more efficient, because the result
+--   is more eagerly unboxed by GHC. It's more efficient to use `spanOf` or
+--   `withSpan` instead.
+withByteString :: Parser e a -> (a -> B.ByteString -> Parser e r) -> Parser e r
+withByteString (Parser f) g = Parser \fp eob s -> case f fp eob s of
+  OK# a s' -> runParser# (g a (B.PS (ForeignPtr s fp) 0 (I# (minusAddr# s' s)))) fp eob s'
+  x        -> unsafeCoerce# x
+{-# inline withByteString #-}
+
+-- | Run a parser in a given input span. The input position and the `Int` state is restored after
+--   the parser is finished, so `inSpan` does not consume input and has no side effect.  Warning:
+--   this operation may crash if the given span points outside the current parsing buffer. It's
+--   always safe to use `inSpan` if the span comes from a previous `withSpan` or `spanOf` call on
+--   the current input.
+inSpan :: Span -> Parser e a -> Parser e a
+inSpan (Span s eob) (Parser f) = Parser \fp eob' s' ->
+  case f fp (posToAddr# eob' eob) (posToAddr# eob' s) of
+    OK# a _ -> OK# a s'
+    x       -> unsafeCoerce# x
+{-# inline inSpan #-}
diff --git a/src/FlatParse/Common/Assorted.hs b/src/FlatParse/Common/Assorted.hs
new file mode 100644
index 0000000..140351b
--- /dev/null
+++ b/src/FlatParse/Common/Assorted.hs
@@ -0,0 +1,135 @@
+{-# language UnboxedTuples #-}
+
+module FlatParse.Common.Assorted
+  (
+  -- * Compatibility
+    shortInteger
+
+  -- * 'Char' predicates
+  , isDigit, isLatinLetter, isGreekLetter
+
+  -- * UTF conversions
+  , packUTF8, charToBytes, strToBytes, packBytes, splitBytes, derefChar8#
+
+  -- * Boxed integer coercions
+  -- $boxed-integer-coercion
+  , word16ToInt16
+  , word32ToInt32
+  , word64ToInt64
+  ) where
+
+import Data.Bits
+import Data.Char ( ord )
+import Data.Foldable (foldl')
+import GHC.Exts
+
+import qualified Data.ByteString as B
+
+import Data.Word
+import Data.Int
+
+#if MIN_VERSION_base(4,15,0)
+import GHC.Num.Integer (Integer(..))
+#else
+import GHC.Integer.GMP.Internals (Integer(..))
+#endif
+
+-- Compatibility
+--------------------------------------------------------------------------------
+
+shortInteger :: Int# -> Integer
+#if MIN_VERSION_base(4,15,0)
+shortInteger = IS
+#else
+shortInteger = S#
+#endif
+{-# inline shortInteger #-}
+
+
+-- Char predicates
+--------------------------------------------------------------------------------
+
+-- | @isDigit c = \'0\' <= c && c <= \'9\'@
+isDigit :: Char -> Bool
+isDigit c = '0' <= c && c <= '9'
+{-# inline isDigit #-}
+
+-- | @isLatinLetter c = (\'A\' <= c && c <= \'Z\') || (\'a\' <= c && c <= \'z\')@
+isLatinLetter :: Char -> Bool
+isLatinLetter c = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
+{-# inline isLatinLetter #-}
+
+-- | @isGreekLetter c = (\'Α\' <= c && c <= \'Ω\') || (\'α\' <= c && c <= \'ω\')@
+isGreekLetter :: Char -> Bool
+isGreekLetter c = ('Α' <= c && c <= 'Ω') || ('α' <= c && c <= 'ω')
+{-# inline isGreekLetter #-}
+
+-- UTF conversions
+--------------------------------------------------------------------------------
+
+-- | Convert a `String` to an UTF-8-coded `B.ByteString`.
+packUTF8 :: String -> B.ByteString
+packUTF8 str = B.pack $ do
+  c <- str
+  w <- charToBytes c
+  pure (fromIntegral w)
+
+charToBytes :: Char -> [Word]
+charToBytes c'
+    | c <= 0x7f     = [fromIntegral c]
+    | c <= 0x7ff    = [0xc0 .|. y, 0x80 .|. z]
+    | c <= 0xffff   = [0xe0 .|. x, 0x80 .|. y, 0x80 .|. z]
+    | c <= 0x10ffff = [0xf0 .|. w, 0x80 .|. x, 0x80 .|. y, 0x80 .|. z]
+    | otherwise = error "Not a valid Unicode code point"
+  where
+    c = ord c'
+    z = fromIntegral (c                 .&. 0x3f)
+    y = fromIntegral (unsafeShiftR c 6  .&. 0x3f)
+    x = fromIntegral (unsafeShiftR c 12 .&. 0x3f)
+    w = fromIntegral (unsafeShiftR c 18 .&. 0x7)
+
+strToBytes :: String -> [Word]
+strToBytes = concatMap charToBytes
+{-# inline strToBytes #-}
+
+packBytes :: [Word] -> Word
+packBytes = fst . foldl' go (0, 0) where
+  go (acc, shift) w | shift == 64 = error "packWords: too many bytes"
+  go (acc, shift) w = (unsafeShiftL (fromIntegral w) shift .|. acc, shift+8)
+
+-- TODO chunks into 8-bytes for 64-bit performance
+splitBytes :: [Word] -> ([Word], [Word])
+splitBytes ws = case quotRem (length ws) 8 of
+  (0, _) -> (ws, [])
+  (_, r) -> (as, chunk8s bs) where
+              (as, bs) = splitAt r ws
+              chunk8s [] = []
+              chunk8s ws = let (as, bs) = splitAt 8 ws in
+                           packBytes as : chunk8s bs
+
+derefChar8# :: Addr# -> Char#
+derefChar8# addr = indexCharOffAddr# addr 0#
+{-# inline derefChar8# #-}
+
+--------------------------------------------------------------------------------
+
+{- $boxed-integer-coercion
+
+These functions should be no-ops. They correspond to the similarly-named GHC 9.4
+primops which work on unboxed integers.
+-}
+
+-- | Coerce a 'Word16' to 'Int16'.
+word16ToInt16 :: Word16 -> Int16
+word16ToInt16 = fromIntegral
+{-# inline word16ToInt16 #-}
+
+-- | Coerce a 'Word32' to 'Int32'.
+word32ToInt32 :: Word32 -> Int32
+word32ToInt32 = fromIntegral
+{-# inline word32ToInt32 #-}
+
+-- | Coerce a 'Word64' to 'Int64'.
+word64ToInt64 :: Word64 -> Int64
+word64ToInt64 = fromIntegral
+{-# inline word64ToInt64 #-}
diff --git a/src/FlatParse/Common/Numbers.hs b/src/FlatParse/Common/Numbers.hs
new file mode 100644
index 0000000..fbf6826
--- /dev/null
+++ b/src/FlatParse/Common/Numbers.hs
@@ -0,0 +1,70 @@
+{-# language UnboxedTuples #-}
+
+module FlatParse.Common.Numbers where
+
+import FlatParse.Common.Assorted ( shortInteger )
+
+import GHC.Exts
+import GHC.Word
+import GHC.ForeignPtr
+
+import qualified Data.ByteString.Char8 as BC8
+import qualified Data.ByteString.Internal as B
+
+mul10 :: Int# -> Int#
+mul10 n = uncheckedIShiftL# n 3# +# uncheckedIShiftL# n 1#
+{-# inline mul10 #-}
+
+readInt' :: Int# -> Addr# -> Addr# -> (# Int#, Addr# #)
+readInt' acc s end = case eqAddr# s end of
+  1# -> (# acc, s #)
+  _  ->
+    let w# = indexWord8OffAddr# s 0#
+    in  if   0x30 <= W8# w# && W8# w# <= 0x39
+        then
+          let !(I# w'#) = fromIntegral (W8# w#)
+          in  readInt' (mul10 acc +# (w'# -# 0x30#)) (plusAddr# s 1#) end
+        else (# acc, s #)
+
+-- | Read an `Int` from the input, as a non-empty digit sequence. The `Int` may
+--   overflow in the result.
+readInt :: Addr# -> Addr# -> (# (##) | (# Int#, Addr# #) #)
+readInt eob s = case readInt' 0# s eob of
+  (# n, s' #) | 1# <- eqAddr# s s' -> (# (##) | #)
+              | otherwise          -> (# | (# n, s' #) #)
+{-# inline readInt #-}
+
+-- | Read an `Integer` from the input, as a non-empty digit sequence.
+readInteger :: ForeignPtrContents -> Addr# -> Addr# -> (# (##) | (# Integer, Addr# #) #)
+readInteger fp eob s = case readInt' 0# s eob of
+  (# n, s' #)
+    | 1# <- eqAddr# s s'            -> (# (##) | #)
+    | 1# <- minusAddr# s' s <=# 18# -> (# | (# shortInteger n, s' #) #)
+    | otherwise -> case BC8.readInteger (B.PS (ForeignPtr s fp) 0 (I# (minusAddr# s' s))) of
+        Nothing     -> (# (##) | #)
+        Just (i, _) -> (# | (# i, s' #) #)
+{-# inline readInteger #-}
+
+readIntHex' :: Int# -> Addr# -> Addr# -> (# Int#, Addr# #)
+readIntHex' acc s end = case eqAddr# s end of
+  1# -> (# acc, s #)
+  _  ->
+    let w# = indexWord8OffAddr# s 0#
+    in  if 0x30 <= W8# w# && W8# w# <= 0x39
+        then let !(I# w'#) = fromIntegral (W8# w#)
+             in  readIntHex' (uncheckedIShiftL# acc 4# +# (w'# -# 0x30#)) (plusAddr# s 1#) end
+        else if 0x41 <= W8# w# && W8# w# <= 0x46
+        then let !(I# w'#) = fromIntegral (W8# w#)
+             in  readIntHex' (uncheckedIShiftL# acc 4# +# (w'# -# 0x37#)) (plusAddr# s 1#) end
+        else if 0x61 <= W8# w# && W8# w# <= 0x66
+        then let !(I# w'#) = fromIntegral (W8# w#)
+             in  readIntHex' (uncheckedIShiftL# acc 4# +# (w'# -# 0x57#)) (plusAddr# s 1#) end
+        else (# acc, s #)
+
+-- | Read an `Int` from the input, as a non-empty case-insensitive ASCII
+--   hexadecimal digit sequence. The `Int` may overflow in the result.
+{-# INLINE readIntHex #-}
+readIntHex :: Addr# -> Addr# -> (# (##) | (# Int#, Addr# #) #)
+readIntHex eob s = case readIntHex' 0# s eob of
+    (# n, s' #) | 1# <- eqAddr# s s' -> (# (##) | #)
+                | otherwise          -> (# | (# n, s' #) #)
diff --git a/src/FlatParse/Common/Position.hs b/src/FlatParse/Common/Position.hs
new file mode 100644
index 0000000..14f87a8
--- /dev/null
+++ b/src/FlatParse/Common/Position.hs
@@ -0,0 +1,42 @@
+module FlatParse.Common.Position where
+
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Internal as B
+import GHC.Int
+import GHC.ForeignPtr ( ForeignPtr(..) )
+import GHC.Exts
+
+-- | Byte offset counted backwards from the end of the buffer.
+--
+-- TODO can't use unlifted 'Int#' because of type classes. bugger! some day
+newtype Pos = Pos Int deriving (Eq, Show)
+
+-- | A pair of positions.
+data Span = Span !Pos !Pos deriving (Eq, Show)
+
+instance Ord Pos where
+  Pos p <= Pos p' = p' <= p
+  Pos p <  Pos p' = p' <  p
+  Pos p >  Pos p' = p' >  p
+  Pos p >= Pos p' = p' >= p
+  {-# inline (<=) #-}
+  {-# inline (<) #-}
+  {-# inline (>) #-}
+  {-# inline (>=) #-}
+
+addrToPos# :: Addr# -> Addr# -> Pos
+addrToPos# eob s = Pos (I# (minusAddr# eob s))
+{-# inline addrToPos# #-}
+
+posToAddr# :: Addr# -> Pos -> Addr#
+posToAddr# eob (Pos (I# n)) = unsafeCoerce# (minusAddr# eob (unsafeCoerce# n))
+{-# inline posToAddr# #-}
+
+-- | Slice into a `B.ByteString` using a `Span`. The result is invalid if the `Span`
+--   is not a valid slice of the first argument.
+unsafeSlice :: B.ByteString -> Span -> B.ByteString
+unsafeSlice (B.PS (ForeignPtr addr fp) (I# start) (I# len))
+            (Span (Pos (I# o1)) (Pos (I# o2))) =
+  let end = addr `plusAddr#` start `plusAddr#` len
+  in B.PS (ForeignPtr (plusAddr# end (negateInt# o1)) fp) (I# 0#) (I# (o1 -# o2))
+{-# inline unsafeSlice #-}
diff --git a/src/FlatParse/Common/Trie.hs b/src/FlatParse/Common/Trie.hs
new file mode 100644
index 0000000..49bdbed
--- /dev/null
+++ b/src/FlatParse/Common/Trie.hs
@@ -0,0 +1,85 @@
+module FlatParse.Common.Trie where
+
+import FlatParse.Common.Assorted ( charToBytes )
+
+import qualified Data.Map.Strict as M
+import Data.Map.Strict ( Map )
+import Data.Foldable ( foldl' )
+
+data Trie a = Branch !a !(Map Word (Trie a))
+  deriving Show
+
+type Rule = Maybe Int
+
+nilTrie :: Trie Rule
+nilTrie = Branch Nothing mempty
+
+updRule :: Int -> Maybe Int -> Maybe Int
+updRule rule = Just . maybe rule (min rule)
+
+insert :: Int -> [Word] -> Trie Rule -> Trie Rule
+insert rule = go where
+  go [] (Branch rule' ts) =
+    Branch (updRule rule rule') ts
+  go (c:cs) (Branch rule' ts) =
+    Branch rule' (M.alter (Just . maybe (go cs nilTrie) (go cs)) c ts)
+
+listToTrie :: [(Int, String)] -> Trie Rule
+listToTrie = foldl' (\t (!r, !s) -> insert r (charToBytes =<< s) t) nilTrie
+
+-- | Decorate a trie with the minimum lengths of non-empty paths. This
+--   is used later to place `ensureBytes#`.
+mindepths :: Trie Rule -> Trie (Rule, Int)
+mindepths (Branch rule ts) =
+  if M.null ts then
+    Branch (rule, 0) mempty
+  else
+    let !ts' = M.map mindepths ts in
+    Branch (
+      rule,
+      minimum (M.map (\(Branch (rule,d) _) -> maybe (d + 1) (\_ -> 1) rule) ts'))
+      ts'
+
+data Trie' a
+  = Branch' !a !(Map Word (Trie' a))
+  | Path !a ![Word] !(Trie' a)
+  deriving Show
+
+-- | Compress linear paths.
+pathify :: Trie (Rule, Int) -> Trie' (Rule, Int)
+pathify (Branch a ts) = case M.toList ts of
+  [] -> Branch' a mempty
+  [(w, t)] -> case pathify t of
+           Path (Nothing, _) ws t -> Path a (w:ws) t
+           t                      -> Path a [w] t
+  _   -> Branch' a (M.map pathify ts)
+
+-- | Compute where to fall back after we exhausted a branch. If the branch is
+--   empty, that means we've succeded at reading and we jump to the rhs rule.
+fallbacks :: Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
+fallbacks = go Nothing 0  where
+  go :: Rule -> Int -> Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
+  go !rule !n (Branch' (rule', d) ts)
+    | M.null ts        = Branch' (rule', 0, d) mempty
+    | Nothing <- rule' = Branch' (rule, n, d) (go rule (n + 1) <$> ts)
+    | otherwise        = Branch' (rule', 0, d) (go rule' 1     <$> ts)
+  go rule n (Path (rule', d) ws t)
+    | Nothing <- rule' = Path (rule, n, d)  ws (go rule (n + length ws) t)
+    | otherwise        = Path (rule', 0, d) ws (go rule' (length ws) t)
+
+-- | Decorate with `ensureBytes#` invocations, represented as
+--   `Maybe Int`.
+ensureBytes :: Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Maybe Int)
+ensureBytes = go 0 where
+  go :: Int -> Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Maybe Int)
+  go !res = \case
+    Branch' (r, n, d) ts
+      | M.null ts -> Branch' (r, n, Nothing) mempty
+      | res < 1   -> Branch' (r, n, Just d ) (go (d   - 1) <$> ts)
+      | otherwise -> Branch' (r, n, Nothing) (go (res - 1) <$> ts)
+    Path (r, n, d) ws t -> case length ws of
+      l | res < l   -> Path (r, n, Just $! d - res) ws (go (d - l)   t)
+        | otherwise -> Path (r, n, Nothing        ) ws (go (res - l) t)
+
+compileTrie :: [(Int, String)] -> Trie' (Rule, Int, Maybe Int)
+compileTrie = ensureBytes . fallbacks . pathify . mindepths . listToTrie
diff --git a/src/FlatParse/Examples/BasicLambda/Lexer.hs b/src/FlatParse/Examples/BasicLambda/Lexer.hs
index a80355a..7a3935a 100644
--- a/src/FlatParse/Examples/BasicLambda/Lexer.hs
+++ b/src/FlatParse/Examples/BasicLambda/Lexer.hs
@@ -8,7 +8,7 @@ demonstrates a simple but decently informative implementation of error message p
 
 module FlatParse.Examples.BasicLambda.Lexer where
 
-import FlatParse.Basic hiding (Parser, runParser, string, char, cut)
+import FlatParse.Basic hiding ( Parser, runParser )
 
 import qualified FlatParse.Basic as FP
 import qualified Data.ByteString as B
@@ -117,7 +117,7 @@ testParser p str = case packUTF8 str of
 -- | Parse a line comment.
 lineComment :: Parser ()
 lineComment =
-  withOption anyWord8
+  withOption getWord8
     (\case 10 -> ws
            _  -> lineComment)
     (pure ())
@@ -129,7 +129,7 @@ multilineComment = go (1 :: Int) where
   go n = $(switch [| case _ of
     "-}" -> go (n - 1)
     "{-" -> go (n + 1)
-    _    -> branch anyWord8 (go n) (pure ()) |])
+    _    -> branch getWord8 (go n) (pure ()) |])
 
 -- | Consume whitespace.
 ws :: Parser ()
@@ -173,7 +173,7 @@ isKeyword span = inSpan span do
 
 -- | Parse a non-keyword string.
 symbol :: String -> Q Exp
-symbol str = [| token $(FP.string str) |]
+symbol str = [| token $(FP.getStringOf str) |]
 
 -- | Parser a non-keyword string, throw precise error on failure.
 symbol' :: String -> Q Exp
@@ -181,7 +181,7 @@ symbol' str = [| $(symbol str) `cut'` Lit str |]
 
 -- | Parse a keyword string.
 keyword :: String -> Q Exp
-keyword str = [| token ($(FP.string str) `notFollowedBy` identChar) |]
+keyword str = [| token ($(FP.getStringOf str) `notFollowedBy` identChar) |]
 
 -- | Parse a keyword string, throw precise error on failure.
 keyword' :: String -> Q Exp
diff --git a/src/FlatParse/Examples/BasicLambda/Parser.hs b/src/FlatParse/Examples/BasicLambda/Parser.hs
index 12d84bd..f1db3af 100644
--- a/src/FlatParse/Examples/BasicLambda/Parser.hs
+++ b/src/FlatParse/Examples/BasicLambda/Parser.hs
@@ -11,7 +11,7 @@ module FlatParse.Examples.BasicLambda.Parser where
 import Data.Char (ord)
 import qualified Data.ByteString as B
 
-import FlatParse.Basic hiding (Parser, runParser, string, char, cut)
+import FlatParse.Basic hiding ( Parser, cut )
 import FlatParse.Examples.BasicLambda.Lexer
 
 --------------------------------------------------------------------------------
diff --git a/src/FlatParse/Internal.hs b/src/FlatParse/Internal.hs
deleted file mode 100644
index b2a8117..0000000
--- a/src/FlatParse/Internal.hs
+++ /dev/null
@@ -1,277 +0,0 @@
-{-# language UnboxedTuples #-}
-
-module FlatParse.Internal where
-
-import FlatParse.Internal.UnboxedNumerics
-
-import Data.Bits
-import Data.Char
-import Data.Foldable (foldl')
-import Data.Map (Map)
-import GHC.Exts
-import GHC.ForeignPtr
-
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Char8 as BC8
-import qualified Data.ByteString.Internal as B
-import qualified Data.Map.Strict as M
-
-#if MIN_VERSION_base(4,15,0)
-import GHC.Num.Integer (Integer(..))
-#else
-import GHC.Integer.GMP.Internals (Integer(..))
-#endif
-
--- Compatibility
---------------------------------------------------------------------------------
-
-shortInteger :: Int# -> Integer
-#if MIN_VERSION_base(4,15,0)
-shortInteger = IS
-#else
-shortInteger = S#
-#endif
-{-# inline shortInteger #-}
-
-
--- Char predicates
---------------------------------------------------------------------------------
-
--- | @isDigit c = \'0\' <= c && c <= \'9\'@
-isDigit :: Char -> Bool
-isDigit c = '0' <= c && c <= '9'
-{-# inline isDigit #-}
-
--- | @isLatinLetter c = (\'A\' <= c && c <= \'Z\') || (\'a\' <= c && c <= \'z\')@
-isLatinLetter :: Char -> Bool
-isLatinLetter c = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
-{-# inline isLatinLetter #-}
-
--- | @isGreekLetter c = (\'Α\' <= c && c <= \'Ω\') || (\'α\' <= c && c <= \'ω\')@
-isGreekLetter :: Char -> Bool
-isGreekLetter c = ('Α' <= c && c <= 'Ω') || ('α' <= c && c <= 'ω')
-{-# inline isGreekLetter #-}
-
--- Int(eger) reading
---------------------------------------------------------------------------------
-
-mul10 :: Int# -> Int#
-mul10 n = uncheckedIShiftL# n 3# +# uncheckedIShiftL# n 1#
-{-# inline mul10 #-}
-
-readInt' :: Int# -> Addr# -> Addr# -> (# Int#, Addr# #)
-readInt' acc s end = case eqAddr# s end of
-  1# -> (# acc, s #)
-  _  -> case indexWord8OffAddr''# s 0# of
-    w | 1# <- leWord8# (wordToWord8''# 0x30##) w, 1# <- leWord8# w (wordToWord8''# 0x39##) ->
-      readInt' (mul10 acc +# (word2Int# (word8ToWord''# w) -# 0x30#)) (plusAddr# s 1#) end
-    _ -> (# acc, s #)
-
-
--- | Read an `Int` from the input, as a non-empty digit sequence. The `Int` may
---   overflow in the result.
-readInt :: Addr# -> Addr# -> (# (##) | (# Int#, Addr# #) #)
-readInt eob s = case readInt' 0# s eob of
-  (# n, s' #) | 1# <- eqAddr# s s' -> (# (##) | #)
-              | otherwise          -> (# | (# n, s' #) #)
-{-# inline readInt #-}
-
-readIntHex' :: Int# -> Addr# -> Addr# -> (# Int#, Addr# #)
-readIntHex' acc s end = case eqAddr# s end of
-  1# -> (# acc, s #)
-  _  -> case indexWord8OffAddr''# s 0# of
-    w | 1# <- leWord8# (wordToWord8''# 0x30##) w
-      , 1# <- leWord8# w (wordToWord8''# 0x39##)
-      -> readIntHex' (uncheckedIShiftL# acc 4# +# (word2Int# (word8ToWord''# w) -# 0x30#)) (plusAddr# s 1#) end
-
-      | 1# <- leWord8# (wordToWord8''# 0x41##) w
-      , 1# <- leWord8# w (wordToWord8''# 0x46##)
-      -> readIntHex' (uncheckedIShiftL# acc 4# +# (word2Int# (word8ToWord''# w) -# 0x37#)) (plusAddr# s 1#) end
-
-      | 1# <- leWord8# (wordToWord8''# 0x61##) w
-      , 1# <- leWord8# w (wordToWord8''# 0x66##)
-      -> readIntHex' (uncheckedIShiftL# acc 4# +# (word2Int# (word8ToWord''# w) -# 0x57#)) (plusAddr# s 1#) end
-    _ -> (# acc, s #)
-
--- | Read an `Int` from the input, as a non-empty case-insensitive ASCII
---   hexadecimal digit sequence. The `Int` may overflow in the result.
-{-# INLINE readIntHex #-}
-readIntHex :: Addr# -> Addr# -> (# (##) | (# Int#, Addr# #) #)
-readIntHex eob s = case readIntHex' 0# s eob of
-    (# n, s' #) | 1# <- eqAddr# s s' -> (# (##) | #)
-                | otherwise          -> (# | (# n, s' #) #)
-
--- | Read an `Integer` from the input, as a non-empty digit sequence.
-readInteger :: ForeignPtrContents -> Addr# -> Addr# -> (# (##) | (# Integer, Addr# #) #)
-readInteger fp eob s = case readInt' 0# s eob of
-  (# n, s' #)
-    | 1# <- eqAddr# s s'            -> (# (##) | #)
-    | 1# <- minusAddr# s' s <=# 18# -> (# | (# shortInteger n, s' #) #)
-    | otherwise -> case BC8.readInteger (B.PS (ForeignPtr s fp) 0 (I# (minusAddr# s' s))) of
-        Nothing     -> (# (##) | #)
-        Just (i, _) -> (# | (# i, s' #) #)
-{-# inline readInteger #-}
-
-
--- Positions and spans
---------------------------------------------------------------------------------
-
--- | Byte offset counted backwards from the end of the buffer.
-newtype Pos = Pos Int deriving (Eq, Show)
-
--- | A pair of positions.
-data Span = Span !Pos !Pos deriving (Eq, Show)
-
-instance Ord Pos where
-  Pos p <= Pos p' = p' <= p
-  Pos p <  Pos p' = p' <  p
-  Pos p >  Pos p' = p' >  p
-  Pos p >= Pos p' = p' >= p
-  {-# inline (<=) #-}
-  {-# inline (<) #-}
-  {-# inline (>) #-}
-  {-# inline (>=) #-}
-
-addrToPos# :: Addr# -> Addr# -> Pos
-addrToPos# eob s = Pos (I# (minusAddr# eob s))
-{-# inline addrToPos# #-}
-
-posToAddr# :: Addr# -> Pos -> Addr#
-posToAddr# eob (Pos (I# n)) = unsafeCoerce# (minusAddr# eob (unsafeCoerce# n))
-{-# inline posToAddr# #-}
-
--- | Slice into a `B.ByteString` using a `Span`. The result is invalid if the `Span`
---   is not a valid slice of the first argument.
-unsafeSlice :: B.ByteString -> Span -> B.ByteString
-unsafeSlice (B.PS (ForeignPtr addr fp) (I# start) (I# len))
-            (Span (Pos (I# o1)) (Pos (I# o2))) =
-  let end = addr `plusAddr#` start `plusAddr#` len
-  in B.PS (ForeignPtr (plusAddr# end (negateInt# o1)) fp) (I# 0#) (I# (o1 -# o2))
-{-# inline unsafeSlice #-}
-
--- UTF conversions
---------------------------------------------------------------------------------
-
--- | Convert a `String` to an UTF-8-coded `B.ByteString`.
-packUTF8 :: String -> B.ByteString
-packUTF8 str = B.pack $ do
-  c <- str
-  w <- charToBytes c
-  pure (fromIntegral w)
-
-charToBytes :: Char -> [Word]
-charToBytes c'
-    | c <= 0x7f     = [fromIntegral c]
-    | c <= 0x7ff    = [0xc0 .|. y, 0x80 .|. z]
-    | c <= 0xffff   = [0xe0 .|. x, 0x80 .|. y, 0x80 .|. z]
-    | c <= 0x10ffff = [0xf0 .|. w, 0x80 .|. x, 0x80 .|. y, 0x80 .|. z]
-    | otherwise = error "Not a valid Unicode code point"
-  where
-    c = ord c'
-    z = fromIntegral (c                 .&. 0x3f)
-    y = fromIntegral (unsafeShiftR c 6  .&. 0x3f)
-    x = fromIntegral (unsafeShiftR c 12 .&. 0x3f)
-    w = fromIntegral (unsafeShiftR c 18 .&. 0x7)
-
-strToBytes :: String -> [Word]
-strToBytes = concatMap charToBytes
-{-# inline strToBytes #-}
-
-packBytes :: [Word] -> Word
-packBytes = fst . foldl' go (0, 0) where
-  go (acc, shift) w | shift == 64 = error "packWords: too many bytes"
-  go (acc, shift) w = (unsafeShiftL (fromIntegral w) shift .|. acc, shift+8)
-
-splitBytes :: [Word] -> ([Word], [Word])
-splitBytes ws = case quotRem (length ws) 8 of
-  (0, _) -> (ws, [])
-  (_, r) -> (as, chunk8s bs) where
-              (as, bs) = splitAt r ws
-              chunk8s [] = []
-              chunk8s ws = let (as, bs) = splitAt 8 ws in
-                           packBytes as : chunk8s bs
-
-derefChar8# :: Addr# -> Char#
-derefChar8# addr = indexCharOffAddr# addr 0#
-{-# inline derefChar8# #-}
-
--- Switch trie compilation
---------------------------------------------------------------------------------
-
-data Trie a = Branch !a !(Map Word (Trie a))
-  deriving Show
-
-type Rule = Maybe Int
-
-nilTrie :: Trie Rule
-nilTrie = Branch Nothing mempty
-
-updRule :: Int -> Maybe Int -> Maybe Int
-updRule rule = Just . maybe rule (min rule)
-
-insert :: Int -> [Word] -> Trie Rule -> Trie Rule
-insert rule = go where
-  go [] (Branch rule' ts) =
-    Branch (updRule rule rule') ts
-  go (c:cs) (Branch rule' ts) =
-    Branch rule' (M.alter (Just . maybe (go cs nilTrie) (go cs)) c ts)
-
-listToTrie :: [(Int, String)] -> Trie Rule
-listToTrie = foldl' (\t (!r, !s) -> insert r (charToBytes =<< s) t) nilTrie
-
--- | Decorate a trie with the minimum lengths of non-empty paths. This
---   is used later to place `ensureBytes#`.
-mindepths :: Trie Rule -> Trie (Rule, Int)
-mindepths (Branch rule ts) =
-  if M.null ts then
-    Branch (rule, 0) mempty
-  else
-    let !ts' = M.map mindepths ts in
-    Branch (
-      rule,
-      minimum (M.map (\(Branch (rule,d) _) -> maybe (d + 1) (\_ -> 1) rule) ts'))
-      ts'
-
-data Trie' a
-  = Branch' !a !(Map Word (Trie' a))
-  | Path !a ![Word] !(Trie' a)
-  deriving Show
-
--- | Compress linear paths.
-pathify :: Trie (Rule, Int) -> Trie' (Rule, Int)
-pathify (Branch a ts) = case M.toList ts of
-  [] -> Branch' a mempty
-  [(w, t)] -> case pathify t of
-           Path (Nothing, _) ws t -> Path a (w:ws) t
-           t                      -> Path a [w] t
-  _   -> Branch' a (M.map pathify ts)
-
--- | Compute where to fall back after we exhausted a branch. If the branch is
---   empty, that means we've succeded at reading and we jump to the rhs rule.
-fallbacks :: Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
-fallbacks = go Nothing 0  where
-  go :: Rule -> Int -> Trie' (Rule, Int) -> Trie' (Rule, Int, Int)
-  go !rule !n (Branch' (rule', d) ts)
-    | M.null ts        = Branch' (rule', 0, d) mempty
-    | Nothing <- rule' = Branch' (rule, n, d) (go rule (n + 1) <$> ts)
-    | otherwise        = Branch' (rule', 0, d) (go rule' 1     <$> ts)
-  go rule n (Path (rule', d) ws t)
-    | Nothing <- rule' = Path (rule, n, d)  ws (go rule (n + length ws) t)
-    | otherwise        = Path (rule', 0, d) ws (go rule' (length ws) t)
-
--- | Decorate with `ensureBytes#` invocations, represented as
---   `Maybe Int`.
-ensureBytes :: Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Maybe Int)
-ensureBytes = go 0 where
-  go :: Int -> Trie' (Rule, Int, Int) -> Trie' (Rule, Int, Maybe Int)
-  go !res = \case
-    Branch' (r, n, d) ts
-      | M.null ts -> Branch' (r, n, Nothing) mempty
-      | res < 1   -> Branch' (r, n, Just d ) (go (d   - 1) <$> ts)
-      | otherwise -> Branch' (r, n, Nothing) (go (res - 1) <$> ts)
-    Path (r, n, d) ws t -> case length ws of
-      l | res < l   -> Path (r, n, Just $! d - res) ws (go (d - l)   t)
-        | otherwise -> Path (r, n, Nothing        ) ws (go (res - l) t)
-
-compileTrie :: [(Int, String)] -> Trie' (Rule, Int, Maybe Int)
-compileTrie = ensureBytes . fallbacks . pathify . mindepths . listToTrie
diff --git a/src/FlatParse/Internal/UnboxedNumerics.hs b/src/FlatParse/Internal/UnboxedNumerics.hs
deleted file mode 100644
index a152943..0000000
--- a/src/FlatParse/Internal/UnboxedNumerics.hs
+++ /dev/null
@@ -1,115 +0,0 @@
-{- | Compatibility layer for numeric primops.
-
-GHC 9.2 standardized unboxed numeric primops. Prior, it was quite asymmetric.
-Many primop functions used the native unboxed numerics 'Word#' and 'Int#' even
-if a sized unboxed numeric was in the name, e.g. 'indexWord8OffAddr#' returning
-'Word#' pre-9.2. All boxed machine integers only stored 'Word#' internally!
-
-We target GHC 9.2's better handling. In order to maintain compatibility with
-older GHCs, we define missing primops and wrap ones that changed type. Usually,
-we can write a few wrappers so that 9.2 uses sized unboxed numerics everywhere,
-and pre-9.2 uses native unboxed numerics everywhere. Sometimes we really want to
-work with sized unboxed numerics on both, in which case we have to do more
-involved primop wrapping.
-
-The general pattern is as follows:
-
-  * A ticked primop means it's sized on >=9.2, native on <9.2
-  * A double-ticked primop means it's sized on all
-  * An unticked primop should mean the same as a ticked primop (no guarantees)
-
-Also see: https://gitlab.haskell.org/ghc/ghc/-/wikis/Unboxed-Numerics
--}
-
-module FlatParse.Internal.UnboxedNumerics where
-
-import GHC.Exts
-
--- "Switch" wrappers: sized on >=9.2, native on <9.2
-byteSwap16'# :: Word16'# -> Word16'#
-byteSwap32'# :: Word32'# -> Word32'#
-eqWord8'# :: Word8'# -> Word8'# -> Int#
-eqWord16'# :: Word16'# -> Word16'# -> Int#
-eqWord32'# :: Word32'# -> Word32'# -> Int#
-{-# inline byteSwap16'# #-}
-{-# inline byteSwap32'# #-}
-{-# inline eqWord8'#    #-}
-{-# inline eqWord16'#   #-}
-{-# inline eqWord32'#   #-}
-
--- "Sized" wrappers: sized on all
-indexWord8OffAddr''# :: Addr# -> Int# -> Word8#
-wordToWord8''# :: Word# -> Word8#
-word8ToWord''# :: Word8# -> Word#
-{-# inline indexWord8OffAddr''#  #-}
-{-# inline wordToWord8''# #-}
-{-# inline word8ToWord''# #-}
-
-#if MIN_VERSION_base(4,16,0)
--- GHC >=9.2
-
-type Word8'#  = Word8#
-type Word16'# = Word16#
-type Word32'# = Word32#
-type Int8'#   = Int8#
-type Int16'#  = Int16#
-type Int32'#  = Int32#
-
--- "Switch" wrappers: sized on >=9.2, native on <9.2
-byteSwap16'# w# = wordToWord16# (byteSwap16# (word16ToWord# w#))
-byteSwap32'# w# = wordToWord32# (byteSwap32# (word32ToWord# w#))
-eqWord8'# = eqWord8#
-eqWord16'# = eqWord16#
-eqWord32'# = eqWord32#
-
--- "Sized" wrappers: sized on all
-indexWord8OffAddr''# = indexWord8OffAddr#
-wordToWord8''# = wordToWord8#
-word8ToWord''# = word8ToWord#
-
-#else
--- GHC <9.2
-
-type Word8'#  = Word#
-type Word16'# = Word#
-type Word32'# = Word#
-type Int8'#   = Int#
-type Int16'#  = Int#
-type Int32'#  = Int#
-
--- "Switch" wrappers: sized on >=9.2, native on <9.2
-byteSwap16'# = byteSwap16#
-byteSwap32'# = byteSwap32#
-eqWord8'# = eqWord#
-eqWord16'# = eqWord#
-eqWord32'# = eqWord#
-
--- No need to tick wrap these, they didn't exist <9.2
-word16ToInt16# :: Word16'# -> Int#
-word16ToInt16# w = narrow16Int# (word2Int# w)
-word32ToInt32# :: Word32'# -> Int#
-word32ToInt32# w = narrow32Int# (word2Int# w)
-{-# inline word16ToInt16# #-}
-{-# inline word32ToInt32# #-}
-
--- "Sized" wrappers: sized on all
-indexWord8OffAddr''# a# i# = narrowWord8# (indexWord8OffAddr# a# i#)
-wordToWord8''# = narrowWord8#
-word8ToWord''# = extendWord8#
-
-#endif
-
-#if !MIN_VERSION_base(4,13,0)
--- GHC <8.8
-
-type Word8# = Word#
-narrowWord8# :: Word# -> Word8#
-narrowWord8# = narrow8Word#
-extendWord8# :: Word# -> Word8#
-extendWord8# w# = w#
-leWord8# :: Word8# -> Word8# -> Int#
-leWord8# w1# w2# = leWord# w1# w2#
-eqWord8# :: Word8# -> Word8# -> Int#
-eqWord8# w1# w2# = eqWord# w1# w2#
-
-#endif
diff --git a/src/FlatParse/Stateful.hs b/src/FlatParse/Stateful.hs
deleted file mode 100644
index 82a7f40..0000000
--- a/src/FlatParse/Stateful.hs
+++ /dev/null
@@ -1,1453 +0,0 @@
-{-# language UnboxedTuples #-}
-
-{-|
-This module implements a `Parser` supporting a custom reader environment, custom
-error types and an `Int` state.
--}
-
-module FlatParse.Stateful (
-  -- * Parser types and constructors
-    type Parser(..)
-  , type Res#
-  , pattern OK#
-  , pattern Fail#
-  , pattern Err#
-  , Result(..)
-
-  -- * Running parsers
-  , runParser
-  , runParserS
-
-  -- * Actions on the state and the environment
-  , get
-  , put
-  , modify
-  , ask
-  , local
-
-  -- * Errors and failures
-  , failed
-  , Base.empty
-  , err
-  , lookahead
-  , fails
-  , try
-  , optional
-  , optional_
-  , withOption
-  , cut
-  , cutting
-
-  -- * Basic lexing and parsing
-  , eof
-  , takeBs
-  , takeRestBs
-  , char
-  , byte
-  , bytes
-  , byteString
-  , string
-  , switch
-  , switchWithPost
-  , rawSwitchWithPost
-  , satisfy
-  , satisfy_
-  , satisfyASCII
-  , satisfyASCII_
-  , fusedSatisfy
-  , fusedSatisfy_
-  , anyWord8
-  , anyWord8_
-  , anyWord16
-  , anyWord16_
-  , anyWord32
-  , anyWord32_
-  , anyWord64
-  , anyWord64_
-  , anyWord
-  , anyWord_
-  , anyInt8
-  , anyInt16
-  , anyInt32
-  , anyInt64
-  , anyInt
-  , anyChar
-  , anyChar_
-  , anyCharASCII
-  , anyCharASCII_
-  , isDigit
-  , isGreekLetter
-  , isLatinLetter
-  , FlatParse.Stateful.readInt
-  , FlatParse.Stateful.readIntHex
-  , FlatParse.Stateful.readInteger
-  , anyCString
-
-  -- ** Explicit-endianness machine integers
-  , anyWord16le
-  , anyWord16be
-  , anyWord32le
-  , anyWord32be
-  , anyWord64le
-  , anyWord64be
-  , anyInt16le
-  , anyInt16be
-  , anyInt32le
-  , anyInt32be
-  , anyInt64le
-  , anyInt64be
-
-  -- * Combinators
-  , (<|>)
-  , branch
-  , chainl
-  , chainr
-  , many
-  , many_
-  , some
-  , some_
-  , notFollowedBy
-  , isolate
-
-  -- * Positions and spans
-  , Pos(..)
-  , Span(..)
-  , getPos
-  , setPos
-  , endPos
-  , spanOf
-  , withSpan
-  , byteStringOf
-  , withByteString
-  , inSpan
-
-  -- ** Position and span conversions
-  , Basic.validPos
-  , Basic.posLineCols
-  , unsafeSpanToByteString
-  , Basic.unsafeSlice
-  , Basic.mkPos
-  , Basic.lines
-
-  -- * Getting the rest of the input as a 'String'
-  , takeLine
-  , traceLine
-  , takeRest
-  , traceRest
-
-  -- * `String` conversions
-  , packUTF8
-  , Basic.unpackUTF8
-
-  -- * Internal functions
-  , ensureBytes#
-
-  -- ** Unboxed arguments
-  , takeBs#
-  , atSkip#
-
-  -- *** Machine integer continuation parsers
-  , withAnyWord8#
-  , withAnyWord16#
-  , withAnyWord32#
-  , withAnyWord64#
-  , withAnyInt8#
-  , withAnyInt16#
-  , withAnyInt32#
-  , withAnyInt64#
-
-  -- ** Location & address primitives
-  , setBack#
-  , withAddr#
-  , takeBsOffAddr#
-  , lookaheadFromAddr#
-  , atAddr#
-
-  -- ** Unsafe
-  , anyCStringUnsafe
-  , scan8#
-  , scan16#
-  , scan32#
-  , scan64#
-  , scanAny8#
-  , scanBytes#
-
-  ) where
-
-import qualified Control.Applicative as Base
-import Control.Monad
-import Data.Foldable
-import Data.Map (Map)
-import GHC.Exts
-import GHC.Word
-import GHC.Int
-import Language.Haskell.TH
-import System.IO.Unsafe
-import GHC.ForeignPtr
-
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Internal as B
-import qualified Data.ByteString.Unsafe as B
-import qualified Data.Map.Strict as M
-
-import FlatParse.Internal
-import FlatParse.Internal.UnboxedNumerics
-
-import qualified FlatParse.Basic as Basic
-
---------------------------------------------------------------------------------
-
--- | Primitive result of a parser. Possible results are given by `OK#`, `Err#` and `Fail#`
---   pattern synonyms.
-type Res# e a =
-  (#
-    (# a, Addr#, Int# #)
-  | (# #)
-  | (# e #)
-  #)
-
--- | Contains return value, pointer to the rest of the input buffer and the nex `Int`
---   state.
-pattern OK# :: a -> Addr# -> Int# -> Res# e a
-pattern OK# a s n = (# (# a, s, n #) | | #)
-
--- | Constructor for errors which are by default non-recoverable.
-pattern Err# :: e -> Res# e a
-pattern Err# e = (# | | (# e #) #)
-
--- | Constructor for recoverable failure.
-pattern Fail# :: Res# e a
-pattern Fail# = (# | (# #) | #)
-{-# complete OK#, Err#, Fail# #-}
-
--- | @Parser r e a@ has a reader environment @r@, error type @e@ and a return type @a@.
-newtype Parser r e a = Parser {runParser# :: ForeignPtrContents -> r -> Addr# -> Addr# -> Int# -> Res# e a}
-
-instance Functor (Parser r e) where
-  fmap f (Parser g) = Parser \fp !r eob s n -> case g fp r eob s n of
-    OK# a s n -> let !b = f a in OK# b s n
-    x         -> unsafeCoerce# x
-  {-# inline fmap #-}
-
-  (<$) a' (Parser g) = Parser \fp !r eob s n -> case g fp r eob s n of
-    OK# a s n -> OK# a' s n
-    x         -> unsafeCoerce# x
-  {-# inline (<$) #-}
-
-instance Applicative (Parser r e) where
-  pure a = Parser \fp !r eob s n -> OK# a s n
-  {-# inline pure #-}
-  Parser ff <*> Parser fa = Parser \fp !r eob s n -> case ff fp r eob s n of
-    OK# f s n -> case fa fp r eob s n of
-      OK# a s n  -> let !b = f a in OK# b s n
-      x          -> unsafeCoerce# x
-    x -> unsafeCoerce# x
-  {-# inline (<*>) #-}
-  Parser fa <* Parser fb = Parser \fp !r eob s n -> case fa fp r eob s n of
-    OK# a s n   -> case fb fp r eob s n of
-      OK# b s n -> OK# a s n
-      x -> unsafeCoerce# x
-    x -> unsafeCoerce# x
-  {-# inline (<*) #-}
-  Parser fa *> Parser fb = Parser \fp !r eob s n -> case fa fp r eob s n of
-    OK# a s n -> fb fp r eob s n
-    x         -> unsafeCoerce# x
-  {-# inline (*>) #-}
-
-instance Monad (Parser r e) where
-  return = pure
-  {-# inline return #-}
-  Parser fa >>= f = Parser \fp !r eob s n -> case fa fp r eob s n of
-    OK# a s n -> runParser# (f a) fp r eob s n
-    x         -> unsafeCoerce# x
-  {-# inline (>>=) #-}
-  (>>) = (*>)
-  {-# inline (>>) #-}
-
--- | Higher-level boxed data type for parsing results.
-data Result e a =
-    OK a Int !(B.ByteString)  -- ^ Contains return value, last `Int` state, unconsumed input.
-  | Fail                      -- ^ Recoverable-by-default failure.
-  | Err !e                    -- ^ Unrecoverble-by-default error.
-  deriving Show
-
-instance Functor (Result e) where
-  fmap f (OK a s n) = let !b = f a in OK b s n
-  fmap f r          = unsafeCoerce# r
-  {-# inline fmap #-}
-  (<$) a (OK _ s n) = OK a s n
-  (<$) _ r          = unsafeCoerce# r
-  {-# inline (<$) #-}
-
---------------------------------------------------------------------------------
-
--- | Run a parser. The `Int` argument is the initial state.
-runParser :: Parser r e a -> r -> Int -> B.ByteString -> Result e a
-runParser (Parser f) !r (I# n) b@(B.PS (ForeignPtr _ fp) _ (I# len)) = unsafeDupablePerformIO do
-  B.unsafeUseAsCString b \(Ptr buf) -> do
-    let end = plusAddr# buf len
-    case f fp r end buf n of
-      Err# e ->
-        pure (Err e)
-      OK# a s n -> do
-        let offset = minusAddr# s buf
-        pure (OK a (I# n) (B.drop (I# offset) b))
-      Fail# ->
-        pure Fail
-{-# inlinable runParser #-}
-
--- | Run a parser on a `String` input. Reminder: @OverloadedStrings@ for `B.ByteString` does not
---   yield a valid UTF-8 encoding! For non-ASCII `B.ByteString` literal input, use `runParserS` or
---   `packUTF8` for testing.
-runParserS :: Parser r e a -> r -> Int -> String -> Result e a
-runParserS pa r !n s = runParser pa r n (packUTF8 s)
-
---------------------------------------------------------------------------------
-
--- | Query the `Int` state.
-get :: Parser r e Int
-get = Parser \fp !r eob s n -> OK# (I# n) s n
-{-# inline get #-}
-
--- | Write the `Int` state.
-put :: Int -> Parser r e ()
-put (I# n) = Parser \fp !r eob s _ -> OK# () s n
-{-# inline put #-}
-
--- | Modify the `Int` state.
-modify :: (Int -> Int) -> Parser r e ()
-modify f = Parser \fp !r eob s n ->
-  case f (I# n) of
-    I# n -> OK# () s n
-{-# inline modify #-}
-
--- | Query the environment.
-ask :: Parser r e r
-ask = Parser \fp !r eob s n -> OK# r s n
-{-# inline ask #-}
-
--- | Run a parser in a modified environment.
-local :: (r -> r) -> Parser r e a -> Parser r e a
-local f (Parser g) = Parser \fp !r eob s n -> let !r' = f r in g fp r' eob s n
-{-# inline local #-}
-
---------------------------------------------------------------------------------
-
--- | The failing parser. By default, parser choice `(<|>)` arbitrarily backtracks
---   on parser failure.
-failed :: Parser r e a
-failed = Parser \fp !r eob s n -> Fail#
-{-# inline failed #-}
-
--- | Throw a parsing error. By default, parser choice `(<|>)` can't backtrack
---   on parser error. Use `try` to convert an error to a recoverable failure.
-err :: e -> Parser r e a
-err e = Parser \fp !r eob s n -> Err# e
-{-# inline err #-}
-
--- | Save the parsing state, then run a parser, then restore the state.
-lookahead :: Parser r e a -> Parser r e a
-lookahead (Parser f) = Parser \fp !r eob s n ->
-  case f fp r eob s n of
-    OK# a _ _ -> OK# a s n
-    x         -> x
-{-# inline lookahead #-}
-
--- | Convert a parsing failure to a success.
-fails :: Parser r e a -> Parser r e ()
-fails (Parser f) = Parser \fp !r eob s n ->
-  case f fp r eob s n of
-    OK# _ _ _ -> Fail#
-    Fail#     -> OK# () s n
-    Err# e    -> Err# e
-{-# inline fails #-}
-
--- | Convert a parsing error into failure.
-try :: Parser r e a -> Parser r e a
-try (Parser f) = Parser \fp !r eob s n -> case f fp r eob s n of
-  Err# _ -> Fail#
-  x      -> x
-{-# inline try #-}
-
--- | Convert a parsing failure to a `Maybe`. If possible, use `withOption` instead.
-optional :: Parser r e a -> Parser r e (Maybe a)
-optional p = (Just <$> p) <|> pure Nothing
-{-# inline optional #-}
-
--- | Convert a parsing failure to a `()`.
-optional_ :: Parser r e a -> Parser r e ()
-optional_ p = (() <$ p) <|> pure ()
-{-# inline optional_ #-}
-
--- | CPS'd version of `optional`. This is usually more efficient, since it gets rid of the
---   extra `Maybe` allocation.
-withOption :: Parser r e a -> (a -> Parser r e b) -> Parser r e b -> Parser r e b
-withOption (Parser f) just (Parser nothing) = Parser \fp !r eob s n -> case f fp r eob s n of
-  OK# a s n -> runParser# (just a) fp r eob s n
-  Fail#     -> nothing fp r eob s n
-  Err# e    -> Err# e
-{-# inline withOption #-}
-
--- | Convert a parsing failure to an error.
-cut :: Parser r e a -> e -> Parser r e a
-cut (Parser f) e = Parser \fp !r eob s n -> case f fp r eob s n of
-  Fail# -> Err# e
-  x     -> x
-{-# inline cut #-}
-
--- | Run the parser, if we get a failure, throw the given error, but if we get an error, merge the
---   inner and the newly given errors using the @e -> e -> e@ function. This can be useful for
---   implementing parsing errors which may propagate hints or accummulate contextual information.
-cutting :: Parser r e a -> e -> (e -> e -> e) -> Parser r e a
-cutting (Parser f) e merge = Parser \fp !r eob s n -> case f fp r eob s n of
-  Fail#   -> Err# e
-  Err# e' -> let !e'' = merge e' e in Err# e''
-  x       -> x
-{-# inline cutting #-}
-
---------------------------------------------------------------------------------
-
-
--- | Succeed if the input is empty.
-eof :: Parser r e ()
-eof = Parser \fp !r eob s n -> case eqAddr# eob s of
-  1# -> OK# () s n
-  _  -> Fail#
-{-# inline eof #-}
-
--- | Read the given number of bytes as a 'ByteString'.
---
--- Throws a runtime error if given a negative integer.
-takeBs :: Int -> Parser r e B.ByteString
-takeBs (I# n#) = Parser \fp !r eob s n -> case n# <=# minusAddr# eob s of
-  1# -> -- have to runtime check for negative values, because they cause a hang
-    case n# >=# 0# of
-      1# -> OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) (plusAddr# s n#) n
-      _  -> error "FlatParse.Basic.take: negative integer"
-  _  -> Fail#
-{-# inline takeBs #-}
-
--- | Consume the rest of the input. May return the empty bytestring.
-takeRestBs :: Parser r e B.ByteString
-takeRestBs = Parser \fp !r eob s n ->
-  let n# = minusAddr# eob s
-  in  OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) eob n
-{-# inline takeRestBs #-}
-
--- | Parse a UTF-8 character literal. This is a template function, you can use it as
---   @$(char \'x\')@, for example, and the splice in this case has type @Parser r e ()@.
-char :: Char -> Q Exp
-char c = string [c]
-
--- | Read a byte.
-byte :: Word8 -> Parser r e ()
-byte w = ensureBytes# 1 >> scan8# w
-{-# inline byte #-}
-
--- | Read a sequence of bytes. This is a template function, you can use it as @$(bytes [3, 4, 5])@,
---   for example, and the splice has type @Parser r e ()@. For a non-TH variant see 'byteString'.
-bytes :: [Word] -> Q Exp
-bytes bytes = do
-  let !len = length bytes
-  [| ensureBytes# len >> $(scanBytes# bytes) |]
-
--- | Parse a given `B.ByteString`. If the bytestring is statically known, consider using 'bytes' instead.
-byteString :: B.ByteString -> Parser r e ()
-byteString (B.PS (ForeignPtr bs fcontents) _ (I# len)) =
-
-  let go64 :: Addr# -> Addr# -> Addr# -> Int# -> State# RealWorld -> (# Res# e (), State# RealWorld #)
-      go64 bs bsend s n w =
-        let bs' = plusAddr# bs 8# in
-        case gtAddr# bs' bsend of
-          1# -> go8 bs bsend s n w
-          _  -> case eqWord# (indexWord64OffAddr# bs 0#) (indexWord64OffAddr# s 0#) of
-            1# -> go64 bs' bsend (plusAddr# s 8#) n w
-            _  -> (# Fail#, w #)
-
-      go8 :: Addr# -> Addr# -> Addr# -> Int# -> State# RealWorld -> (# Res# e (), State# RealWorld #)
-      go8 bs bsend s n w = case ltAddr# bs bsend of
-        1# -> case eqWord8'# (indexWord8OffAddr# bs 0#) (indexWord8OffAddr# s 0#) of
-          1# -> go8 (plusAddr# bs 1#) bsend (plusAddr# s 1#) n w
-          _  -> (# Fail#, w #)
-        _  -> (# OK# () s n, w #)
-
-  in Parser \fp !r eob s n -> case len <=# minusAddr# eob s of
-       1# -> runRW# \w -> case go64 bs (plusAddr# bs len) s n w of
-               (# res, w #) -> case touch# fcontents w of
-                 w -> res
-       _  -> Fail#
-{-# inline byteString #-}
-
--- | Parse a UTF-8 string literal. This is a template function, you can use it as @$(string "foo")@,
---   for example, and the splice has type @Parser r e ()@.
-string :: String -> Q Exp
-string str = bytes (strToBytes str)
-
-{-|
-This is a template function which makes it possible to branch on a collection of string literals in
-an efficient way. By using `switch`, such branching is compiled to a trie of primitive parsing
-operations, which has optimized control flow, vectorized reads and grouped checking for needed input
-bytes.
-
-The syntax is slightly magical, it overloads the usual @case@ expression. An example:
-
-@
-    $(switch [| case _ of
-        "foo" -> pure True
-        "bar" -> pure False |])
-@
-
-The underscore is mandatory in @case _ of@. Each branch must be a string literal, but optionally
-we may have a default case, like in
-
-@
-    $(switch [| case _ of
-        "foo" -> pure 10
-        "bar" -> pure 20
-        _     -> pure 30 |])
-@
-
-All case right hand sides must be parsers with the same type. That type is also the type
-of the whole `switch` expression.
-
-A `switch` has longest match semantics, and the order of cases does not matter, except for
-the default case, which may only appear as the last case.
-
-If a `switch` does not have a default case, and no case matches the input, then it returns with
-failure, \without\ having consumed any input. A fallthrough to the default case also does not
-consume any input.
--}
-switch :: Q Exp -> Q Exp
-switch = switchWithPost Nothing
-
-{-|
-Switch expression with an optional first argument for performing a post-processing action after
-every successful branch matching. For example, if we have @ws :: Parser r e ()@ for a
-whitespace parser, we might want to consume whitespace after matching on any of the switch
-cases. For that case, we can define a "lexeme" version of `switch` as follows.
-
-@
-  switch' :: Q Exp -> Q Exp
-  switch' = switchWithPost (Just [| ws |])
-@
-
-Note that this @switch'@ function cannot be used in the same module it's defined in, because of the
-stage restriction of Template Haskell.
--}
-switchWithPost :: Maybe (Q Exp) -> Q Exp -> Q Exp
-switchWithPost postAction exp = do
-  !postAction <- sequence postAction
-  (!cases, !fallback) <- parseSwitch exp
-  genTrie $! genSwitchTrie' postAction cases fallback
-
--- | Version of `switchWithPost` without syntactic sugar. The second argument is the
---   list of cases, the third is the default case.
-rawSwitchWithPost :: Maybe (Q Exp) -> [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp
-rawSwitchWithPost postAction cases fallback = do
-  !postAction <- sequence postAction
-  !cases <- forM cases \(str, rhs) -> (str,) <$> rhs
-  !fallback <- sequence fallback
-  genTrie $! genSwitchTrie' postAction cases fallback
-
--- | Parse a UTF-8 `Char` for which a predicate holds.
-satisfy :: (Char -> Bool) -> Parser r e Char
-satisfy f = Parser \fp !r eob s n -> case runParser# anyChar fp r eob s n of
-  OK# c s n | f c -> OK# c s n
-  _               -> Fail#
-{-#  inline satisfy #-}
-
--- | Skip a UTF-8 `Char` for which a predicate holds.
-satisfy_ :: (Char -> Bool) -> Parser r e ()
-satisfy_ f = Parser \fp !r eob s n -> case runParser# anyChar fp r eob s n of
-  OK# c s n | f c -> OK# () s n
-  _               -> Fail#
-{-#  inline satisfy_ #-}
-
--- | Parse an ASCII `Char` for which a predicate holds. Assumption: the predicate must only return
---   `True` for ASCII-range characters. Otherwise this function might read a 128-255 range byte,
---   thereby breaking UTF-8 decoding.
-satisfyASCII :: (Char -> Bool) -> Parser r e Char
-satisfyASCII f = Parser \fp !r eob s n -> case eqAddr# eob s of
-  1# -> Fail#
-  _  -> case derefChar8# s of
-    c1 | f (C# c1) -> OK# (C# c1) (plusAddr# s 1#) n
-       | otherwise -> Fail#
-{-#  inline satisfyASCII #-}
-
--- | Skip an ASCII `Char` for which a predicate holds.  Assumption: the
---   predicate must only return `True` for ASCII-range characters.
-satisfyASCII_ :: (Char -> Bool) -> Parser r e ()
-satisfyASCII_ f = () <$ satisfyASCII f
-{-# inline satisfyASCII_ #-}
-
--- | This is a variant of `satisfy` which allows more optimization. We can pick four testing
---   functions for the four cases for the possible number of bytes in the UTF-8 character. So in
---   @fusedSatisfy f1 f2 f3 f4@, if we read a one-byte character, the result is scrutinized with
---   @f1@, for two-bytes, with @f2@, and so on. This can result in dramatic lexing speedups.
---
---   For example, if we want to accept any letter, the naive solution would be to use
---   `Data.Char.isLetter`, but this accesses a large lookup table of Unicode character classes. We
---   can do better with @fusedSatisfy isLatinLetter isLetter isLetter isLetter@, since here the
---   `isLatinLetter` is inlined into the UTF-8 decoding, and it probably handles a great majority of
---   all cases without accessing the character table.
-fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser r e Char
-fusedSatisfy f1 f2 f3 f4 = Parser \fp !r eob buf n -> case eqAddr# eob buf of
-  1# -> Fail#
-  _  -> case derefChar8# buf of
-    c1 -> case c1 `leChar#` '\x7F'# of
-      1# | f1 (C# c1) -> OK# (C# c1) (plusAddr# buf 1#) n
-         | otherwise  -> Fail#
-      _  -> case eqAddr# eob (plusAddr# buf 1#) of
-        1# -> Fail#
-        _ -> case indexCharOffAddr# buf 1# of
-          c2 -> case c1 `leChar#` '\xDF'# of
-            1# ->
-              let resc = C# (chr# (((ord# c1 -# 0xC0#) `uncheckedIShiftL#` 6#) `orI#`
-                                   (ord# c2 -# 0x80#)))
-              in case f2 resc of
-                   True -> OK# resc (plusAddr# buf 2#) n
-                   _    -> Fail#
-            _ -> case eqAddr# eob (plusAddr# buf 2#) of
-              1# -> Fail#
-              _  -> case indexCharOffAddr# buf 2# of
-                c3 -> case c1 `leChar#` '\xEF'# of
-                  1# ->
-                    let resc = C# (chr# (((ord# c1 -# 0xE0#) `uncheckedIShiftL#` 12#) `orI#`
-                                         ((ord# c2 -# 0x80#) `uncheckedIShiftL#`  6#) `orI#`
-                                         (ord# c3 -# 0x80#)))
-                    in case f3 resc of
-                         True -> OK# resc (plusAddr# buf 3#) n
-                         _    -> Fail#
-                  _ -> case eqAddr# eob (plusAddr# buf 3#) of
-                    1# -> Fail#
-                    _  -> case indexCharOffAddr# buf 3# of
-                      c4 ->
-                        let resc = C# (chr# (((ord# c1 -# 0xF0#) `uncheckedIShiftL#` 18#) `orI#`
-                                             ((ord# c2 -# 0x80#) `uncheckedIShiftL#` 12#) `orI#`
-                                             ((ord# c3 -# 0x80#) `uncheckedIShiftL#`  6#) `orI#`
-                                              (ord# c4 -# 0x80#)))
-                        in case f4 resc of
-                             True -> OK# resc (plusAddr# buf 4#) n
-                             _    -> Fail#
-{-# inline fusedSatisfy #-}
-
--- | Skipping variant of `fusedSatisfy`.
-fusedSatisfy_ :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser r e ()
-fusedSatisfy_ f1 f2 f3 f4 = () <$ fusedSatisfy f1 f2 f3 f4
-{-# inline fusedSatisfy_ #-}
-
--- | Parse any UTF-8-encoded `Char`.
-anyChar :: Parser r e Char
-anyChar = Parser \fp !r eob buf n -> case eqAddr# eob buf of
-  1# -> Fail#
-  _  -> case derefChar8# buf of
-    c1 -> case c1 `leChar#` '\x7F'# of
-      1# -> OK# (C# c1) (plusAddr# buf 1#) n
-      _  -> case eqAddr# eob (plusAddr# buf 1#) of
-        1# -> Fail#
-        _ -> case indexCharOffAddr# buf 1# of
-          c2 -> case c1 `leChar#` '\xDF'# of
-            1# ->
-              let resc = ((ord# c1 -# 0xC0#) `uncheckedIShiftL#` 6#) `orI#`
-                          (ord# c2 -# 0x80#)
-              in OK# (C# (chr# resc)) (plusAddr# buf 2#) n
-            _ -> case eqAddr# eob (plusAddr# buf 2#) of
-              1# -> Fail#
-              _  -> case indexCharOffAddr# buf 2# of
-                c3 -> case c1 `leChar#` '\xEF'# of
-                  1# ->
-                    let resc = ((ord# c1 -# 0xE0#) `uncheckedIShiftL#` 12#) `orI#`
-                               ((ord# c2 -# 0x80#) `uncheckedIShiftL#`  6#) `orI#`
-                                (ord# c3 -# 0x80#)
-                    in OK# (C# (chr# resc)) (plusAddr# buf 3#) n
-                  _ -> case eqAddr# eob (plusAddr# buf 3#) of
-                    1# -> Fail#
-                    _  -> case indexCharOffAddr# buf 3# of
-                      c4 ->
-                        let resc = ((ord# c1 -# 0xF0#) `uncheckedIShiftL#` 18#) `orI#`
-                                   ((ord# c2 -# 0x80#) `uncheckedIShiftL#` 12#) `orI#`
-                                   ((ord# c3 -# 0x80#) `uncheckedIShiftL#`  6#) `orI#`
-                                    (ord# c4 -# 0x80#)
-                        in OK# (C# (chr# resc)) (plusAddr# buf 4#) n
-{-# inline anyChar #-}
-
--- | Skip any UTF-8-encoded `Char`.
-anyChar_ :: Parser r e ()
-anyChar_ = Parser \fp !r eob buf n -> case eqAddr# eob buf of
-  1# -> Fail#
-  _  -> case derefChar8# buf of
-    c1 -> case c1 `leChar#` '\x7F'# of
-      1# -> OK# () (plusAddr# buf 1#) n
-      _  ->
-        let buf' =
-              case c1 `leChar#` '\xDF'# of
-                1# -> plusAddr# buf 2#
-                _  -> case c1 `leChar#` '\xEF'# of
-                    1# -> plusAddr# buf 3#
-                    _ ->  plusAddr# buf 4#
-        in case leAddr# buf' eob of
-             1# -> OK# () buf' n
-             _  -> Fail#
-{-# inline anyChar_ #-}
-
-
--- | Parse any `Char` in the ASCII range, fail if the next input character is not in the range.
---   This is more efficient than `anyChar` if we are only working with ASCII.
-anyCharASCII :: Parser r e Char
-anyCharASCII = Parser \fp !r eob buf n -> case eqAddr# eob buf of
-  1# -> Fail#
-  _  -> case derefChar8# buf of
-    c1 -> case c1 `leChar#` '\x7F'# of
-      1# -> OK# (C# c1) (plusAddr# buf 1#) n
-      _  -> Fail#
-{-# inline anyCharASCII #-}
-
--- | Skip any `Char` in the ASCII range. More efficient than `anyChar_` if we're working only with
---   ASCII.
-anyCharASCII_ :: Parser r e ()
-anyCharASCII_ = () <$ anyCharASCII
-{-# inline anyCharASCII_ #-}
-
--- | Read an `Int` from the input, as a non-empty digit sequence. The `Int` may
---   overflow in the result.
-readInt :: Parser r e Int
-readInt = Parser \fp r eob s n -> case FlatParse.Internal.readInt eob s of
-  (# (##) | #)        -> Fail#
-  (# | (# i, s' #) #) -> OK# (I# i) s' n
-{-# inline readInt #-}
-
--- | Read an `Int` from the input, as a non-empty case-insensitive ASCII
---   hexadecimal digit sequence. The `Int` may overflow in the result.
-readIntHex :: Parser r e Int
-readIntHex = Parser \fp r eob s n -> case FlatParse.Internal.readIntHex eob s of
-  (# (##) | #)        -> Fail#
-  (# | (# i, s' #) #) -> OK# (I# i) s' n
-{-# inline readIntHex #-}
-
--- | Read an `Integer` from the input, as a non-empty digit sequence.
-readInteger :: Parser r e Integer
-readInteger = Parser \fp r eob s n -> case FlatParse.Internal.readInteger fp eob s of
-  (# (##) | #)        -> Fail#
-  (# | (# i, s' #) #) -> OK# i s' n
-{-# inline readInteger #-}
-
-
---------------------------------------------------------------------------------
-
--- | Choose between two parsers. If the first parser fails, try the second one, but if the first one
---   throws an error, propagate the error.
-infixr 6 <|>
-(<|>) :: Parser r e a -> Parser r e a -> Parser r e a
-(<|>) (Parser f) (Parser g) = Parser \fp !r eob s n ->
-  case f fp r eob s n of
-    Fail# -> g fp r eob s n
-    x     -> x
-{-# inline[1] (<|>) #-}
-
-instance Base.Alternative (Parser r e) where
-  empty = failed
-  {-# inline empty #-}
-  (<|>) = (<|>)
-  {-# inline (Base.<|>) #-}
-
-instance MonadPlus (Parser r e) where
-  mzero = failed
-  {-# inline mzero #-}
-  mplus = (<|>)
-  {-# inline mplus #-}
-
-{-# RULES
-
-"flatparse/reassoc-alt" forall l m r. (l <|> m) <|> r = l <|> (m <|> r)
-
-#-}
-
--- | Branch on a parser: if the first argument succeeds, continue with the second, else with the third.
---   This can produce slightly more efficient code than `(<|>)`. Moreover, `ḃranch` does not
---   backtrack from the true/false cases.
-branch :: Parser r e a -> Parser r e b -> Parser r e b -> Parser r e b
-branch pa pt pf = Parser \fp !r eob s n -> case runParser# pa fp r eob s n of
-  OK# _ s n -> runParser# pt fp r eob s n
-  Fail#     -> runParser# pf fp r eob s n
-  Err# e    -> Err# e
-{-# inline branch #-}
-
--- | An analogue of the list `foldl` function: first parse a @b@, then parse zero or more @a@-s,
---   and combine the results in a left-nested way by the @b -> a -> b@ function. Note: this is not
---   the usual `chainl` function from the parsec libraries!
-chainl :: (b -> a -> b) -> Parser r e b -> Parser r e a -> Parser r e b
-chainl f start elem = start >>= go where
-  go b = do {!a <- elem; go $! f b a} <|> pure b
-{-# inline chainl #-}
-
--- | An analogue of the list `foldr` function: parse zero or more @a@-s, terminated by a @b@, and
---   combine the results in a right-nested way using the @a -> b -> b@ function. Note: this is not
---   the usual `chainr` function from the parsec libraries!
-chainr :: (a -> b -> b) -> Parser r e a -> Parser r e b -> Parser r e b
-chainr f (Parser elem) (Parser end) = go where
-  go = Parser \fp !r eob s n -> case elem fp r eob s n of
-    OK# a s n -> case runParser# go fp r eob s n of
-      OK# b s n -> let !b' = f a b in OK# b' s n
-      x         -> x
-    Fail# -> end fp r eob s n
-    Err# e -> Err# e
-{-# inline chainr #-}
-
--- | Run a parser zero or more times, collect the results in a list. Note: for optimal performance,
---   try to avoid this. Often it is possible to get rid of the intermediate list by using a
---   combinator or a custom parser.
-many :: Parser r e a -> Parser r e [a]
-many (Parser f) = go where
-  go = Parser \fp !r eob s n -> case f fp r eob s n of
-    OK# a s n -> case runParser# go fp r eob s n of
-                   OK# as s n -> OK# (a:as) s n
-                   x          -> x
-    Fail#  -> OK# [] s n
-    Err# e -> Err# e
-{-# inline many #-}
-
--- | Skip a parser zero or more times.
-many_ :: Parser r e a -> Parser r e ()
-many_ (Parser f) = go where
-  go = Parser \fp !r eob s n -> case f fp r eob s n of
-    OK# a s n -> runParser# go fp r eob s n
-    Fail#     -> OK# () s n
-    Err# e    -> Err# e
-{-# inline many_ #-}
-
--- | Run a parser one or more times, collect the results in a list. Note: for optimal performance,
---   try to avoid this. Often it is possible to get rid of the intermediate list by using a
---   combinator or a custom parser.
-some :: Parser r e a -> Parser r e [a]
-some p = (:) <$> p <*> many p
-{-# inline some #-}
-
--- | Skip a parser one or more times.
-some_ :: Parser r e a -> Parser r e ()
-some_ pa = pa >> many_ pa
-{-# inline some_ #-}
-
--- | Succeed if the first parser succeeds and the second one fails. The parsing
---   state is restored to the point of the first argument's success.
-notFollowedBy :: Parser r e a -> Parser r e b -> Parser r e a
-notFollowedBy p1 p2 = p1 <* lookahead (fails p2)
-{-# inline notFollowedBy #-}
-
--- | @isolate n p@ runs the parser @p@ isolated to the next @n@ bytes. All
---   isolated bytes must be consumed.
---
--- Throws a runtime error if given a negative integer.
-isolate :: Int -> Parser r e a -> Parser r e a
-isolate (I# n#) p = Parser \fp !r eob s n ->
-  let s' = plusAddr# s n#
-  in  case n# <=# minusAddr# eob s of
-        1# -> case n# >=# 0# of
-          1# -> case runParser# p fp r s' s n of
-            OK# a s'' n' -> case eqAddr# s' s'' of
-              1# -> OK# a s'' n'
-              _  -> Fail# -- isolated segment wasn't fully consumed
-            Fail#     -> Fail#
-            Err# e    -> Err# e
-          _  -> error "FlatParse.Basic.isolate: negative integer"
-        _  -> Fail# -- you tried to isolate more than we have left
-{-# inline isolate #-}
-
---------------------------------------------------------------------------------
-
--- | Get the current position in the input.
-getPos :: Parser r e Pos
-getPos = Parser \fp !r eob s n -> OK# (addrToPos# eob s) s n
-{-# inline getPos #-}
-
--- | Set the input position. Warning: this can result in crashes if the position points outside the
---   current buffer. It is always safe to `setPos` values which came from `getPos` with the current
---   input.
-setPos :: Pos -> Parser r e ()
-setPos s = Parser \fp !r eob _ n -> OK# () (posToAddr# eob s) n
-{-# inline setPos #-}
-
--- | The end of the input.
-endPos :: Pos
-endPos = Pos 0
-{-# inline endPos #-}
-
-
--- | Return the consumed span of a parser. Use `withSpan` if possible for better efficiency.
-spanOf :: Parser r e a -> Parser r e Span
-spanOf (Parser f) = Parser \fp !r eob s n -> case f fp r eob s n of
-  OK# a s' n -> OK# (Span (addrToPos# eob s) (addrToPos# eob s')) s' n
-  x          -> unsafeCoerce# x
-{-# inline spanOf #-}
-
--- | Bind the result together with the span of the result. CPS'd version of `spanOf`
---   for better unboxing.
-withSpan :: Parser r e a -> (a -> Span -> Parser r e b) -> Parser r e b
-withSpan (Parser f) g = Parser \fp !r eob s n -> case f fp r eob s n of
-  OK# a s' n -> runParser# (g a (Span (addrToPos# eob s) (addrToPos# eob s'))) fp r eob s' n
-  x          -> unsafeCoerce# x
-{-# inline withSpan #-}
-
--- | Return the `B.ByteString` consumed by a parser. Note: it's more efficient to use `spanOf` and
---   `withSpan` instead.
-byteStringOf :: Parser r e a -> Parser r e B.ByteString
-byteStringOf (Parser f) = Parser \fp !r eob s n -> case f fp r eob s n of
-  OK# a s' n -> OK# (B.PS (ForeignPtr s fp) 0 (I# (minusAddr# s' s))) s' n
-  x          -> unsafeCoerce# x
-{-# inline byteStringOf #-}
-
--- | CPS'd version of `byteStringOf`. Can be more efficient, because the result is more eagerly unboxed
---   by GHC. It's more efficient to use `spanOf` or `withSpan` instead.
-withByteString :: Parser r e a -> (a -> B.ByteString -> Parser r e b) -> Parser r e b
-withByteString (Parser f) g = Parser \fp !r eob s n -> case f fp r eob s n of
-  OK# a s' n -> runParser# (g a (B.PS (ForeignPtr s fp) 0 (I# (minusAddr# s' s)))) fp r eob s' n
-  x          -> unsafeCoerce# x
-{-# inline withByteString #-}
-
--- | Create a `B.ByteString` from a `Span`. The result is invalid is the `Span` points
---   outside the current buffer, or if the `Span` start is greater than the end position.
-unsafeSpanToByteString :: Span -> Parser r e B.ByteString
-unsafeSpanToByteString (Span l r) =
-  lookahead (setPos l >> byteStringOf (setPos r))
-{-# inline unsafeSpanToByteString #-}
-
-
--- | Run a parser in a given input span. The input position and the `Int` state is restored after
---   the parser is finished, so `inSpan` does not consume input and has no side effect.  Warning:
---   this operation may crash if the given span points outside the current parsing buffer. It's
---   always safe to use `inSpan` if the span comes from a previous `withSpan` or `spanOf` call on
---   the current input.
-inSpan :: Span -> Parser r e a -> Parser r e a
-inSpan (Span s eob) (Parser f) = Parser \fp !r eob' s' n' ->
-  case f fp r (posToAddr# eob' eob) (posToAddr# eob' s) n' of
-    OK# a _ _ -> OK# a s' n'
-    x         -> unsafeCoerce# x
-{-# inline inSpan #-}
-
-
---------------------------------------------------------------------------------
-
--- | Parse the rest of the current line as a `String`. Assumes UTF-8 encoding,
---   throws an error if the encoding is invalid.
-takeLine :: Parser r e String
-takeLine = branch eof (pure "") do
-  c <- anyChar
-  case c of
-    '\n' -> pure ""
-    _    -> (c:) <$> takeLine
-
--- | Parse the rest of the current line as a `String`, but restore the parsing state.
---   Assumes UTF-8 encoding. This can be used for debugging.
-traceLine :: Parser r e String
-traceLine = lookahead takeLine
-
--- | Take the rest of the input as a `String`. Assumes UTF-8 encoding.
-takeRest :: Parser r e String
-takeRest = branch eof (pure "") do
-  c <- anyChar
-  cs <- takeRest
-  pure (c:cs)
-
--- | Get the rest of the input as a `String`, but restore the parsing state. Assumes UTF-8 encoding.
---   This can be used for debugging.
-traceRest :: Parser r e String
-traceRest = lookahead takeRest
-
---------------------------------------------------------------------------------
-
--- | Check that the input has at least the given number of bytes.
-ensureBytes# :: Int -> Parser r e ()
-ensureBytes# (I# len) = Parser \fp !r eob s n ->
-  case len  <=# minusAddr# eob s of
-    1# -> OK# () s n
-    _  -> Fail#
-{-# inline ensureBytes# #-}
-
--- | Unsafely read a concrete byte from the input. It's not checked that the input has
---   enough bytes.
-scan8# :: Word8 -> Parser r e ()
-scan8# (W8# c) = Parser \fp !r eob s n ->
-  case indexWord8OffAddr# s 0# of
-    c' -> case eqWord8'# c c' of
-      1# -> OK# () (plusAddr# s 1#) n
-      _  -> Fail#
-{-# inline scan8# #-}
-
--- | Unsafely read two concrete bytes from the input. It's not checked that the input has
---   enough bytes.
-scan16# :: Word16 -> Parser r e ()
-scan16# (W16# c) = Parser \fp !r eob s n ->
-  case indexWord16OffAddr# s 0# of
-    c' -> case eqWord16'# c c' of
-      1# -> OK# () (plusAddr# s 2#) n
-      _  -> Fail#
-{-# inline scan16# #-}
-
--- | Unsafely read four concrete bytes from the input. It's not checked that the input has
---   enough bytes.
-scan32# :: Word32 -> Parser r e ()
-scan32# (W32# c) = Parser \fp !r eob s n ->
-  case indexWord32OffAddr# s 0# of
-    c' -> case eqWord32'# c c' of
-      1# -> OK# () (plusAddr# s 4#) n
-      _  -> Fail#
-{-# inline scan32# #-}
-
--- | Unsafely read eight concrete bytes from the input. It's not checked that the input has
---   enough bytes.
-scan64# :: Word -> Parser r e ()
-scan64# (W# c) = Parser \fp !r eob s n ->
-  case indexWord64OffAddr# s 0# of
-    c' -> case eqWord# c c' of
-      1# -> OK# () (plusAddr# s 8#) n
-      _  -> Fail#
-{-# inline scan64# #-}
-
--- | Unsafely read and return a byte from the input. It's not checked that the input is non-empty.
-scanAny8# :: Parser r e Word8
-scanAny8# = Parser \fp !r eob s n -> OK# (W8# (indexWord8OffAddr# s 0#)) (plusAddr# s 1#) n
-{-# inline scanAny8# #-}
-
-scanPartial64# :: Int -> Word -> Parser r e ()
-scanPartial64# (I# len) (W# w) = Parser \fp !r eob s n ->
-  case indexWordOffAddr# s 0# of
-    w' -> case uncheckedIShiftL# (8# -# len) 3# of
-      sh -> case uncheckedShiftL# w' sh of
-        w' -> case uncheckedShiftRL# w' sh of
-          w' -> case eqWord# w w' of
-            1# -> OK# () (plusAddr# s len) n
-            _  -> Fail#
-{-# inline scanPartial64# #-}
-
--- | Decrease the current input position by the given number of bytes.
-setBack# :: Int -> Parser r e ()
-setBack# (I# i) = Parser \fp !r eob s n ->
-  OK# () (plusAddr# s (negateInt# i)) n
-{-# inline setBack# #-}
-
--- | Template function, creates a @Parser r e ()@ which unsafely scans a given
---   sequence of bytes.
-scanBytes# :: [Word] -> Q Exp
-scanBytes# bytes = do
-  let !(leading, w8s) = splitBytes bytes
-      !scanw8s        = go w8s where
-                         go (w8:[] ) = [| scan64# w8 |]
-                         go (w8:w8s) = [| scan64# w8 >> $(go w8s) |]
-                         go []       = [| pure () |]
-  case w8s of
-    [] -> go leading
-          where
-            go (a:b:c:d:[]) = let !w = packBytes [a, b, c, d] in [| scan32# w |]
-            go (a:b:c:d:ws) = let !w = packBytes [a, b, c, d] in [| scan32# w >> $(go ws) |]
-            go (a:b:[])     = let !w = packBytes [a, b]       in [| scan16# w |]
-            go (a:b:ws)     = let !w = packBytes [a, b]       in [| scan16# w >> $(go ws) |]
-            go (a:[])       = [| scan8# a |]
-            go []           = [| pure () |]
-    _  -> case leading of
-
-      []              -> scanw8s
-      [a]             -> [| scan8# a >> $scanw8s |]
-      ws@[a, b]       -> let !w = packBytes ws in [| scan16# w >> $scanw8s |]
-      ws@[a, b, c, d] -> let !w = packBytes ws in [| scan32# w >> $scanw8s |]
-      ws              -> let !w = packBytes ws
-                             !l = length ws
-                         in [| scanPartial64# l w >> $scanw8s |]
-
-
--- Switching code generation
---------------------------------------------------------------------------------
-
-#if MIN_VERSION_base(4,15,0)
-mkDoE = DoE Nothing
-{-# inline mkDoE #-}
-#else
-mkDoE = DoE
-{-# inline mkDoE #-}
-#endif
-
-genTrie :: (Map (Maybe Int) Exp, Trie' (Rule, Int, Maybe Int)) -> Q Exp
-genTrie (rules, t) = do
-  branches <- traverse (\e -> (,) <$> (newName "rule") <*> pure e) rules
-
-  let ix m k = case M.lookup k m of
-        Nothing -> error ("key not in map: " ++ show k)
-        Just a  -> a
-
-  let ensure :: Maybe Int -> Maybe (Q Exp)
-      ensure = fmap (\n -> [| ensureBytes# n |])
-
-      fallback :: Rule -> Int ->  Q Exp
-      fallback rule 0 = pure $ VarE $ fst $ ix branches rule
-      fallback rule n = [| setBack# n >> $(pure $ VarE $ fst $ ix branches rule) |]
-
-  let go :: Trie' (Rule, Int, Maybe Int) -> Q Exp
-      go = \case
-        Branch' (r, n, alloc) ts
-          | M.null ts -> pure $ VarE $ fst $ branches M.! r
-          | otherwise -> do
-              !next         <- (traverse . traverse) go (M.toList ts)
-              !defaultCase  <- fallback r (n + 1)
-
-              let cases = mkDoE $
-                    [BindS (VarP (mkName "c")) (VarE 'scanAny8#),
-                      NoBindS (CaseE (VarE (mkName "c"))
-                         (map (\(w, t) ->
-                                 Match (LitP (IntegerL (fromIntegral w)))
-                                       (NormalB t)
-                                       [])
-                              next
-                          ++ [Match WildP (NormalB defaultCase) []]))]
-
-              case ensure alloc of
-                Nothing    -> pure cases
-                Just alloc -> [| branch $alloc $(pure cases) $(fallback r n) |]
-
-        Path (r, n, alloc) ws t ->
-          case ensure alloc of
-            Nothing    -> [| branch $(scanBytes# ws) $(go t) $(fallback r n)|]
-            Just alloc -> [| branch ($alloc >> $(scanBytes# ws)) $(go t) $(fallback r n) |]
-
-  letE
-    (map (\(x, rhs) -> valD (varP x) (normalB (pure rhs)) []) (Data.Foldable.toList branches))
-    (go t)
-
-parseSwitch :: Q Exp -> Q ([(String, Exp)], Maybe Exp)
-parseSwitch exp = exp >>= \case
-  CaseE (UnboundVarE _) []    -> error "switch: empty clause list"
-  CaseE (UnboundVarE _) cases -> do
-    (!cases, !last) <- pure (init cases, last cases)
-    !cases <- forM cases \case
-      Match (LitP (StringL str)) (NormalB rhs) [] -> pure (str, rhs)
-      _ -> error "switch: expected a match clause on a string literal"
-    (!cases, !last) <- case last of
-      Match (LitP (StringL str)) (NormalB rhs) [] -> pure (cases ++ [(str, rhs)], Nothing)
-      Match WildP                (NormalB rhs) [] -> pure (cases, Just rhs)
-      _ -> error "switch: expected a match clause on a string literal or a wildcard"
-    pure (cases, last)
-  _ -> error "switch: expected a \"case _ of\" expression"
-
-genSwitchTrie' :: Maybe Exp -> [(String, Exp)] -> Maybe Exp
-              -> (Map (Maybe Int) Exp, Trie' (Rule, Int, Maybe Int))
-genSwitchTrie' postAction cases fallback =
-
-  let (!branches, !strings) = unzip do
-        (!i, (!str, !rhs)) <- zip [0..] cases
-        case postAction of
-          Nothing    -> pure ((Just i, rhs), (i, str))
-          Just !post -> pure ((Just i, (VarE '(>>)) `AppE` post `AppE` rhs), (i, str))
-
-      !m    =  M.fromList ((Nothing, maybe (VarE 'failed) id fallback) : branches)
-      !trie = compileTrie strings
-  in (m , trie)
-
---------------------------------------------------------------------------------
-
-withAnyWord8# :: (Word8'# -> Parser r e a) -> Parser r e a
-withAnyWord8# p = Parser \fp !r eob buf n -> case eqAddr# eob buf of
-  1# -> Fail#
-  _  -> case indexWord8OffAddr# buf 0# of
-    w# -> runParser# (p w#) fp r eob (plusAddr# buf 1#) n
-{-# inline withAnyWord8# #-}
-
-withAnyWord16# :: (Word16'# -> Parser r e a) -> Parser r e a
-withAnyWord16# p = Parser \fp !r eob buf n -> case 2# <=# minusAddr# eob buf of
-  0# -> Fail#
-  _  -> case indexWord16OffAddr# buf 0# of
-    w# -> runParser# (p w#) fp r eob (plusAddr# buf 2#) n
-{-# inline withAnyWord16# #-}
-
-withAnyWord32# :: (Word32'# -> Parser r e a) -> Parser r e a
-withAnyWord32# p = Parser \fp !r eob buf n -> case 4# <=# minusAddr# eob buf of
-  0# -> Fail#
-  _  -> case indexWord32OffAddr# buf 0# of
-    w# -> runParser# (p w#) fp r eob (plusAddr# buf 4#) n
-{-# inline withAnyWord32# #-}
-
-withAnyWord64# :: (Word# -> Parser r e a) -> Parser r e a
-withAnyWord64# p = Parser \fp !r eob buf n -> case 8# <=# minusAddr# eob buf of
-  0# -> Fail#
-  _  -> case indexWordOffAddr# buf 0# of
-    w# -> runParser# (p w#) fp r eob (plusAddr# buf 8#) n
-{-# inline withAnyWord64# #-}
-
-withAnyInt8# :: (Int8'# -> Parser r e a) -> Parser r e a
-withAnyInt8# p = Parser \fp !r eob buf n -> case eqAddr# eob buf of
-  1# -> Fail#
-  _  -> case indexInt8OffAddr# buf 0# of
-    i# -> runParser# (p i#) fp r eob (plusAddr# buf 1#) n
-{-# inline withAnyInt8# #-}
-
-withAnyInt16# :: (Int16'# -> Parser r e a) -> Parser r e a
-withAnyInt16# p = Parser \fp !r eob buf n -> case 2# <=# minusAddr# eob buf of
-  0# -> Fail#
-  _  -> case indexInt16OffAddr# buf 0# of
-    i# -> runParser# (p i#) fp r eob (plusAddr# buf 2#) n
-{-# inline withAnyInt16# #-}
-
-withAnyInt32# :: (Int32'# -> Parser r e a) -> Parser r e a
-withAnyInt32# p = Parser \fp !r eob buf n -> case 4# <=# minusAddr# eob buf of
-  0# -> Fail#
-  _  -> case indexInt32OffAddr# buf 0# of
-    i# -> runParser# (p i#) fp r eob (plusAddr# buf 4#) n
-{-# inline withAnyInt32# #-}
-
-withAnyInt64# :: (Int# -> Parser r e a) -> Parser r e a
-withAnyInt64# p = Parser \fp !r eob buf n -> case 8# <=# minusAddr# eob buf of
-  0# -> Fail#
-  _  -> case indexInt64OffAddr# buf 0# of
-    i# -> runParser# (p i#) fp r eob (plusAddr# buf 8#) n
-{-# inline withAnyInt64# #-}
-
---------------------------------------------------------------------------------
-
--- | Parse any 'Word8' (byte).
-anyWord8 :: Parser r e Word8
-anyWord8 = withAnyWord8# (\w# -> pure (W8# w#))
-{-# inline anyWord8 #-}
-
--- | Skip any 'Word8' (byte).
-anyWord8_ :: Parser r e ()
-anyWord8_ = () <$ anyWord8
-{-# inline anyWord8_ #-}
-
--- | Parse any 'Word16'.
-anyWord16 :: Parser r e Word16
-anyWord16 = withAnyWord16# (\w# -> pure (W16# w#))
-{-# inline anyWord16 #-}
-
--- | Skip any 'Word16'.
-anyWord16_ :: Parser r e ()
-anyWord16_ = () <$ anyWord16
-{-# inline anyWord16_ #-}
-
--- | Parse any 'Word32'.
-anyWord32 :: Parser r e Word32
-anyWord32 = withAnyWord32# (\w# -> pure (W32# w#))
-{-# inline anyWord32 #-}
-
--- | Skip any 'Word32'.
-anyWord32_ :: Parser r e ()
-anyWord32_ = () <$ anyWord32
-{-# inline anyWord32_ #-}
-
--- | Parse any 'Word64'.
-anyWord64 :: Parser r e Word64
-anyWord64 = withAnyWord64# (\w# -> pure (W64# w#))
-{-# inline anyWord64 #-}
-
--- | Skip any 'Word64'.
-anyWord64_ :: Parser r e ()
-anyWord64_ = () <$ anyWord64
-{-# inline anyWord64_ #-}
-
--- | Parse any 'Word'.
-anyWord :: Parser r e Word
-anyWord = withAnyWord64# (\w# -> pure (W# w#))
-{-# inline anyWord #-}
-
--- | Skip any 'Word'.
-anyWord_ :: Parser r e ()
-anyWord_ = () <$ anyWord
-{-# inline anyWord_ #-}
-
---------------------------------------------------------------------------------
-
--- | Parse any 'Int8'.
-anyInt8 :: Parser r e Int8
-anyInt8 = withAnyInt8# (\i# -> pure (I8# i#))
-{-# inline anyInt8 #-}
-
--- | Parse any 'Int16'.
-anyInt16 :: Parser r e Int16
-anyInt16 = withAnyInt16# (\i# -> pure (I16# i#))
-{-# inline anyInt16 #-}
-
--- | Parse any 'Int32'.
-anyInt32 :: Parser r e Int32
-anyInt32 = withAnyInt32# (\i# -> pure (I32# i#))
-{-# inline anyInt32 #-}
-
--- | Parse any 'Int64'.
-anyInt64 :: Parser r e Int64
-anyInt64 = withAnyInt64# (\i# -> pure (I64# i#))
-{-# inline anyInt64 #-}
-
--- | Parse any 'Int'.
-anyInt :: Parser r e Int
-anyInt = withAnyInt64# (\i# -> pure (I# i#))
-{-# inline anyInt #-}
-
---------------------------------------------------------------------------------
-
--- | Parse any 'Word16' (little-endian).
-anyWord16le :: Parser r e Word16
-anyWord16le = anyWord16
-{-# inline anyWord16le #-}
-
--- | Parse any 'Word16' (big-endian).
-anyWord16be :: Parser r e Word16
-anyWord16be = withAnyWord16# (\w# -> pure (W16# (byteSwap16'# w#)))
-{-# inline anyWord16be #-}
-
--- | Parse any 'Word32' (little-endian).
-anyWord32le :: Parser r e Word32
-anyWord32le = anyWord32
-{-# inline anyWord32le #-}
-
--- | Parse any 'Word32' (big-endian).
-anyWord32be :: Parser r e Word32
-anyWord32be = withAnyWord32# (\w# -> pure (W32# (byteSwap32'# w#)))
-{-# inline anyWord32be #-}
-
--- | Parse any 'Word64' (little-endian).
-anyWord64le :: Parser r e Word64
-anyWord64le = anyWord64
-{-# inline anyWord64le #-}
-
--- | Parse any 'Word64' (big-endian).
-anyWord64be :: Parser r e Word64
-anyWord64be = withAnyWord64# (\w# -> pure (W64# (byteSwap# w#)))
-{-# inline anyWord64be #-}
-
---------------------------------------------------------------------------------
-
--- | Parse any 'Int16' (little-endian).
-anyInt16le :: Parser r e Int16
-anyInt16le = anyInt16
-{-# inline anyInt16le #-}
-
--- | Parse any 'Int16' (big-endian).
-anyInt16be :: Parser r e Int16
-anyInt16be = withAnyWord16# (\w# -> pure (I16# (word16ToInt16# (byteSwap16'# w#))))
-{-# inline anyInt16be #-}
-
--- | Parse any 'Int32' (little-endian).
-anyInt32le :: Parser r e Int32
-anyInt32le = anyInt32
-{-# inline anyInt32le #-}
-
--- | Parse any 'Int32' (big-endian).
-anyInt32be :: Parser r e Int32
-anyInt32be = withAnyWord32# (\w# -> pure (I32# (word32ToInt32# (byteSwap32'# w#))))
-{-# inline anyInt32be #-}
-
--- | Parse any 'Int64' (little-endian).
-anyInt64le :: Parser r e Int64
-anyInt64le = anyInt64
-{-# inline anyInt64le #-}
-
--- | Parse any 'Int64' (big-endian).
-anyInt64be :: Parser r e Int64
-anyInt64be = withAnyWord64# (\w# -> pure (I64# (word2Int# (byteSwap# w#))))
-{-# inline anyInt64be #-}
-
---------------------------------------------------------------------------------
-
--- | Skip forward @n@ bytes and run the given parser. Fails if fewer than @n@
---   bytes are available.
---
--- Throws a runtime error if given a negative integer.
-atSkip# :: Int# -> Parser r e a -> Parser r e a
-atSkip# os# (Parser p) = Parser \fp !r eob s n -> case os# <=# minusAddr# eob s of
-  1# -> case os# >=# 0# of
-    1# -> p fp r eob (plusAddr# s os#) n
-    _  -> error "FlatParse.Basic.atSkip#: negative integer"
-  _  -> Fail#
-{-# inline atSkip# #-}
-
--- | Read the given number of bytes as a 'ByteString'.
---
--- Throws a runtime error if given a negative integer.
-takeBs# :: Int# -> Parser r e B.ByteString
-takeBs# n# = Parser \fp !r eob s n -> case n# <=# minusAddr# eob s of
-  1# -> -- have to runtime check for negative values, because they cause a hang
-    case n# >=# 0# of
-      1# -> OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) (plusAddr# s n#) n
-      _  -> error "FlatParse.Basic.takeBs: negative integer"
-  _  -> Fail#
-{-# inline takeBs# #-}
-
---------------------------------------------------------------------------------
-
--- | Run a parser, passing it the current address the parser is at.
---
--- Useful for parsing offset-based data tables. For example, you may use this to
--- save the base address to use together with various 0-indexed offsets.
-withAddr# :: (Addr# -> Parser r e a) -> Parser r e a
-withAddr# p = Parser \fp !r eob s n -> runParser# (p s) fp r eob s n
-{-# inline withAddr# #-}
-
--- | @takeBsOffAddr# addr# offset# len#@ moves to @addr#@, skips @offset#@
---   bytes, reads @len#@ bytes into a 'ByteString', and restores the original
---   address.
---
--- The 'Addr#' should be from 'withAddr#'.
---
--- Useful for parsing offset-based data tables. For example, you may use this
--- together with 'withAddr#' to jump to an offset in your input and read some
--- data.
-takeBsOffAddr# :: Addr# -> Int# -> Int# -> Parser r e B.ByteString
-takeBsOffAddr# addr# offset# len# =
-    lookaheadFromAddr# addr# $ atSkip# offset# $ takeBs# len#
-{-# inline takeBsOffAddr# #-}
-
--- | 'lookahead', but specify the address to lookahead from.
---
--- The 'Addr#' should be from 'withAddr#'.
-lookaheadFromAddr# :: Addr# -> Parser r e a -> Parser r e a
-lookaheadFromAddr# s = lookahead . atAddr# s
-{-# inline lookaheadFromAddr# #-}
-
--- | Run a parser at the given address.
---
--- The 'Addr#' should be from 'withAddr#'.
---
--- This is a highly internal function -- you likely want 'lookaheadFromAddr#',
--- which will reset the address after running the parser.
-atAddr# :: Addr# -> Parser r e a -> Parser r e a
-atAddr# s (Parser p) = Parser \fp !r eob _ n -> p fp r eob s n
-{-# inline atAddr# #-}
-
---------------------------------------------------------------------------------
-
--- | Read a null-terminated bytestring (a C-style string).
---
--- Consumes the null terminator.
-anyCString :: Parser r e B.ByteString
-anyCString = Parser \fp !r eob s n -> go' fp eob s n
-  where
-    go' fp eob s0 n = go 0# s0 n
-      where
-        go n# s n = case eqAddr# eob s of
-          1# -> Fail#
-          _  ->
-            let s' = plusAddr# s 1#
-            -- TODO below is a candidate for improving with ExtendedLiterals!
-            in  case eqWord8# (indexWord8OffAddr''# s 0#) (wordToWord8''# 0##) of
-                  1# -> OK# (B.PS (ForeignPtr s0 fp) 0 (I# n#)) s' n
-                  _  -> go (n# +# 1#) s' n
-{-# inline anyCString #-}
-
--- | Read a null-terminated bytestring (a C-style string), where the bytestring
---   is known to be null-terminated somewhere in the input.
---
--- Highly unsafe. Unless you have a guarantee that the string will be null
--- terminated before the input ends, use 'anyCString' instead. Honestly, I'm not
--- sure if this is a good function to define. But here it is.
---
--- Fails on GHC versions older than 9.0, since we make use of the
--- 'cstringLength#' primop introduced in GHC 9.0, and we aren't very useful
--- without it.
---
--- Consumes the null terminator.
-anyCStringUnsafe :: Parser r e B.ByteString
-{-# inline anyCStringUnsafe #-}
-#if MIN_VERSION_base(4,15,0)
-anyCStringUnsafe = Parser \fp !r eob s n ->
-  case eqAddr# eob s of
-    1# -> Fail#
-    _  -> let n#  = cstringLength# s
-              s'# = plusAddr# s (n# +# 1#)
-           in OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) s'# n
-#else
-anyCStringUnsafe = error "Flatparse.Basic.anyCStringUnsafe: requires GHC 9.0 / base-4.15, not available on this compiler"
-#endif
diff --git a/test/Test.hs b/test/Test.hs
index 9db3ba8..da4641d 100644
--- a/test/Test.hs
+++ b/test/Test.hs
@@ -3,6 +3,7 @@
 module Main where
 
 import Numeric (showHex)
+
 import Data.ByteString (ByteString)
 import qualified Data.ByteString as B
 import qualified Data.Char
@@ -14,9 +15,12 @@ import Test.Hspec.QuickCheck
 import Test.QuickCheck hiding ( (.&.) )
 import Data.Word
 import Data.Int
+import GHC.Int
 import Data.Bits
 import Test.QuickCheck.Instances.ByteString()
 
+import Control.Applicative
+
 main :: IO ()
 main = hspec $ do
   basicSpec
@@ -33,7 +37,7 @@ basicSpec = describe "FlatParse.Basic" $ do
 
     describe "lookahead" $
       it "restores state" $ do
-        let p = FB.lookahead $(FB.string "fun") *> $(FB.string "function")
+        let p = FB.lookahead $(FB.getStringOf "fun") *> $(FB.getStringOf "function")
         p `shouldParse` "function"
 
     describe "fails" $ do
@@ -85,45 +89,44 @@ basicSpec = describe "FlatParse.Basic" $ do
         \(bs :: ByteString) ->
           (FB.skip (B.length bs) >> FB.eof) `shouldParse` bs
 
-    describe "char" $ do
-      it "succeeds on that char" $ $(FB.char 'a') `shouldParse` "a"
-      it "succeeds on multibyte char" $ $(FB.char 'ȩ') `shouldParse` FB.packUTF8 "ȩ"
-      it "fails on the wrong char" $ $(FB.char 'a') `shouldParseFail` "b"
-      it "fails at end of file" $ $(FB.char 'a') `shouldParseFail` ""
+    describe "getCharOf" $ do
+      it "succeeds on that char" $ $(FB.getCharOf 'a') `shouldParse` "a"
+      it "succeeds on multibyte char" $ $(FB.getCharOf 'ȩ') `shouldParse` FB.packUTF8 "ȩ"
+      it "fails on the wrong char" $ $(FB.getCharOf 'a') `shouldParseFail` "b"
+      it "fails at end of file" $ $(FB.getCharOf 'a') `shouldParseFail` ""
 
-    describe "byte" $ do
-      it "succeeds on that byte" $ FB.byte 0x61 `shouldParse` "\x61"
-      it "succeeds on high bytes" $ FB.byte 0xfe `shouldParse` "\xfe"
-      it "fails on the wrong byte" $ FB.byte 0x61 `shouldParseFail` "\x62"
-      it "fails on end of file" $ FB.byte 0x61 `shouldParseFail` ""
+    describe "getWord8Of" $ do
+      it "succeeds on that byte" $ FB.getWord8Of 0x61 `shouldParse` "\x61"
+      it "succeeds on high bytes" $ FB.getWord8Of 0xfe `shouldParse` "\xfe"
+      it "fails on the wrong byte" $ FB.getWord8Of 0x61 `shouldParseFail` "\x62"
+      it "fails on end of file" $ FB.getWord8Of 0x61 `shouldParseFail` ""
 
-    describe "bytes" $ do
+    describe "getBytesOf" $ do
       it "succeeds on those bytes" $
-        $(FB.bytes [1, 2, 3, 4]) `shouldParse` "\x01\x02\x03\x04"
+        $(FB.getBytesOf [1, 2, 3, 4]) `shouldParse` "\x01\x02\x03\x04"
       it "succeeds on high bytes" $
-        $(FB.bytes [0xf1, 0xf2, 0xf3, 0xf4]) `shouldParse` "\xf1\xf2\xf3\xf4"
+        $(FB.getBytesOf [0xf1, 0xf2, 0xf3, 0xf4]) `shouldParse` "\xf1\xf2\xf3\xf4"
       it "fails on wrong bytes" $
-        $(FB.bytes [1, 2, 5, 4]) `shouldParseFail` "\x01\x02\x03\x04"
+        $(FB.getBytesOf [1, 2, 5, 4]) `shouldParseFail` "\x01\x02\x03\x04"
       it "fails when out of space" $
-        $(FB.bytes [1, 2, 3, 4]) `shouldParseFail` "\x01\x02\x03"
+        $(FB.getBytesOf [1, 2, 3, 4]) `shouldParseFail` "\x01\x02\x03"
 
     describe "byteString" $ do
       it "succeeds on those bytes" $
-        FB.byteString (B.pack [1, 2, 3, 4]) `shouldParse` "\x01\x02\x03\x04"
+        FB.getByteStringOf (B.pack [1, 2, 3, 4]) `shouldParse` "\x01\x02\x03\x04"
       it "succeeds on high bytestring" $
-        FB.byteString (B.pack [0xf1, 0xf2, 0xf3, 0xf4]) `shouldParse` "\xf1\xf2\xf3\xf4"
+        FB.getByteStringOf (B.pack [0xf1, 0xf2, 0xf3, 0xf4]) `shouldParse` "\xf1\xf2\xf3\xf4"
       it "fails on wrong bytestring" $
-        FB.byteString (B.pack [1, 2, 5, 4]) `shouldParseFail` "\x01\x02\x03\x04"
+        FB.getByteStringOf (B.pack [1, 2, 5, 4]) `shouldParseFail` "\x01\x02\x03\x04"
       it "fails when out of space" $
-        FB.byteString (B.pack [1, 2, 3, 4]) `shouldParseFail` "\x01\x02\x03"
-
+        FB.getByteStringOf (B.pack [1, 2, 3, 4]) `shouldParseFail` "\x01\x02\x03"
 
-    describe "string" $ do
-      it "succeeds on the right string" $ $(FB.string "foo") `shouldParse` "foo"
+    describe "getStringOf" $ do
+      it "succeeds on the right string" $ $(FB.getStringOf "foo") `shouldParse` "foo"
       it "succeeds with multibyte chars" $
-        $(FB.string "foȩ") `shouldParse` FB.packUTF8 "foȩ"
-      it "fails on the wrong string" $ $(FB.string "foo") `shouldParseFail` "bar"
-      it "fails when out of space" $ $(FB.string "foo") `shouldParseFail` "fo"
+        $(FB.getStringOf "foȩ") `shouldParse` FB.packUTF8 "foȩ"
+      it "fails on the wrong string" $ $(FB.getStringOf "foo") `shouldParseFail` "bar"
+      it "fails when out of space" $ $(FB.getStringOf "foo") `shouldParseFail` "fo"
 
     describe "switch" $ do
       it "parses simple words" $
@@ -181,7 +184,7 @@ basicSpec = describe "FlatParse.Basic" $ do
     describe "switchWithPost" $ do
       it "applies post after match" $
         $( FB.switchWithPost
-             (Just [|$(FB.string "bar")|])
+             (Just [|$(FB.getStringOf "bar")|])
              [|
                case _ of
                  "foo" -> pure ()
@@ -191,7 +194,7 @@ basicSpec = describe "FlatParse.Basic" $ do
 
       it "doesn't apply post after default" $
         $( FB.switchWithPost
-             (Just [|$(FB.string "bar")|])
+             (Just [|$(FB.getStringOf "bar")|])
              [|
                case _ of
                  "foo" -> pure ()
@@ -202,7 +205,7 @@ basicSpec = describe "FlatParse.Basic" $ do
 
       it "requires the post must match" $
         $( FB.switchWithPost
-             (Just [|$(FB.string "bar")|])
+             (Just [|$(FB.getStringOf "bar")|])
              [|
                case _ of
                  "foo" -> pure ()
@@ -253,7 +256,7 @@ basicSpec = describe "FlatParse.Basic" $ do
 
       it "applies post after match" $
         $( FB.rawSwitchWithPost
-             (Just [|$(FB.string "bar")|])
+             (Just [|$(FB.getStringOf "bar")|])
              [("foo", [|pure ()|])]
              Nothing
          )
@@ -261,7 +264,7 @@ basicSpec = describe "FlatParse.Basic" $ do
 
       it "doesn't apply post after default" $
         $( FB.rawSwitchWithPost
-             (Just [|$(FB.string "bar")|])
+             (Just [|$(FB.getStringOf "bar")|])
              [("foo", [|pure ()|])]
              (Just [|pure ()|])
          )
@@ -269,7 +272,7 @@ basicSpec = describe "FlatParse.Basic" $ do
 
       it "requires the post must match" $
         $( FB.rawSwitchWithPost
-             (Just [|$(FB.string "bar")|])
+             (Just [|$(FB.getStringOf "bar")|])
              [("foo", [|pure ()|])]
              Nothing
          )
@@ -334,54 +337,54 @@ basicSpec = describe "FlatParse.Basic" $ do
         FB.fusedSatisfy (const True) (const True) (const True) (const True)
           `shouldParseFail` ""
 
-    describe "anyWord8" $ do
-      it "reads a byte" $ FB.anyWord8 `shouldParseWith` ("\xef", 0xef)
-      it "fails on FB.empty input" $ FB.anyWord8 `shouldParseFail` ""
+    describe "getWord8" $ do
+      it "reads a byte" $ FB.getWord8 `shouldParseWith` ("\xef", 0xef)
+      it "fails on empty input" $ FB.getWord8 `shouldParseFail` ""
 
-    describe "anyWord16" $ do
+    describe "getWord16" $ do
       -- Byte order is unspecified, so just assert that it succeeds.
-      it "succeeds" $ FB.anyWord16 `shouldParse` "\xef\xbe"
+      it "succeeds" $ FB.getWord16 `shouldParse` "\xef\xbe"
 
-      it "fails on FB.empty input" $ FB.anyWord16 `shouldParseFail` ""
-      it "fails on insufficient input" $ FB.anyWord16 `shouldParseFail` "\xff"
+      it "fails on empty input" $ FB.getWord16 `shouldParseFail` ""
+      it "fails on insufficient input" $ FB.getWord16 `shouldParseFail` "\xff"
 
-    describe "anyWord32" $ do
+    describe "getWord32" $ do
       -- Byte order is unspecified, so just assert that it succeeds.
-      it "succeeds" $ FB.anyWord32 `shouldParse` "\xef\xbe\xae\x7e"
+      it "succeeds" $ FB.getWord32 `shouldParse` "\xef\xbe\xae\x7e"
 
-      it "fails on empty input" $ FB.anyWord32 `shouldParseFail` ""
+      it "fails on empty input" $ FB.getWord32 `shouldParseFail` ""
       it "fails on insufficient input" $
-        FB.anyWord32 `shouldParseFail` "\xff\xff\xff"
+        FB.getWord32 `shouldParseFail` "\xff\xff\xff"
 
-    describe "anyWord" $ do
+    describe "getWord" $ do
       -- This combinator is inherently non-portable, but we know a Word is at
       -- least some bytes.
-      it "fails on FB.empty input" $ FB.anyWord `shouldParseFail` ""
-
-    describe "anyChar" $ do
-      it "reads 1-byte char" $ FB.anyChar `shouldParseWith` (FB.packUTF8 "$", '$')
-      it "reads 2-byte char" $ FB.anyChar `shouldParseWith` (FB.packUTF8 "¢", '¢')
-      it "reads 3-byte char" $ FB.anyChar `shouldParseWith` (FB.packUTF8 "€", '€')
-      it "reads 4-byte char" $ FB.anyChar `shouldParseWith` (FB.packUTF8 "𐍈", '𐍈')
-      it "fails on FB.empty input" $ FB.anyChar `shouldParseFail` ""
-
-    describe "anyChar_" $ do
-      it "reads 1-byte char" $ FB.anyChar_ `shouldParseWith` (FB.packUTF8 "$", ())
-      it "reads 2-byte char" $ FB.anyChar_ `shouldParseWith` (FB.packUTF8 "¢", ())
-      it "reads 3-byte char" $ FB.anyChar_ `shouldParseWith` (FB.packUTF8 "€", ())
-      it "reads 4-byte char" $ FB.anyChar_ `shouldParseWith` (FB.packUTF8 "𐍈", ())
-      it "fails on empty input" $ FB.anyChar_ `shouldParseFail` ""
-
-    describe "anyCharASCII" $ do
-      it "reads ASCII char" $ FB.anyCharASCII `shouldParseWith` (FB.packUTF8 "$", '$')
-      it "fails on non-ASCII char" $ FB.anyCharASCII `shouldParseFail` FB.packUTF8 "¢"
-      it "fails on empty input" $ FB.anyCharASCII `shouldParseFail` ""
-
-    describe "anyCharASCII_" $ do
-      it "reads ASCII char" $ FB.anyCharASCII_ `shouldParseWith` (FB.packUTF8 "$", ())
+      it "fails on empty input" $ FB.getWord `shouldParseFail` ""
+
+    describe "getChar" $ do
+      it "reads 1-byte char" $ FB.getChar `shouldParseWith` (FB.packUTF8 "$", '$')
+      it "reads 2-byte char" $ FB.getChar `shouldParseWith` (FB.packUTF8 "¢", '¢')
+      it "reads 3-byte char" $ FB.getChar `shouldParseWith` (FB.packUTF8 "€", '€')
+      it "reads 4-byte char" $ FB.getChar `shouldParseWith` (FB.packUTF8 "𐍈", '𐍈')
+      it "fails on empty input" $ FB.getChar `shouldParseFail` ""
+
+    describe "getChar_" $ do
+      it "reads 1-byte char" $ FB.getChar_ `shouldParseWith` (FB.packUTF8 "$", ())
+      it "reads 2-byte char" $ FB.getChar_ `shouldParseWith` (FB.packUTF8 "¢", ())
+      it "reads 3-byte char" $ FB.getChar_ `shouldParseWith` (FB.packUTF8 "€", ())
+      it "reads 4-byte char" $ FB.getChar_ `shouldParseWith` (FB.packUTF8 "𐍈", ())
+      it "fails on empty input" $ FB.getChar_ `shouldParseFail` ""
+
+    describe "getCharASCII" $ do
+      it "reads ASCII char" $ FB.getCharASCII `shouldParseWith` (FB.packUTF8 "$", '$')
+      it "fails on non-ASCII char" $ FB.getCharASCII `shouldParseFail` FB.packUTF8 "¢"
+      it "fails on empty input" $ FB.getCharASCII `shouldParseFail` ""
+
+    describe "getCharASCII_" $ do
+      it "reads ASCII char" $ FB.getCharASCII_ `shouldParseWith` (FB.packUTF8 "$", ())
       it "fails on non-ASCII char" $
-        FB.anyCharASCII_ `shouldParseFail` FB.packUTF8 "¢"
-      it "fails on empty input" $ FB.anyCharASCII_ `shouldParseFail` ""
+        FB.getCharASCII_ `shouldParseFail` FB.packUTF8 "¢"
+      it "fails on empty input" $ FB.getCharASCII_ `shouldParseFail` ""
 
     describe "isDigit" $ do
       it "agrees with Data.Char" $
@@ -395,104 +398,104 @@ basicSpec = describe "FlatParse.Basic" $ do
             FB.isLatinLetter c
               === (Data.Char.isAsciiUpper c || Data.Char.isAsciiLower c)
 
-    describe "readInt" $ do
+    describe "getAsciiDecimalInt" $ do
       it "round-trips on non-negative Ints" $
         property $
-          \(NonNegative i) -> FB.readInt `shouldParseWith` (FB.packUTF8 (show i), i)
+          \(NonNegative i) -> FB.getAsciiDecimalInt `shouldParseWith` (FB.packUTF8 (show i), i)
 
-      it "fails on non-integers" $ FB.readInt `shouldParseFail` "foo"
-      it "fails on negative integers" $ FB.readInt `shouldParseFail` "-5"
-      it "fails on FB.empty input" $ FB.readInt `shouldParseFail` ""
-
-    describe "readIntHex" $ do
-      it "round-trips on non-negative Ints, lowercase" $
-        property $
-          \(NonNegative i) -> FB.readIntHex `shouldParseWith` (FB.packUTF8 (showHex i ""), i)
-
-      it "round-trips on non-negative Ints, uppercase" $
-        property $
-          \(NonNegative i) -> FB.readIntHex `shouldParseWith` (FB.packUTF8 (Data.Char.toUpper <$> showHex i ""), i)
+      it "fails on non-integers" $ FB.getAsciiDecimalInt `shouldParseFail` "foo"
+      it "fails on negative integers" $ FB.getAsciiDecimalInt `shouldParseFail` "-5"
+      it "fails on empty input" $ FB.getAsciiDecimalInt `shouldParseFail` ""
 
-      it "fails on non-integers" $ FB.readIntHex `shouldParseFail` "quux"
-      it "fails on negative integers" $ FB.readIntHex `shouldParseFail` "-5"
-      it "fails on FB.empty input" $ FB.readIntHex `shouldParseFail` ""
-
-    describe "readInteger" $ do
+    describe "getAsciiDecimalInteger" $ do
       it "round-trips on non-negative Integers" $
         property $
           \(NonNegative i) ->
-            FB.readInteger `shouldParseWith` (FB.packUTF8 (show i), i)
+            FB.getAsciiDecimalInteger `shouldParseWith` (FB.packUTF8 (show i), i)
 
-      it "fails on non-integers" $ FB.readInteger `shouldParseFail` "foo"
-      it "fails on negative integers" $ FB.readInteger `shouldParseFail` "-5"
-      it "fails on FB.empty input" $ FB.readInteger `shouldParseFail` ""
+      it "fails on non-integers" $ FB.getAsciiDecimalInteger `shouldParseFail` "foo"
+      it "fails on negative integers" $ FB.getAsciiDecimalInteger `shouldParseFail` "-5"
+      it "fails on empty input" $ FB.getAsciiDecimalInteger `shouldParseFail` ""
 
-    describe "anyCString" $ do
+    describe "getCString" $ do
       prop "parses arbitrary null-terminated bytestrings" $
         \(bs :: ByteString) ->
           let bs' = B.snoc bs 0x00
               expected = B.takeWhile (/= 0x00) bs'
-          in  FB.anyCString `shouldParsePartialWith` (bs', expected)
+          in  FB.getCString `shouldParsePartialWith` (bs', expected)
+
+    describe "getAsciiHexInt" $ do
+      it "round-trips on non-negative Ints, lowercase" $
+        property $
+          \(NonNegative i) -> FB.getAsciiHexInt `shouldParseWith` (FB.packUTF8 (showHex i ""), i)
+
+      it "round-trips on non-negative Ints, uppercase" $
+        property $
+          \(NonNegative i) -> FB.getAsciiHexInt `shouldParseWith` (FB.packUTF8 (Data.Char.toUpper <$> showHex i ""), i)
+
+      it "fails on non-integers" $ FB.getAsciiHexInt `shouldParseFail` "quux"
+      it "fails on negative integers" $ FB.getAsciiHexInt `shouldParseFail` "-5"
+      it "fails on FB.empty input" $ FB.getAsciiHexInt `shouldParseFail` ""
 
     describe "Explicit-endianness machine integers" $ do
       describe "Unsigned" $ do
         prop "parses Word8s" $ do
-          \(w :: Word8)  -> FB.anyWord8    `shouldParseWith` (w8AsByteString w, w)
+          \(w :: Word8)  -> FB.getWord8    `shouldParseWith` (w8AsByteString w, w)
         prop "parses Word16s (LE)" $ do
-          \(w :: Word16) -> FB.anyWord16le `shouldParseWith` (w16leAsByteString w, w)
+          \(w :: Word16) -> FB.getWord16le `shouldParseWith` (w16leAsByteString w, w)
         prop "parses Word16s (BE)" $ do
-          \(w :: Word16) -> FB.anyWord16be `shouldParseWith` (B.reverse (w16leAsByteString w), w)
+          \(w :: Word16) -> FB.getWord16be `shouldParseWith` (B.reverse (w16leAsByteString w), w)
         prop "parses Word32s (LE)" $ do
-          \(w :: Word32) -> FB.anyWord32le `shouldParseWith` (w32leAsByteString w, w)
+          \(w :: Word32) -> FB.getWord32le `shouldParseWith` (w32leAsByteString w, w)
         prop "parses Word32s (BE)" $ do
-          \(w :: Word32) -> FB.anyWord32be `shouldParseWith` (B.reverse (w32leAsByteString w), w)
+          \(w :: Word32) -> FB.getWord32be `shouldParseWith` (B.reverse (w32leAsByteString w), w)
         prop "parses Word64s (LE)" $ do
-          \(w :: Word64) -> FB.anyWord64le `shouldParseWith` (w64leAsByteString w, w)
+          \(w :: Word64) -> FB.getWord64le `shouldParseWith` (w64leAsByteString w, w)
         prop "parses Word64s (BE)" $ do
-          \(w :: Word64) -> FB.anyWord64be `shouldParseWith` (B.reverse (w64leAsByteString w), w)
+          \(w :: Word64) -> FB.getWord64be `shouldParseWith` (B.reverse (w64leAsByteString w), w)
 
       describe "Signed" $ do
         prop "parses Int8s" $ do
-          \(i :: Int8)   -> FB.anyInt8     `shouldParseWith` (w8AsByteString i, i)
+          \(i :: Int8)   -> FB.getInt8     `shouldParseWith` (w8AsByteString i, i)
         prop "parses Int16s (LE)" $ do
-          \(i :: Int16)  -> FB.anyInt16le  `shouldParseWith` (w16leAsByteString i, i)
+          \(i :: Int16)  -> FB.getInt16le  `shouldParseWith` (w16leAsByteString i, i)
         prop "parses Int16s (BE)" $ do
-          \(i :: Int16)  -> FB.anyInt16be  `shouldParseWith` (B.reverse (w16leAsByteString i), i)
+          \(i :: Int16)  -> FB.getInt16be  `shouldParseWith` (B.reverse (w16leAsByteString i), i)
         prop "parses Int32s (LE)" $ do
-          \(i :: Int32)  -> FB.anyInt32le  `shouldParseWith` (w32leAsByteString i, i)
+          \(i :: Int32)  -> FB.getInt32le  `shouldParseWith` (w32leAsByteString i, i)
         prop "parses Int32s (BE)" $ do
-          \(i :: Int32)  -> FB.anyInt32be  `shouldParseWith` (B.reverse (w32leAsByteString i), i)
+          \(i :: Int32)  -> FB.getInt32be  `shouldParseWith` (B.reverse (w32leAsByteString i), i)
         prop "parses Int64s (LE)" $ do
-          \(i :: Int64)  -> FB.anyInt64le  `shouldParseWith` (w64leAsByteString i, i)
+          \(i :: Int64)  -> FB.getInt64le  `shouldParseWith` (w64leAsByteString i, i)
         prop "parses Int64s (BE)" $ do
-          \(i :: Int64)  -> FB.anyInt64be  `shouldParseWith` (B.reverse (w64leAsByteString i), i)
+          \(i :: Int64)  -> FB.getInt64be  `shouldParseWith` (B.reverse (w64leAsByteString i), i)
 
   describe "Combinators" $ do
     describe "Functor instance" $ do
       it "fmaps over the result" $
-        ((+ 2) <$> FB.readInt) `shouldParseWith` ("2", 4)
+        ((+ 2) <$> FB.getAsciiDecimalInt) `shouldParseWith` ("2", 4)
 
     describe "Applicative instance" $ do
       it "combines using <*>" $
-        ((+) <$> FB.readInt <* $(FB.string "+") <*> FB.readInt)
+        ((+) <$> FB.getAsciiDecimalInt <* $(FB.getStringOf "+") <*> FB.getAsciiDecimalInt)
           `shouldParseWith` ("2+3", 5)
 
     describe "Monad instance" $ do
       it "combines with a do block" $ do
         let parser = do
-              i <- FB.readInt
-              $(FB.string "+")
-              j <- FB.readInt
+              i <- FB.getAsciiDecimalInt
+              $(FB.getStringOf "+")
+              j <- FB.getAsciiDecimalInt
               pure (i + j)
         parser `shouldParseWith` ("2+3", 5)
 
     describe "(<|>)" $ do
       it "chooses first option on success" $
-        (("A" <$ $(FB.string "foo")) FB.<|> ("B" <$ $(FB.string "foo")))
+        (("A" <$ $(FB.getStringOf "foo")) <|> ("B" <$ $(FB.getStringOf "foo")))
           `shouldParseWith` ("foo", "A")
 
       it "chooses second option when first fails" $
-        (("A" <$ $(FB.string "bar")) FB.<|> ("B" <$ $(FB.string "foo")))
+        (("A" <$ $(FB.getStringOf "bar")) <|> ("B" <$ $(FB.getStringOf "foo")))
           `shouldParseWith` ("foo", "B")
 
     describe "branch" $ do
@@ -505,28 +508,28 @@ basicSpec = describe "FlatParse.Basic" $ do
 
     describe "chainl" $ do
       it "parses a chain of numbers" $
-        FB.chainl (+) FB.readInt ($(FB.char '+') *> FB.readInt)
+        FB.chainl (+) FB.getAsciiDecimalInt ($(FB.getCharOf '+') *> FB.getAsciiDecimalInt)
           `shouldParseWith` ("1+2+3", 6)
 
-      it "allows the right chain to be FB.empty" $
-        FB.chainl (+) FB.readInt ($(FB.char '+') *> FB.readInt)
+      it "allows the right chain to be empty" $
+        FB.chainl (+) FB.getAsciiDecimalInt ($(FB.getCharOf '+') *> FB.getAsciiDecimalInt)
           `shouldParseWith` ("1", 1)
 
       it "requires at least the leftmost parser to match" $
-        FB.chainl (+) FB.readInt ($(FB.char '+') *> FB.readInt)
+        FB.chainl (+) FB.getAsciiDecimalInt ($(FB.getCharOf '+') *> FB.getAsciiDecimalInt)
           `shouldParseFail` ""
 
     describe "chainr" $ do
       it "parses a chain of numbers" $
-        FB.chainr (+) (FB.readInt <* $(FB.char '+')) FB.readInt
+        FB.chainr (+) (FB.getAsciiDecimalInt <* $(FB.getCharOf '+')) FB.getAsciiDecimalInt
           `shouldParseWith` ("1+2+3", 6)
 
-      it "allows the left chain to be FB.empty" $
-        FB.chainr (+) (FB.readInt <* $(FB.char '+')) FB.readInt
+      it "allows the left chain to be empty" $
+        FB.chainr (+) (FB.getAsciiDecimalInt <* $(FB.getCharOf '+')) FB.getAsciiDecimalInt
           `shouldParseWith` ("1", 1)
 
       it "requires at least the rightmost parser to match" $
-        FB.chainr (+) (FB.readInt <* $(FB.char '+')) FB.readInt
+        FB.chainr (+) (FB.getAsciiDecimalInt <* $(FB.getCharOf '+')) FB.getAsciiDecimalInt
           `shouldParseFail` ""
 
     describe "many" $ do
@@ -563,19 +566,19 @@ basicSpec = describe "FlatParse.Basic" $ do
 
     describe "notFollowedBy" $ do
       it "succeeds when it should" $
-        FB.readInt `FB.notFollowedBy` $(FB.char '.') `shouldParsePartial` "123+5"
+        FB.getAsciiDecimalInt `FB.notFollowedBy` $(FB.getCharOf '.') `shouldParsePartial` "123+5"
       it "fails when first parser doesn't match" $
-        FB.readInt `FB.notFollowedBy` $(FB.char '.') `shouldParseFail` "a"
+        FB.getAsciiDecimalInt `FB.notFollowedBy` $(FB.getCharOf '.') `shouldParseFail` "a"
       it "fails when followed by the wrong thing" $
-        FB.readInt `FB.notFollowedBy` $(FB.char '.') `shouldParseFail` "123.0"
+        FB.getAsciiDecimalInt `FB.notFollowedBy` $(FB.getCharOf '.') `shouldParseFail` "123.0"
 
     describe "isolate" $ do
-      prop "isolate takeRestBs is identity" $ do
+      prop "isolate takeRest is identity" $ do
         \(bs :: ByteString) ->
-          FB.isolate (B.length bs) FB.takeRestBs `shouldParseWith` (bs, bs)
-      prop "isolate takeBs length is identity" $ do
+          FB.isolate (B.length bs) FB.takeRest `shouldParseWith` (bs, bs)
+      prop "isolate take length is identity" $ do
         \(bs :: ByteString) ->
-          FB.isolate (B.length bs) (FB.takeBs (B.length bs)) `shouldParseWith` (bs, bs)
+          FB.isolate (B.length bs) (FB.take (B.length bs)) `shouldParseWith` (bs, bs)
 
   describe "Positions and spans" $ do
     describe "Pos Ord instance" $ do
@@ -631,10 +634,10 @@ basicSpec = describe "FlatParse.Basic" $ do
     describe "traceLine" $ do
       pure ()
 
-    describe "takeRest" $ do
+    describe "takeRestString" $ do
       pure ()
 
-    describe "traceRest" $ do
+    describe "traceRestString" $ do
       pure ()
 
   describe "String conversions" $ do
@@ -649,8 +652,9 @@ basicSpec = describe "FlatParse.Basic" $ do
       -- use Int#/Int64# directly because Word8# -> Int# is annoying on old GHCs
       let bs = B.pack [ 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
                       , 0xFF, 0x31, 0x32, 0x33, 0x00, 0xFF]
-          p = FB.withAddr# $ \addr# -> FB.withAnyInt64# $ \os# ->
-                  FB.lookaheadFromAddr# addr# $ FB.atSkip# os# $ FB.anyCString
+          p = FB.withAddr# $ \addr# -> FB.withInt64 $ \os ->
+                  let !(I# os#) = fromIntegral os
+                  in  FB.lookaheadFromAddr# addr# $ FB.atSkip# os# $ FB.getCString
       p `shouldParsePartialWith` (bs, "123")
 
 --------------------------------------------------------------------------------

From a634c0d36e89b18001473f3ba4d4b9d99ae7d48f Mon Sep 17 00:00:00 2001
From: Ben Orchard <thefirstmuffinman@gmail.com>
Date: Wed, 26 Oct 2022 13:28:32 +0100
Subject: [PATCH 02/11] fix benchmarks

---
 bench/Bench.hs       | 8 ++++----
 bench/FPBasic.hs     | 8 ++++----
 bench/FPStateful.hs  | 5 ++++-
 bench/ReadInteger.hs | 4 ++--
 4 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/bench/Bench.hs b/bench/Bench.hs
index 7ec34ea..bf38a1f 100644
--- a/bench/Bench.hs
+++ b/bench/Bench.hs
@@ -10,7 +10,7 @@ import qualified Data.ByteString.Char8 as B
 import qualified Attoparsec
 import qualified Megaparsec
 import qualified Parsec
-import qualified FPStateful
+--import qualified FPStateful
 import qualified FPBasic
 import qualified ReadInteger
 
@@ -31,7 +31,7 @@ main :: IO ()
 main = defaultMain [
   bgroup "sexp" [
     bench "fpbasic"     $ whnf FPBasic.runSexp    sexpInp,
-    bench "fpstateful"  $ whnf FPStateful.runSexp sexpInp,
+    --bench "fpstateful"  $ whnf FPStateful.runSexp sexpInp,
     bench "attoparsec"  $ whnf Attoparsec.runSexp sexpInp,
     bench "megaparsec"  $ whnf Megaparsec.runSexp sexpInp,
     bench "parsec"      $ whnf Parsec.runSexp     sexpInp
@@ -39,7 +39,7 @@ main = defaultMain [
 
   bgroup "long keyword" [
     bench "fpbasic"    $ whnf FPBasic.runLongws    longwsInp,
-    bench "fpstateful" $ whnf FPStateful.runLongws longwsInp,
+    --bench "fpstateful" $ whnf FPStateful.runLongws longwsInp,
     bench "attoparsec" $ whnf Attoparsec.runLongws longwsInp,
     bench "megaparsec" $ whnf Megaparsec.runLongws longwsInp,
     bench "parsec"     $ whnf Parsec.runLongws     longwsInp
@@ -47,7 +47,7 @@ main = defaultMain [
 
   bgroup "numeral csv" [
     bench "fpbasic"    $ whnf FPBasic.runNumcsv    numcsvInp,
-    bench "fpstateful" $ whnf FPStateful.runNumcsv numcsvInp,
+    --bench "fpstateful" $ whnf FPStateful.runNumcsv numcsvInp,
     bench "attoparsec" $ whnf Attoparsec.runNumcsv numcsvInp,
     bench "megaparsec" $ whnf Megaparsec.runNumcsv numcsvInp,
     bench "parsec"     $ whnf Parsec.runNumcsv     numcsvInp
diff --git a/bench/FPBasic.hs b/bench/FPBasic.hs
index d555b44..b837822 100644
--- a/bench/FPBasic.hs
+++ b/bench/FPBasic.hs
@@ -7,18 +7,18 @@ module FPBasic (
 import FlatParse.Basic
 
 ws      = many_ $(switch [| case _ of " " -> pure (); "\n" -> pure () |])
-open    = $(char '(') >> ws
-close   = $(char ')') >> ws
+open    = $(getCharOf '(') >> ws
+close   = $(getCharOf ')') >> ws
 ident   = some_ (satisfyASCII_ isLatinLetter) >> ws
 sexp    = branch open (some_ sexp >> close) ident
 src     = sexp >> eof
 runSexp = runParser src
 
-longw     = $(string "thisisalongkeyword")
+longw     = $(getStringOf "thisisalongkeyword")
 longws    = some_ (longw >> ws) >> eof
 runLongws = runParser longws
 
 numeral   = some_ (satisfyASCII_ isDigit) >> ws
-comma     = $(char ',') >> ws
+comma     = $(getCharOf ',') >> ws
 numcsv    = numeral >> many_ (comma >> numeral) >> eof
 runNumcsv = runParser numcsv
diff --git a/bench/FPStateful.hs b/bench/FPStateful.hs
index 79e80b3..2632946 100644
--- a/bench/FPStateful.hs
+++ b/bench/FPStateful.hs
@@ -1,4 +1,4 @@
-
+{-
 module FPStateful (
     runSexp
   , runLongws
@@ -25,3 +25,6 @@ numeral   = some_ (satisfyASCII_ isDigit) >> ws
 comma     = $(char ',') >> ws
 numcsv    = numeral >> many_ (comma >> numeral) >> eof
 runNumcsv = runParser numcsv () 0
+-}
+
+module FPStateful where
diff --git a/bench/ReadInteger.hs b/bench/ReadInteger.hs
index 7109ff8..5f0988c 100644
--- a/bench/ReadInteger.hs
+++ b/bench/ReadInteger.hs
@@ -3,5 +3,5 @@ module ReadInteger where
 
 import FlatParse.Basic as FPBasic
 
-readInt     = runParser FPBasic.readInt
-readInteger = runParser FPBasic.readInteger
+readInt     = runParser FPBasic.getAsciiDecimalInt
+readInteger = runParser FPBasic.getAsciiDecimalInteger

From 3a00c4f31288e2ce3491895496f03f060db0dbd6 Mon Sep 17 00:00:00 2001
From: Ben Orchard <thefirstmuffinman@gmail.com>
Date: Wed, 26 Oct 2022 13:53:16 +0100
Subject: [PATCH 03/11] CI: +GHC 9.4

---
 .github/workflows/haskell.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/haskell.yml b/.github/workflows/haskell.yml
index 77dfefd..df8aa0e 100644
--- a/.github/workflows/haskell.yml
+++ b/.github/workflows/haskell.yml
@@ -13,7 +13,7 @@ jobs:
     strategy:
       matrix:
         cabal: ["3.6"]
-        ghc: ["8.6.5", "8.8.4", "8.10.7", "9.0.2", "9.2.3"]
+        ghc: ["8.6.5", "8.8.4", "8.10.7", "9.0.2", "9.2.3", "9.4.2"]
     env:
       CONFIG: "--enable-tests --enable-benchmarks"
     steps:

From 5aef3551cc303db2383f76616ad7e6f2e2a7ce21 Mon Sep 17 00:00:00 2001
From: Ben Orchard <thefirstmuffinman@gmail.com>
Date: Wed, 26 Oct 2022 13:53:29 +0100
Subject: [PATCH 04/11] CI: build with LLVM

---
 .github/workflows/haskell.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/haskell.yml b/.github/workflows/haskell.yml
index df8aa0e..3cb84ad 100644
--- a/.github/workflows/haskell.yml
+++ b/.github/workflows/haskell.yml
@@ -15,7 +15,7 @@ jobs:
         cabal: ["3.6"]
         ghc: ["8.6.5", "8.8.4", "8.10.7", "9.0.2", "9.2.3", "9.4.2"]
     env:
-      CONFIG: "--enable-tests --enable-benchmarks"
+      CONFIG: "--enable-tests --enable-benchmarks --flags llvm"
     steps:
       - uses: actions/checkout@v2.3.4
       - uses: actions/cache@v2

From 64e1b0427ea1610a17cebca79c30a9724c4d9eb6 Mon Sep 17 00:00:00 2001
From: Ben Orchard <thefirstmuffinman@gmail.com>
Date: Thu, 27 Oct 2022 18:16:42 +0100
Subject: [PATCH 05/11] split Addr# primitives into module

---
 flatparse.cabal                 |  1 +
 src/FlatParse/Basic.hs          |  8 ++--
 src/FlatParse/Basic/Addr.hs     | 83 +++++++++++++++++++++++++++++++++
 src/FlatParse/Basic/Internal.hs | 57 ++++------------------
 4 files changed, 99 insertions(+), 50 deletions(-)
 create mode 100644 src/FlatParse/Basic/Addr.hs

diff --git a/flatparse.cabal b/flatparse.cabal
index 4ea5335..84d133f 100644
--- a/flatparse.cabal
+++ b/flatparse.cabal
@@ -44,6 +44,7 @@ flag llvm
 library
   exposed-modules:
       FlatParse.Basic
+      FlatParse.Basic.Addr
       FlatParse.Basic.Chars
       FlatParse.Basic.Integers
       FlatParse.Basic.Internal
diff --git a/src/FlatParse/Basic.hs b/src/FlatParse/Basic.hs
index 14858ce..08df697 100644
--- a/src/FlatParse/Basic.hs
+++ b/src/FlatParse/Basic.hs
@@ -115,14 +115,15 @@ module FlatParse.Basic (
   , ensureBytes#
 
   -- ** Unboxed arguments
-  , takeBs#
+  , take#
   , atSkip#
 
 
   -- ** Location & address primitives
   , setBack#
   , withAddr#
-  , takeBsOffAddr#
+  , takeOffAddr#
+  , withOffAddr#
   , lookaheadFromAddr#
   , atAddr#
 
@@ -164,6 +165,7 @@ import FlatParse.Basic.Integers
 import FlatParse.Basic.Internal
 import FlatParse.Basic.Chars
 import FlatParse.Basic.Position
+import FlatParse.Basic.Addr
 
 -- | Higher-level boxed data type for parsing results.
 data Result e a =
@@ -268,7 +270,7 @@ eof = Parser \fp eob s -> case eqAddr# eob s of
 --
 -- Throws a runtime error if given a negative integer.
 take :: Int -> Parser e B.ByteString
-take (I# n#) = takeBs# n#
+take (I# n#) = take# n#
 {-# inline take #-}
 
 -- | Consume the rest of the input. May return the empty bytestring.
diff --git a/src/FlatParse/Basic/Addr.hs b/src/FlatParse/Basic/Addr.hs
new file mode 100644
index 0000000..a8e0138
--- /dev/null
+++ b/src/FlatParse/Basic/Addr.hs
@@ -0,0 +1,83 @@
+{- | Highly dangerous parsing primitives using 'Addr#'.
+
+Ensure to read the documentation before using any definitions from this module.
+
+This module exports primitives useful for efficiently parsing binary files that
+store data using an internal index.
+
+Often, such indices describes records using a starting offset and a length.
+Offsets are often relative to the file start, or some dynamic address in the
+file. This way, individual records can be read out efficiently (much faster than
+opening lots of small files!).
+
+We may parse these in-place extremely efficiently by adding record offsets to a
+base memory address somewhere in the parsing data. This is also extremely
+unsafe, and easy to get catastrophically wrong. Thus, we provide as much utility
+as reasonable to enable performing such parsing safely. (That means CPS
+functions.)
+-}
+
+module FlatParse.Basic.Addr where
+
+import FlatParse.Basic.Parser
+import FlatParse.Basic.Internal
+
+import GHC.Exts
+
+import qualified Data.ByteString as B
+
+-- | Run a parser, passing it the current address the parser is at.
+--
+-- Useful for parsing offset-based data tables. For example, you may use this to
+-- save the base address to use together with various relative offsets.
+withAddr# :: (Addr# -> Parser e a) -> Parser e a
+withAddr# p = Parser \fp eob s -> runParser# (p s) fp eob s
+{-# inline withAddr# #-}
+
+-- | @takeOffAddr# addr# offset# len#@ moves to @addr#@, skips @offset#@
+--   bytes, reads @len#@ bytes into a 'ByteString', and restores the original
+--   address.
+--
+-- The 'Addr#' should be from 'withAddr#'.
+--
+-- Useful for parsing offset-based data tables. Ex: Your file contains an index
+-- storing @(OFFSET, LENGTH)@ entries where the offset is the byte position in
+-- the file. Begin with @'withAddr#' $ \tableBase# -> ...@, then read each entry
+-- like @'takeOffAddr#' tableBase# OFFSET LENGTH@.
+--
+-- Fails if you attempt to read outside the parsing data.
+--
+-- Name adopted from the similar-ish @indexXOffAddr#@ primops.
+takeOffAddr# :: Addr# -> Int# -> Int# -> Parser e B.ByteString
+takeOffAddr# addr# offset# len# = withOffAddr# addr# offset# (take# len#)
+{-# inline takeOffAddr# #-}
+
+-- | @withOffAddr# addr# offset# p@ moves to @addr#@, skips @offset#@
+--   bytes, then runs the given parser @p@.
+--
+-- The 'Addr#' should be from 'withAddr#'.
+--
+-- Fails if you attempt to read outside the parsing data.
+--
+-- Name adopted from the similar-ish @indexXOffAddr#@ primops.
+withOffAddr# :: Addr# -> Int# -> Parser e a -> Parser e a
+withOffAddr# addr# offset# =
+    lookaheadFromAddr# addr# . atSkip# offset#
+{-# inline withOffAddr# #-}
+
+-- | 'lookahead', but specify the address to lookahead from.
+--
+-- The 'Addr#' should be from 'withAddr#'.
+lookaheadFromAddr# :: Addr# -> Parser e a -> Parser e a
+lookaheadFromAddr# s = lookahead . atAddr# s
+{-# inline lookaheadFromAddr# #-}
+
+-- | Run a parser at the given address.
+--
+-- The 'Addr#' should be from 'withAddr#'.
+--
+-- This is a highly internal function -- you likely want 'lookaheadFromAddr#',
+-- which will reset the address after running the parser.
+atAddr# :: Addr# -> Parser e a -> Parser e a
+atAddr# s (Parser p) = Parser \fp eob _ -> p fp eob s
+{-# inline atAddr# #-}
diff --git a/src/FlatParse/Basic/Internal.hs b/src/FlatParse/Basic/Internal.hs
index c8fee49..02992b4 100644
--- a/src/FlatParse/Basic/Internal.hs
+++ b/src/FlatParse/Basic/Internal.hs
@@ -71,21 +71,23 @@ withNotEob (Parser p) = Parser \fp eob s -> case eqAddr# eob s of
 --------------------------------------------------------------------------------
 -- Low level unboxed combinators
 
--- | Read the given number of bytes as a 'ByteString'.
+-- | Read @n@ bytes as a 'ByteString'. Fails if newer than @n@ bytes are
+--   available.
 --
 -- Throws a runtime error if given a negative integer.
-takeBs# :: Int# -> Parser e B.ByteString
-takeBs# n# = withPosInt# n# takeBsUnsafe#
-{-# inline takeBs# #-}
+take# :: Int# -> Parser e B.ByteString
+take# n# = withPosInt# n# takeUnsafe#
+{-# inline take# #-}
 
--- | Read the given number of bytes as a 'ByteString'.
+-- | Read @n@ bytes as a 'ByteString'. Fails if newer than @n@ bytes are
+--   available.
 --
 -- Undefined behaviour if given a negative integer.
-takeBsUnsafe# :: Int# -> Parser e B.ByteString
-takeBsUnsafe# n# = Parser \fp eob s -> case n# <=# minusAddr# eob s of
+takeUnsafe# :: Int# -> Parser e B.ByteString
+takeUnsafe# n# = Parser \fp eob s -> case n# <=# minusAddr# eob s of
   1# -> OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) (plusAddr# s n#)
   _  -> Fail#
-{-# inline takeBsUnsafe# #-}
+{-# inline takeUnsafe# #-}
 
 -- | Skip forward @n@ bytes and run the given parser. Fails if fewer than @n@
 --   bytes are available.
@@ -112,45 +114,6 @@ skip# :: Int# -> Parser e ()
 skip# os# = atSkip# os# (pure ())
 {-# inline skip# #-}
 
--- | Run a parser, passing it the current address the parser is at.
---
--- Useful for parsing offset-based data tables. For example, you may use this to
--- save the base address to use together with various 0-indexed offsets.
-withAddr# :: (Addr# -> Parser e a) -> Parser e a
-withAddr# p = Parser \fp eob s -> runParser# (p s) fp eob s
-{-# inline withAddr# #-}
-
--- | @takeBsOffAddr# addr# offset# len#@ moves to @addr#@, skips @offset#@
---   bytes, reads @len#@ bytes into a 'ByteString', and restores the original
---   address.
---
--- The 'Addr#' should be from 'withAddr#'.
---
--- Useful for parsing offset-based data tables. For example, you may use this
--- together with 'withAddr#' to jump to an offset in your input and read some
--- data.
-takeBsOffAddr# :: Addr# -> Int# -> Int# -> Parser e B.ByteString
-takeBsOffAddr# addr# offset# len# =
-    lookaheadFromAddr# addr# $ atSkip# offset# $ takeBs# len#
-{-# inline takeBsOffAddr# #-}
-
--- | 'lookahead', but specify the address to lookahead from.
---
--- The 'Addr#' should be from 'withAddr#'.
-lookaheadFromAddr# :: Addr# -> Parser e a -> Parser e a
-lookaheadFromAddr# s = lookahead . atAddr# s
-{-# inline lookaheadFromAddr# #-}
-
--- | Run a parser at the given address.
---
--- The 'Addr#' should be from 'withAddr#'.
---
--- This is a highly internal function -- you likely want 'lookaheadFromAddr#',
--- which will reset the address after running the parser.
-atAddr# :: Addr# -> Parser e a -> Parser e a
-atAddr# s (Parser p) = Parser \fp eob _ -> p fp eob s
-{-# inline atAddr# #-}
-
 --------------------------------------------------------------------------------
 -- Low-level boxed combinators
 

From 82e58011739e93664539c21650ee6619a3a41df1 Mon Sep 17 00:00:00 2001
From: Ben Orchard <thefirstmuffinman@gmail.com>
Date: Thu, 27 Oct 2022 19:13:24 +0100
Subject: [PATCH 06/11] cleanup

---
 src/FlatParse/Basic.hs          | 130 +++++++++++++++++++-------------
 src/FlatParse/Basic/Integers.hs |   4 +-
 src/FlatParse/Basic/Internal.hs |  46 -----------
 src/FlatParse/Basic/Parser.hs   |  15 +---
 src/FlatParse/Basic/Position.hs |   5 +-
 5 files changed, 89 insertions(+), 111 deletions(-)

diff --git a/src/FlatParse/Basic.hs b/src/FlatParse/Basic.hs
index 08df697..d9bef66 100644
--- a/src/FlatParse/Basic.hs
+++ b/src/FlatParse/Basic.hs
@@ -10,45 +10,57 @@ denoted by a @#@ hash suffix.
 
 module FlatParse.Basic (
 
-  -- * Parser types and constructors
-    type Parser(..)
-  , type Res#
-  , pattern OK#
-  , pattern Fail#
-  , pattern Err#
-  , Result(..)
+  -- * Parser monad
+    type Parser
 
-  -- * Running parsers
+  -- ** Executing parsers
+  , Result(..)
   , runParser
   , runParserS
 
   -- * Errors and failures
-  , failed
-  , Control.Applicative.empty
   , err
   , lookahead
   , fails
   , try
-  , Control.Applicative.optional
   , optional_
   , withOption
   , cut
   , cutting
 
-  -- * Basic lexing and parsing
+  -- * Combinators
+  , (Control.Applicative.<|>)
+  , Control.Applicative.empty
+  , branch
+  , chainl
+  , chainr
+  , Control.Applicative.many
+  , many_
+  , Control.Applicative.some
+  , some_
+  , notFollowedBy
+  , isolate
+
+  -- * Primitive parsers
   , eof
-  , take
-  , takeRest
-  , skip
   , switch
   , switchWithPost
   , rawSwitchWithPost
 
-  , getCharOf
+  -- ** Byte-wise
+  , take
+  , takeRest
+  , skip
   , getBytesOf
   , getByteStringOf
-  , getStringOf
+  , getCString
 
+  -- ** Machine integers
+  , module FlatParse.Basic.Integers
+
+  -- ** 'Char', 'String'
+  , getCharOf
+  , getStringOf
   , getChar
   , getChar_
   , getCharASCII
@@ -56,12 +68,9 @@ module FlatParse.Basic (
   , getAsciiDecimalInt
   , getAsciiDecimalInteger
   , getAsciiHexInt
-  , getCString
-
   , Common.isDigit
   , Common.isGreekLetter
   , Common.isLatinLetter
-
   , satisfy
   , satisfy_
   , satisfyASCII
@@ -69,35 +78,13 @@ module FlatParse.Basic (
   , fusedSatisfy
   , fusedSatisfy_
 
-  -- * Combinators
-  , (<|>)
-  , branch
-  , chainl
-  , chainr
-  , Control.Applicative.many
-  , many_
-  , Control.Applicative.some
-  , some_
-  , notFollowedBy
-  , isolate
-
-  -- * Positions and spans
-  , Pos(..)
-  , Span(..)
-  , getPos
-  , setPos
-  , endPos
-  , spanOf
-  , withSpan
-  , byteStringOf
-  , withByteString
-  , inSpan
+  -- ** Positions and spans
+  , module FlatParse.Basic.Position
 
   -- ** Position and span conversions
   , validPos
   , posLineCols
   , unsafeSpanToByteString
-  , unsafeSlice
   , mkPos
   , FlatParse.Basic.lines
 
@@ -121,17 +108,11 @@ module FlatParse.Basic (
 
   -- ** Location & address primitives
   , setBack#
-  , withAddr#
-  , takeOffAddr#
-  , withOffAddr#
-  , lookaheadFromAddr#
-  , atAddr#
+  , module FlatParse.Basic.Addr
 
   -- ** Unsafe
   , getCStringUnsafe
 
-  , module FlatParse.Basic.Integers
-
   ) where
 
 import Prelude hiding ( take, getChar )
@@ -580,7 +561,7 @@ genSwitchTrie' postAction cases fallback =
           Nothing    -> pure ((Just i, rhs), (i, str))
           Just !post -> pure ((Just i, (VarE '(>>)) `AppE` post `AppE` rhs), (i, str))
 
-      !m    = M.fromList ((Nothing, maybe (VarE 'failed) id fallback) : branches)
+      !m    = M.fromList ((Nothing, maybe (VarE 'empty) id fallback) : branches)
       !trie = compileTrie strings
   in (m , trie)
 
@@ -728,3 +709,48 @@ unsafeSpanToByteString :: Span -> Parser e B.ByteString
 unsafeSpanToByteString (Span l r) =
   lookahead (setPos l >> byteStringOf (setPos r))
 {-# inline unsafeSpanToByteString #-}
+
+--------------------------------------------------------------------------------
+-- Low-level boxed combinators
+
+-- | Read a null-terminated bytestring (a C-style string).
+--
+-- Consumes the null terminator.
+getCString :: Parser e B.ByteString
+getCString = Parser \fp eob s -> go' fp eob s
+  where
+    go' fp eob s0 = go 0# s0
+      where
+        go n# s = case eqAddr# eob s of
+          1# -> Fail#
+          _  ->
+            let s' = plusAddr# s 1#
+                w# = indexWord8OffAddr# s 0#
+            in  if   W8# w# == 0x00
+                then OK# (B.PS (ForeignPtr s0 fp) 0 (I# n#)) s'
+                else go (n# +# 1#) s'
+{-# inline getCString #-}
+
+-- | Read a null-terminated bytestring (a C-style string), where the bytestring
+--   is known to be null-terminated somewhere in the input.
+--
+-- Undefined behaviour if your bytestring isn't null-terminated somewhere.
+-- You almost certainly want 'getCString' instead.
+--
+-- Fails on GHC versions older than 9.0, since we make use of the
+-- 'cstringLength#' primop introduced in GHC 9.0, and we aren't very useful
+-- without it.
+--
+-- Consumes the null terminator.
+getCStringUnsafe :: Parser e B.ByteString
+{-# inline getCStringUnsafe #-}
+#if MIN_VERSION_base(4,15,0)
+getCStringUnsafe = Parser \fp eob s ->
+  case eqAddr# eob s of
+    1# -> Fail#
+    _  -> let n#  = cstringLength# s
+              s'# = plusAddr# s (n# +# 1#)
+           in OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) s'#
+#else
+getCStringUnsafe = error "Flatparse.Basic.getCStringUnsafe: requires GHC 9.0 / base-4.15, not available on this compiler"
+#endif
diff --git a/src/FlatParse/Basic/Integers.hs b/src/FlatParse/Basic/Integers.hs
index f5d7516..6e52487 100644
--- a/src/FlatParse/Basic/Integers.hs
+++ b/src/FlatParse/Basic/Integers.hs
@@ -60,6 +60,8 @@ import GHC.Int
 import FlatParse.Basic.Parser
 import FlatParse.Common.Assorted ( word16ToInt16, word32ToInt32, word64ToInt64 )
 
+import Control.Applicative ( Alternative(empty) )
+
 --------------------------------------------------------------------------------
 
 -- | Helper for defining CPS parsers for types of a constant byte size (i.e.
@@ -289,7 +291,7 @@ getSizedOfUnsafe# size# indexOffAddr aExpected =
     go aParsed =
         if   aParsed == aExpected
         then pure ()
-        else failed
+        else empty
 {-# inline getSizedOfUnsafe# #-}
 
 -- | Read the next 1 byte and assert its value as a 'Word8'.
diff --git a/src/FlatParse/Basic/Internal.hs b/src/FlatParse/Basic/Internal.hs
index 02992b4..ec8691f 100644
--- a/src/FlatParse/Basic/Internal.hs
+++ b/src/FlatParse/Basic/Internal.hs
@@ -5,7 +5,6 @@ module FlatParse.Basic.Internal where
 import FlatParse.Basic.Parser
 
 import GHC.Exts
-import GHC.Word
 import GHC.ForeignPtr
 
 import qualified Data.ByteString as B
@@ -113,48 +112,3 @@ atSkipUnsafe# os# (Parser p) = Parser \fp eob s -> case os# <=# minusAddr# eob s
 skip# :: Int# -> Parser e ()
 skip# os# = atSkip# os# (pure ())
 {-# inline skip# #-}
-
---------------------------------------------------------------------------------
--- Low-level boxed combinators
-
--- | Read a null-terminated bytestring (a C-style string).
---
--- Consumes the null terminator.
-getCString :: Parser e B.ByteString
-getCString = Parser \fp eob s -> go' fp eob s
-  where
-    go' fp eob s0 = go 0# s0
-      where
-        go n# s = case eqAddr# eob s of
-          1# -> Fail#
-          _  ->
-            let s' = plusAddr# s 1#
-                w# = indexWord8OffAddr# s 0#
-            in  if   W8# w# == 0x00
-                then OK# (B.PS (ForeignPtr s0 fp) 0 (I# n#)) s'
-                else go (n# +# 1#) s'
-{-# inline getCString #-}
-
--- | Read a null-terminated bytestring (a C-style string), where the bytestring
---   is known to be null-terminated somewhere in the input.
---
--- Undefined behaviour if your bytestring isn't null-terminated somewhere.
--- You almost certainly want 'getCString' instead.
---
--- Fails on GHC versions older than 9.0, since we make use of the
--- 'cstringLength#' primop introduced in GHC 9.0, and we aren't very useful
--- without it.
---
--- Consumes the null terminator.
-getCStringUnsafe :: Parser e B.ByteString
-{-# inline getCStringUnsafe #-}
-#if MIN_VERSION_base(4,15,0)
-getCStringUnsafe = Parser \fp eob s ->
-  case eqAddr# eob s of
-    1# -> Fail#
-    _  -> let n#  = cstringLength# s
-              s'# = plusAddr# s (n# +# 1#)
-           in OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) s'#
-#else
-getCStringUnsafe = error "Flatparse.Basic.getCStringUnsafe: requires GHC 9.0 / base-4.15, not available on this compiler"
-#endif
diff --git a/src/FlatParse/Basic/Parser.hs b/src/FlatParse/Basic/Parser.hs
index a3117cd..4c08aef 100644
--- a/src/FlatParse/Basic/Parser.hs
+++ b/src/FlatParse/Basic/Parser.hs
@@ -6,9 +6,6 @@ module FlatParse.Basic.Parser
     Parser(..)
   , Res#
   , pattern OK#, pattern Err#, pattern Fail#
-
-  -- * Primitive combinators
-  , failed
   ) where
 
 import GHC.Exts ( Addr#, unsafeCoerce# )
@@ -61,8 +58,9 @@ instance Monad (Parser e) where
   (>>) = (*>)
   {-# inline (>>) #-}
 
+-- | By default, parser choice `(<|>)` arbitrarily backtracks on parser failure.
 instance Base.Alternative (Parser e) where
-  empty = failed
+  empty = Parser \fp eob s -> Fail#
   {-# inline empty #-}
 
   (<|>) = (<|>)
@@ -81,12 +79,7 @@ instance Base.Alternative (Parser e) where
   some p = (:) <$> p <*> Base.many p
   {-# inline some #-}
 
--- | The failing parser. By default, parser choice `(<|>)` arbitrarily backtracks
---   on parser failure.
-failed :: Parser e a
-failed = Parser \fp eob s -> Fail#
-{-# inline failed #-}
-
+-- TODO don't get how to handle this, whether I can inline it safely
 infixr 6 <|>
 (<|>) :: Parser e a -> Parser e a -> Parser e a
 (<|>) (Parser f) (Parser g) = Parser \fp eob s ->
@@ -102,7 +95,7 @@ infixr 6 <|>
 #-}
 
 instance MonadPlus (Parser e) where
-  mzero = failed
+  mzero = Base.empty
   {-# inline mzero #-}
   mplus = (<|>)
   {-# inline mplus #-}
diff --git a/src/FlatParse/Basic/Position.hs b/src/FlatParse/Basic/Position.hs
index f566610..3b1c473 100644
--- a/src/FlatParse/Basic/Position.hs
+++ b/src/FlatParse/Basic/Position.hs
@@ -1,4 +1,7 @@
-module FlatParse.Basic.Position where
+module FlatParse.Basic.Position
+  ( module FlatParse.Common.Position
+  , module FlatParse.Basic.Position
+  ) where
 
 import FlatParse.Basic.Parser
 import FlatParse.Common.Position

From 9b9bc66053b896c4d40bdc7f635afd2307f8f6df Mon Sep 17 00:00:00 2001
From: Ben Orchard <thefirstmuffinman@gmail.com>
Date: Thu, 27 Oct 2022 20:18:58 +0100
Subject: [PATCH 07/11] more cleanup

---
 flatparse.cabal                              |   4 +-
 src/FlatParse/Basic.hs                       | 243 ++-----------------
 src/FlatParse/Basic/Bytes.hs                 |  54 +++++
 src/FlatParse/Basic/Internal.hs              |  21 +-
 src/FlatParse/Basic/{Chars.hs => Strings.hs} |  43 +++-
 src/FlatParse/BasicString.hs                 | 137 +++++++++++
 src/FlatParse/Examples/BasicLambda/Lexer.hs  |   5 +-
 test/Test.hs                                 |  13 +-
 8 files changed, 270 insertions(+), 250 deletions(-)
 create mode 100644 src/FlatParse/Basic/Bytes.hs
 rename src/FlatParse/Basic/{Chars.hs => Strings.hs} (81%)
 create mode 100644 src/FlatParse/BasicString.hs

diff --git a/flatparse.cabal b/flatparse.cabal
index 84d133f..11fd3ae 100644
--- a/flatparse.cabal
+++ b/flatparse.cabal
@@ -45,11 +45,13 @@ library
   exposed-modules:
       FlatParse.Basic
       FlatParse.Basic.Addr
-      FlatParse.Basic.Chars
+      FlatParse.Basic.Bytes
       FlatParse.Basic.Integers
       FlatParse.Basic.Internal
       FlatParse.Basic.Parser
       FlatParse.Basic.Position
+      FlatParse.Basic.Strings
+      FlatParse.BasicString
       FlatParse.Common.Assorted
       FlatParse.Common.Numbers
       FlatParse.Common.Position
diff --git a/src/FlatParse/Basic.hs b/src/FlatParse/Basic.hs
index d9bef66..1e1ff4d 100644
--- a/src/FlatParse/Basic.hs
+++ b/src/FlatParse/Basic.hs
@@ -16,13 +16,13 @@ module FlatParse.Basic (
   -- ** Executing parsers
   , Result(..)
   , runParser
-  , runParserS
 
   -- * Errors and failures
   , err
   , lookahead
   , fails
   , try
+  , Control.Applicative.optional
   , optional_
   , withOption
   , cut
@@ -40,6 +40,7 @@ module FlatParse.Basic (
   , some_
   , notFollowedBy
   , isolate
+  , isolateUnsafe#
 
   -- * Primitive parsers
   , eof
@@ -49,11 +50,14 @@ module FlatParse.Basic (
 
   -- ** Byte-wise
   , take
+  , take#
   , takeRest
   , skip
+  , atSkip#
   , getBytesOf
   , getByteStringOf
   , getCString
+  , getCStringUnsafe
 
   -- ** Machine integers
   , module FlatParse.Basic.Integers
@@ -81,49 +85,30 @@ module FlatParse.Basic (
   -- ** Positions and spans
   , module FlatParse.Basic.Position
 
-  -- ** Position and span conversions
-  , validPos
-  , posLineCols
-  , unsafeSpanToByteString
-  , mkPos
-  , FlatParse.Basic.lines
-
-  -- * Getting the rest of the input as a 'String'
-  , takeLine
-  , traceLine
-  , takeRestString
-  , traceRestString
-
-  -- * `String` conversions
-  , packUTF8
-  , unpackUTF8
-
-  -- * Internal functions
-  , ensureBytes#
-
-  -- ** Unboxed arguments
-  , take#
-  , atSkip#
-
-
   -- ** Location & address primitives
-  , setBack#
   , module FlatParse.Basic.Addr
 
-  -- ** Unsafe
-  , getCStringUnsafe
-
   ) where
 
 import Prelude hiding ( take, getChar )
 
+import qualified FlatParse.Common.Assorted as Common
+import FlatParse.Common.Position
+import FlatParse.Common.Trie
+
+import FlatParse.Basic.Parser
+import FlatParse.Basic.Integers
+import FlatParse.Basic.Internal
+import FlatParse.Basic.Bytes
+import FlatParse.Basic.Strings
+import FlatParse.Basic.Position
+import FlatParse.Basic.Addr
+
 import Control.Applicative
 
 import Control.Monad
 import Data.Foldable
-import Data.List (sortBy)
 import Data.Map (Map)
-import Data.Ord (comparing)
 import GHC.Exts
 import GHC.Word
 import GHC.ForeignPtr ( ForeignPtr(..) )
@@ -135,19 +120,6 @@ import qualified Data.ByteString.Unsafe as B
 import qualified Data.ByteString.Internal as B
 import qualified Data.Map.Strict as M
 
-import qualified FlatParse.Common.Numbers as Common
-import qualified FlatParse.Common.Assorted as Common
-import FlatParse.Common.Position
-import FlatParse.Common.Trie
-import FlatParse.Common.Assorted ( packBytes, splitBytes, strToBytes, packUTF8 )
-
-import FlatParse.Basic.Parser
-import FlatParse.Basic.Integers
-import FlatParse.Basic.Internal
-import FlatParse.Basic.Chars
-import FlatParse.Basic.Position
-import FlatParse.Basic.Addr
-
 -- | Higher-level boxed data type for parsing results.
 data Result e a =
     OK a !(B.ByteString)  -- ^ Contains return value and unconsumed input.
@@ -268,16 +240,6 @@ skip :: Int -> Parser e ()
 skip (I# os#) = atSkip# os# (pure ())
 {-# inline skip #-}
 
--- | Parse a UTF-8 character literal. This is a template function, you can use it as
---   @$(char \'x\')@, for example, and the splice in this case has type @Parser e ()@.
-getCharOf :: Char -> Q Exp
-getCharOf c = getStringOf [c]
-
--- | Parse a UTF-8 string literal. This is a template function, you can use it as @$(string "foo")@,
---   for example, and the splice has type @Parser e ()@.
-getStringOf :: String -> Q Exp
-getStringOf str = getBytesOf (strToBytes str)
-
 {-|
 This is a template function which makes it possible to branch on a collection of string literals in
 an efficient way. By using `switch`, such branching is compiled to a trie of primitive parsing
@@ -345,30 +307,6 @@ rawSwitchWithPost postAction cases fallback = do
   !fallback <- sequence fallback
   genTrie $! genSwitchTrie' postAction cases fallback
 
--- | Read a non-negative `Int` from the input, as a non-empty digit sequence.
--- The `Int` may overflow in the result.
-getAsciiDecimalInt :: Parser e Int
-getAsciiDecimalInt = Parser \fp eob s -> case Common.readInt eob s of
-  (# (##) | #)        -> Fail#
-  (# | (# n, s' #) #) -> OK# (I# n) s'
-{-# inline getAsciiDecimalInt #-}
-
--- | Read an `Int` from the input, as a non-empty case-insensitive ASCII
---   hexadecimal digit sequence. The `Int` may overflow in the result.
-getAsciiHexInt :: Parser e Int
-getAsciiHexInt = Parser \fp eob s -> case Common.readIntHex eob s of
-  (# (##) | #)        -> Fail#
-  (# | (# n, s' #) #) -> OK# (I# n) s'
-{-# inline getAsciiHexInt #-}
-
--- | Read a non-negative `Integer` from the input, as a non-empty digit
--- sequence.
-getAsciiDecimalInteger :: Parser e Integer
-getAsciiDecimalInteger = Parser \fp eob s -> case Common.readInteger fp eob s of
-  (# (##) | #)        -> Fail#
-  (# | (# i, s' #) #) -> OK# i s'
-{-# inline getAsciiDecimalInteger #-}
-
 --------------------------------------------------------------------------------
 
 -- | An analogue of the list `foldl` function: first parse a @b@, then parse zero or more @a@-s,
@@ -442,43 +380,6 @@ getByteStringOf (B.PS (ForeignPtr bs fcontent) _ (I# len)) =
        _  -> Fail#
 {-# inline getByteStringOf #-}
 
--- | Read a sequence of bytes. This is a template function, you can use it as
---   @$(getBytesOf [3, 4, 5])@, for example, and the splice has type @Parser e
---   ()@.
-getBytesOf :: [Word] -> Q Exp
-getBytesOf bytes = do
-  let !len = length bytes
-  [| ensureBytes# len >> $(scanBytes# bytes) |]
-
--- | Template function, creates a @Parser e ()@ which unsafely scans a given
---   sequence of bytes.
-scanBytes# :: [Word] -> Q Exp
-scanBytes# bytes = do
-  let !(leading, w8s) = splitBytes bytes
-      !scanw8s        = go w8s where
-                         go (w8:[] ) = [| getWord64OfUnsafe w8 |]
-                         go (w8:w8s) = [| getWord64OfUnsafe w8 >> $(go w8s) |]
-                         go []       = [| pure () |]
-  case w8s of
-    [] -> go leading
-          where
-            go (a:b:c:d:[]) = let !w = packBytes [a, b, c, d] in [| getWord32OfUnsafe w |]
-            go (a:b:c:d:ws) = let !w = packBytes [a, b, c, d] in [| getWord32OfUnsafe w >> $(go ws) |]
-            go (a:b:[])     = let !w = packBytes [a, b]       in [| getWord16OfUnsafe w |]
-            go (a:b:ws)     = let !w = packBytes [a, b]       in [| getWord16OfUnsafe w >> $(go ws) |]
-            go (a:[])       = [| getWord8OfUnsafe a |]
-            go []           = [| pure () |]
-    _  -> case leading of
-
-      []              -> scanw8s
-      [a]             -> [| getWord8OfUnsafe a >> $scanw8s |]
-      ws@[a, b]       -> let !w = packBytes ws in [| getWord16OfUnsafe w >> $scanw8s |]
-      ws@[a, b, c, d] -> let !w = packBytes ws in [| getWord32OfUnsafe w >> $scanw8s |]
-      ws              -> let !w = packBytes ws
-                             !l = length ws
-                         in [| scanPartial64# l w >> $scanw8s |]
-
-
 -- Switching code generation
 --------------------------------------------------------------------------------
 
@@ -503,7 +404,7 @@ genTrie (rules, t) = do
 
       fallback :: Rule -> Int ->  Q Exp
       fallback rule 0 = pure $ VarE $ fst $ ix branches rule
-      fallback rule n = [| setBack# n >> $(pure $ VarE $ fst $ ix branches rule) |]
+      fallback rule n = [| skipBack# n >> $(pure $ VarE $ fst $ ix branches rule) |]
 
   let go :: Trie' (Rule, Int, Maybe Int) -> Q Exp
       go = \case
@@ -602,114 +503,6 @@ isolateUnsafe# n# p = Parser \fp eob s ->
         _  -> Fail# -- you tried to isolate more than we have left
 {-# inline isolateUnsafe# #-}
 
---------------------------------------------------------------------------------
-
--- | Convert an UTF-8-coded `B.ByteString` to a `String`.
-unpackUTF8 :: B.ByteString -> String
-unpackUTF8 str = case runParser takeRestString str of
-  OK a _ -> a
-  _      -> error "unpackUTF8: invalid encoding"
-
--- | Take the rest of the input as a `String`. Assumes UTF-8 encoding.
-takeRestString :: Parser e String
-takeRestString = branch eof (pure "") do
-  c <- getChar
-  cs <- takeRestString
-  pure (c:cs)
-
--- | Get the rest of the input as a `String`, but restore the parsing state. Assumes UTF-8 encoding.
---   This can be used for debugging.
-traceRestString :: Parser e String
-traceRestString = lookahead takeRestString
-
---------------------------------------------------------------------------------
-
--- | Parse the rest of the current line as a `String`. Assumes UTF-8 encoding,
---   throws an error if the encoding is invalid.
-takeLine :: Parser e String
-takeLine = branch eof (pure "") do
-  c <- getChar
-  case c of
-    '\n' -> pure ""
-    _    -> (c:) <$> takeLine
-
--- | Parse the rest of the current line as a `String`, but restore the parsing state.
---   Assumes UTF-8 encoding. This can be used for debugging.
-traceLine :: Parser e String
-traceLine = lookahead takeLine
-
--- | Run a parser on a `String` input. Reminder: @OverloadedStrings@ for `B.ByteString` does not
---   yield a valid UTF-8 encoding! For non-ASCII `B.ByteString` literal input, use `runParserS` or
---   `packUTF8` for testing.
-runParserS :: Parser e a -> String -> Result e a
-runParserS pa s = runParser pa (packUTF8 s)
-
--- | Create a `Pos` from a line and column number. Throws an error on out-of-bounds
---   line and column numbers.
-mkPos :: B.ByteString -> (Int, Int) -> Pos
-mkPos str (line', col') =
-  let go line col | line == line' && col == col' = getPos
-      go line col = (do
-        c <- getChar
-        if c == '\n' then go (line + 1) 0
-                     else go line (col + 1)) <|> error "mkPos: invalid position"
-  in case runParser (go 0 0) str of
-    OK res _ -> res
-    _        -> error "impossible"
-
--- | Break an UTF-8-coded `B.ByteString` to lines. Throws an error on invalid
---   input. This is mostly useful for grabbing specific source lines for
---   displaying error messages.
-lines :: B.ByteString -> [String]
-lines str =
-  let go = ([] <$ eof) <|> ((:) <$> takeLine <*> go)
-  in case runParser go str of
-    OK ls _ -> ls
-    _       -> error "linesUTF8: invalid input"
-
-
--- | Check whether a `Pos` points into a `B.ByteString`.
-validPos :: B.ByteString -> Pos -> Bool
-validPos str pos =
-  let go = do
-        start <- getPos
-        pure (start <= pos && pos <= endPos)
-  in case runParser go str of
-    OK b _ -> b
-    _      -> error "impossible"
-{-# inline validPos #-}
-
--- | Compute corresponding line and column numbers for each `Pos` in a list. Throw an error
---   on invalid positions. Note: computing lines and columns may traverse the `B.ByteString`,
---   but it traverses it only once regardless of the length of the position list.
-posLineCols :: B.ByteString -> [Pos] -> [(Int, Int)]
-posLineCols str poss =
-  let go !line !col [] = pure []
-      go line col ((i, pos):poss) = do
-        p <- getPos
-        if pos == p then
-          ((i, (line, col)):) <$> go line col poss
-        else do
-          c <- getChar
-          if '\n' == c then
-            go (line + 1) 0 ((i, pos):poss)
-          else
-            go line (col + 1) ((i, pos):poss)
-
-      sorted :: [(Int, Pos)]
-      sorted = sortBy (comparing snd) (zip [0..] poss)
-
-  in case runParser (go 0 0 sorted) str of
-       OK res _ -> snd <$> sortBy (comparing fst) res
-       _        -> error "invalid position"
-
--- | Create a `B.ByteString` from a `Span`. The result is invalid if the `Span` points
---   outside the current buffer, or if the `Span` start is greater than the end position.
-unsafeSpanToByteString :: Span -> Parser e B.ByteString
-unsafeSpanToByteString (Span l r) =
-  lookahead (setPos l >> byteStringOf (setPos r))
-{-# inline unsafeSpanToByteString #-}
-
 --------------------------------------------------------------------------------
 -- Low-level boxed combinators
 
diff --git a/src/FlatParse/Basic/Bytes.hs b/src/FlatParse/Basic/Bytes.hs
new file mode 100644
index 0000000..91eafc4
--- /dev/null
+++ b/src/FlatParse/Basic/Bytes.hs
@@ -0,0 +1,54 @@
+module FlatParse.Basic.Bytes where
+
+import Language.Haskell.TH
+import FlatParse.Basic.Parser
+import qualified FlatParse.Common.Assorted as Common
+import FlatParse.Basic.Integers ( getWord64OfUnsafe
+                                , getWord32OfUnsafe
+                                , getWord16OfUnsafe
+                                , getWord8OfUnsafe )
+import GHC.Exts
+
+-- | Check that the input has at least the given number of bytes.
+ensureBytes# :: Int -> Parser e ()
+ensureBytes# (I# len) = Parser \fp eob s ->
+  case len  <=# minusAddr# eob s of
+    1# -> OK# () s
+    _  -> Fail#
+{-# inline ensureBytes# #-}
+
+-- | Read a sequence of bytes. This is a template function, you can use it as
+--   @$(getBytesOf [3, 4, 5])@, for example, and the splice has type @Parser e
+--   ()@.
+getBytesOf :: [Word] -> Q Exp
+getBytesOf bytes = do
+  let !len = length bytes
+  [| ensureBytes# len >> $(scanBytes# bytes) |]
+
+-- | Template function, creates a @Parser e ()@ which unsafely scans a given
+--   sequence of bytes.
+scanBytes# :: [Word] -> Q Exp
+scanBytes# bytes = do
+  let !(leading, w8s) = Common.splitBytes bytes
+      !scanw8s        = go w8s where
+                         go (w8:[] ) = [| getWord64OfUnsafe w8 |]
+                         go (w8:w8s) = [| getWord64OfUnsafe w8 >> $(go w8s) |]
+                         go []       = [| pure () |]
+  case w8s of
+    [] -> go leading
+          where
+            go (a:b:c:d:[]) = let !w = Common.packBytes [a, b, c, d] in [| getWord32OfUnsafe w |]
+            go (a:b:c:d:ws) = let !w = Common.packBytes [a, b, c, d] in [| getWord32OfUnsafe w >> $(go ws) |]
+            go (a:b:[])     = let !w = Common.packBytes [a, b]       in [| getWord16OfUnsafe w |]
+            go (a:b:ws)     = let !w = Common.packBytes [a, b]       in [| getWord16OfUnsafe w >> $(go ws) |]
+            go (a:[])       = [| getWord8OfUnsafe a |]
+            go []           = [| pure () |]
+    _  -> case leading of
+
+      []              -> scanw8s
+      [a]             -> [| getWord8OfUnsafe a >> $scanw8s |]
+      ws@[a, b]       -> let !w = Common.packBytes ws in [| getWord16OfUnsafe w >> $scanw8s |]
+      ws@[a, b, c, d] -> let !w = Common.packBytes ws in [| getWord32OfUnsafe w >> $scanw8s |]
+      ws              -> let !w = Common.packBytes ws
+                             !l = length ws
+                         in [| scanPartial64# l w >> $scanw8s |]
diff --git a/src/FlatParse/Basic/Internal.hs b/src/FlatParse/Basic/Internal.hs
index ec8691f..7f3c4f5 100644
--- a/src/FlatParse/Basic/Internal.hs
+++ b/src/FlatParse/Basic/Internal.hs
@@ -22,14 +22,6 @@ lookahead (Parser f) = Parser \fp eob s ->
 
 --------------------------------------------------------------------------------
 
--- | Check that the input has at least the given number of bytes.
-ensureBytes# :: Int -> Parser e ()
-ensureBytes# (I# len) = Parser \fp eob s ->
-  case len  <=# minusAddr# eob s of
-    1# -> OK# () s
-    _  -> Fail#
-{-# inline ensureBytes# #-}
-
 scanPartial64# :: Int -> Word -> Parser e ()
 scanPartial64# (I# len) (W# w) = Parser \fp eob s ->
   case indexWordOffAddr# s 0# of
@@ -41,12 +33,6 @@ scanPartial64# (I# len) (W# w) = Parser \fp eob s ->
             _  -> Fail#
 {-# inline scanPartial64# #-}
 
--- | Decrease the current input position by the given number of bytes.
-setBack# :: Int -> Parser e ()
-setBack# (I# i) = Parser \fp eob s ->
-  OK# () (plusAddr# s (negateInt# i))
-{-# inline setBack# #-}
-
 --------------------------------------------------------------------------------
 -- Helpers for common internal operations
 
@@ -112,3 +98,10 @@ atSkipUnsafe# os# (Parser p) = Parser \fp eob s -> case os# <=# minusAddr# eob s
 skip# :: Int# -> Parser e ()
 skip# os# = atSkip# os# (pure ())
 {-# inline skip# #-}
+
+-- | Go back @n@ bytes.
+--
+-- Highly unsafe. Makes no checks.
+skipBack# :: Int -> Parser e ()
+skipBack# (I# i) = Parser \fp eob s -> OK# () (plusAddr# s (negateInt# i))
+{-# inline skipBack# #-}
diff --git a/src/FlatParse/Basic/Chars.hs b/src/FlatParse/Basic/Strings.hs
similarity index 81%
rename from src/FlatParse/Basic/Chars.hs
rename to src/FlatParse/Basic/Strings.hs
index de44048..2a3bdf0 100644
--- a/src/FlatParse/Basic/Chars.hs
+++ b/src/FlatParse/Basic/Strings.hs
@@ -1,13 +1,20 @@
-module FlatParse.Basic.Chars where
+{-# language UnboxedTuples #-}
+
+module FlatParse.Basic.Strings where
 
 import Prelude hiding ( getChar )
 
 import FlatParse.Basic.Parser
+import FlatParse.Basic.Bytes ( getBytesOf )
 
 import FlatParse.Common.Assorted ( derefChar8# )
 
 import GHC.Exts
 
+import Language.Haskell.TH
+import qualified FlatParse.Common.Numbers as Common
+import qualified FlatParse.Common.Assorted as Common
+
 -- | Parse a UTF-8 `Char` for which a predicate holds.
 satisfy :: (Char -> Bool) -> Parser e Char
 satisfy f = Parser \fp eob s -> case runParser# getChar fp eob s of
@@ -169,3 +176,37 @@ getCharASCII = Parser \fp eob buf -> case eqAddr# eob buf of
 getCharASCII_ :: Parser e ()
 getCharASCII_ = () <$ getCharASCII
 {-# inline getCharASCII_ #-}
+
+-- | Parse a UTF-8 character literal. This is a template function, you can use it as
+--   @$(char \'x\')@, for example, and the splice in this case has type @Parser e ()@.
+getCharOf :: Char -> Q Exp
+getCharOf c = getStringOf [c]
+
+-- | Parse a UTF-8 string literal. This is a template function, you can use it as @$(string "foo")@,
+--   for example, and the splice has type @Parser e ()@.
+getStringOf :: String -> Q Exp
+getStringOf str = getBytesOf (Common.strToBytes str)
+
+-- | Read a non-negative `Int` from the input, as a non-empty digit sequence.
+-- The `Int` may overflow in the result.
+getAsciiDecimalInt :: Parser e Int
+getAsciiDecimalInt = Parser \fp eob s -> case Common.readInt eob s of
+  (# (##) | #)        -> Fail#
+  (# | (# n, s' #) #) -> OK# (I# n) s'
+{-# inline getAsciiDecimalInt #-}
+
+-- | Read an `Int` from the input, as a non-empty case-insensitive ASCII
+--   hexadecimal digit sequence. The `Int` may overflow in the result.
+getAsciiHexInt :: Parser e Int
+getAsciiHexInt = Parser \fp eob s -> case Common.readIntHex eob s of
+  (# (##) | #)        -> Fail#
+  (# | (# n, s' #) #) -> OK# (I# n) s'
+{-# inline getAsciiHexInt #-}
+
+-- | Read a non-negative `Integer` from the input, as a non-empty digit
+-- sequence.
+getAsciiDecimalInteger :: Parser e Integer
+getAsciiDecimalInteger = Parser \fp eob s -> case Common.readInteger fp eob s of
+  (# (##) | #)        -> Fail#
+  (# | (# i, s' #) #) -> OK# i s'
+{-# inline getAsciiDecimalInteger #-}
diff --git a/src/FlatParse/BasicString.hs b/src/FlatParse/BasicString.hs
new file mode 100644
index 0000000..5595c50
--- /dev/null
+++ b/src/FlatParse/BasicString.hs
@@ -0,0 +1,137 @@
+module FlatParse.BasicString
+  (
+    runParserS
+
+  , takeLine
+  , traceLine
+  , takeRestString
+  , traceRestString
+
+  , packUTF8
+  , unpackUTF8
+
+  , validPos
+  , posLineCols
+  , unsafeSpanToByteString
+  , mkPos
+  , lines
+
+  ) where
+
+import Prelude hiding ( getChar, lines )
+
+import FlatParse.Basic
+
+import Data.Ord ( comparing )
+import Data.List ( sortBy )
+import FlatParse.Common.Assorted ( packUTF8 )
+
+import qualified Data.ByteString as B
+
+-- | Run a parser on a `String` input. Reminder: @OverloadedStrings@ for `B.ByteString` does not
+--   yield a valid UTF-8 encoding! For non-ASCII `B.ByteString` literal input, use `runParserS` or
+--   `packUTF8` for testing.
+runParserS :: Parser e a -> String -> Result e a
+runParserS pa s = runParser pa (packUTF8 s)
+
+-- | Create a `Pos` from a line and column number. Throws an error on out-of-bounds
+--   line and column numbers.
+mkPos :: B.ByteString -> (Int, Int) -> Pos
+mkPos str (line', col') =
+  let go line col | line == line' && col == col' = getPos
+      go line col = (do
+        c <- getChar
+        if c == '\n' then go (line + 1) 0
+                     else go line (col + 1)) <|> error "mkPos: invalid position"
+  in case runParser (go 0 0) str of
+    OK res _ -> res
+    _        -> error "impossible"
+
+-- | Break an UTF-8-coded `B.ByteString` to lines. Throws an error on invalid
+--   input. This is mostly useful for grabbing specific source lines for
+--   displaying error messages.
+lines :: B.ByteString -> [String]
+lines str =
+  let go = ([] <$ eof) <|> ((:) <$> takeLine <*> go)
+  in case runParser go str of
+    OK ls _ -> ls
+    _       -> error "linesUTF8: invalid input"
+
+
+-- | Check whether a `Pos` points into a `B.ByteString`.
+validPos :: B.ByteString -> Pos -> Bool
+validPos str pos =
+  let go = do
+        start <- getPos
+        pure (start <= pos && pos <= endPos)
+  in case runParser go str of
+    OK b _ -> b
+    _      -> error "impossible"
+{-# inline validPos #-}
+
+-- | Compute corresponding line and column numbers for each `Pos` in a list. Throw an error
+--   on invalid positions. Note: computing lines and columns may traverse the `B.ByteString`,
+--   but it traverses it only once regardless of the length of the position list.
+posLineCols :: B.ByteString -> [Pos] -> [(Int, Int)]
+posLineCols str poss =
+  let go !line !col [] = pure []
+      go line col ((i, pos):poss) = do
+        p <- getPos
+        if pos == p then
+          ((i, (line, col)):) <$> go line col poss
+        else do
+          c <- getChar
+          if '\n' == c then
+            go (line + 1) 0 ((i, pos):poss)
+          else
+            go line (col + 1) ((i, pos):poss)
+
+      sorted :: [(Int, Pos)]
+      sorted = sortBy (comparing snd) (zip [0..] poss)
+
+  in case runParser (go 0 0 sorted) str of
+       OK res _ -> snd <$> sortBy (comparing fst) res
+       _        -> error "invalid position"
+
+-- | Create a `B.ByteString` from a `Span`. The result is invalid if the `Span` points
+--   outside the current buffer, or if the `Span` start is greater than the end position.
+unsafeSpanToByteString :: Span -> Parser e B.ByteString
+unsafeSpanToByteString (Span l r) =
+  lookahead (setPos l >> byteStringOf (setPos r))
+{-# inline unsafeSpanToByteString #-}
+
+--------------------------------------------------------------------------------
+
+-- | Parse the rest of the current line as a `String`. Assumes UTF-8 encoding,
+--   throws an error if the encoding is invalid.
+takeLine :: Parser e String
+takeLine = branch eof (pure "") do
+  c <- getChar
+  case c of
+    '\n' -> pure ""
+    _    -> (c:) <$> takeLine
+
+-- | Parse the rest of the current line as a `String`, but restore the parsing state.
+--   Assumes UTF-8 encoding. This can be used for debugging.
+traceLine :: Parser e String
+traceLine = lookahead takeLine
+
+--------------------------------------------------------------------------------
+
+-- | Convert an UTF-8-coded `B.ByteString` to a `String`.
+unpackUTF8 :: B.ByteString -> String
+unpackUTF8 str = case runParser takeRestString str of
+  OK a _ -> a
+  _      -> error "unpackUTF8: invalid encoding"
+
+-- | Take the rest of the input as a `String`. Assumes UTF-8 encoding.
+takeRestString :: Parser e String
+takeRestString = branch eof (pure "") do
+  c <- getChar
+  cs <- takeRestString
+  pure (c:cs)
+
+-- | Get the rest of the input as a `String`, but restore the parsing state. Assumes UTF-8 encoding.
+--   This can be used for debugging.
+traceRestString :: Parser e String
+traceRestString = lookahead takeRestString
diff --git a/src/FlatParse/Examples/BasicLambda/Lexer.hs b/src/FlatParse/Examples/BasicLambda/Lexer.hs
index 7a3935a..75d58cf 100644
--- a/src/FlatParse/Examples/BasicLambda/Lexer.hs
+++ b/src/FlatParse/Examples/BasicLambda/Lexer.hs
@@ -11,6 +11,7 @@ module FlatParse.Examples.BasicLambda.Lexer where
 import FlatParse.Basic hiding ( Parser, runParser )
 
 import qualified FlatParse.Basic as FP
+import qualified FlatParse.BasicString as FP
 import qualified Data.ByteString as B
 import Language.Haskell.TH
 
@@ -65,7 +66,7 @@ prettyError b e =
       pos      = case e of Imprecise pos e -> pos
                            Precise pos e   -> pos
       ls       = FP.lines b
-      (l, c)   = head $ posLineCols b [pos]
+      (l, c)   = head $ FP.posLineCols b [pos]
       line     = if l < length ls then ls !! l else ""
       linum    = show l
       lpad     = map (const ' ') linum
@@ -108,7 +109,7 @@ runParser = FP.runParser
 
 -- | Run parser, print pretty error on failure.
 testParser :: Show a => Parser a -> String -> IO ()
-testParser p str = case packUTF8 str of
+testParser p str = case FP.packUTF8 str of
   b -> case runParser p b of
     Err e  -> putStrLn $ prettyError b e
     OK a _ -> print a
diff --git a/test/Test.hs b/test/Test.hs
index da4641d..a88bf33 100644
--- a/test/Test.hs
+++ b/test/Test.hs
@@ -8,6 +8,7 @@ import Data.ByteString (ByteString)
 import qualified Data.ByteString as B
 import qualified Data.Char
 import qualified FlatParse.Basic as FB
+import qualified FlatParse.BasicString as FB
 -- import qualified FlatParse.Stateful as FS
 import Test.HUnit
 import Test.Hspec
@@ -19,8 +20,6 @@ import GHC.Int
 import Data.Bits
 import Test.QuickCheck.Instances.ByteString()
 
-import Control.Applicative
-
 main :: IO ()
 main = hspec $ do
   basicSpec
@@ -55,9 +54,9 @@ basicSpec = describe "FlatParse.Basic" $ do
       it "propagates errors" $ FB.optional (FB.err "nope") `shouldParseErr` ""
 
     describe "optional_" $ do
-      it "can succeed" $ FB.optional (pure ()) `shouldParse` ""
-      it "can succeed when argument missing" $ FB.optional FB.empty `shouldParse` ""
-      it "propagates errors" $ FB.optional (FB.err "nope") `shouldParseErr` ""
+      it "can succeed" $ FB.optional_ (pure ()) `shouldParse` ""
+      it "can succeed when argument missing" $ FB.optional_ FB.empty `shouldParse` ""
+      it "propagates errors" $ FB.optional_ (FB.err "nope") `shouldParseErr` ""
 
     describe "withOption" $ do
       let opt p = FB.withOption p (pure . reverse) (pure "bar")
@@ -491,11 +490,11 @@ basicSpec = describe "FlatParse.Basic" $ do
 
     describe "(<|>)" $ do
       it "chooses first option on success" $
-        (("A" <$ $(FB.getStringOf "foo")) <|> ("B" <$ $(FB.getStringOf "foo")))
+        (("A" <$ $(FB.getStringOf "foo")) FB.<|> ("B" <$ $(FB.getStringOf "foo")))
           `shouldParseWith` ("foo", "A")
 
       it "chooses second option when first fails" $
-        (("A" <$ $(FB.getStringOf "bar")) <|> ("B" <$ $(FB.getStringOf "foo")))
+        (("A" <$ $(FB.getStringOf "bar")) FB.<|> ("B" <$ $(FB.getStringOf "foo")))
           `shouldParseWith` ("foo", "B")
 
     describe "branch" $ do

From f6e8091a2e7a7183d17bcca15a434926640bf7a0 Mon Sep 17 00:00:00 2001
From: Ben Orchard <thefirstmuffinman@gmail.com>
Date: Thu, 27 Oct 2022 21:03:47 +0100
Subject: [PATCH 08/11] more fiddling

---
 TODO.md                            |   1 -
 flatparse.cabal                    |   3 +-
 src/FlatParse/Basic.hs             | 416 +----------------------------
 src/FlatParse/Basic/Addr.hs        |   2 +-
 src/FlatParse/Basic/Bytes.hs       |  11 +
 src/FlatParse/Basic/Combinators.hs | 255 ++++++++++++++++++
 src/FlatParse/Basic/Internal.hs    | 107 --------
 src/FlatParse/Basic/Other.hs       | 251 +++++++++++++++++
 8 files changed, 526 insertions(+), 520 deletions(-)
 delete mode 100644 TODO.md
 create mode 100644 src/FlatParse/Basic/Combinators.hs
 delete mode 100644 src/FlatParse/Basic/Internal.hs
 create mode 100644 src/FlatParse/Basic/Other.hs

diff --git a/TODO.md b/TODO.md
deleted file mode 100644
index 4501b18..0000000
--- a/TODO.md
+++ /dev/null
@@ -1 +0,0 @@
-  * add unsafeTakeBs# which doesn't assert >=0. can cause hangs so bad
diff --git a/flatparse.cabal b/flatparse.cabal
index 11fd3ae..06147b5 100644
--- a/flatparse.cabal
+++ b/flatparse.cabal
@@ -46,8 +46,9 @@ library
       FlatParse.Basic
       FlatParse.Basic.Addr
       FlatParse.Basic.Bytes
+      FlatParse.Basic.Combinators
       FlatParse.Basic.Integers
-      FlatParse.Basic.Internal
+      FlatParse.Basic.Other
       FlatParse.Basic.Parser
       FlatParse.Basic.Position
       FlatParse.Basic.Strings
diff --git a/src/FlatParse/Basic.hs b/src/FlatParse/Basic.hs
index 1e1ff4d..82b316f 100644
--- a/src/FlatParse/Basic.hs
+++ b/src/FlatParse/Basic.hs
@@ -1,11 +1,8 @@
 {-# language UnboxedTuples #-}
 
 {-|
-This module implements a `Parser` supporting custom error types.  If you need efficient indentation
-parsing, use "FlatParse.Stateful" instead.
-
-Many internals are exposed for hacking on and extending. These are generally
-denoted by a @#@ hash suffix.
+This module implements a `Parser` supporting custom error types.  If you need
+efficient indentation parsing, use "FlatParse.Stateful" instead.
 -}
 
 module FlatParse.Basic (
@@ -54,6 +51,7 @@ module FlatParse.Basic (
   , takeRest
   , skip
   , atSkip#
+  , skipBack#
   , getBytesOf
   , getByteStringOf
   , getCString
@@ -94,11 +92,11 @@ import Prelude hiding ( take, getChar )
 
 import qualified FlatParse.Common.Assorted as Common
 import FlatParse.Common.Position
-import FlatParse.Common.Trie
 
 import FlatParse.Basic.Parser
 import FlatParse.Basic.Integers
-import FlatParse.Basic.Internal
+import FlatParse.Basic.Combinators
+import FlatParse.Basic.Other
 import FlatParse.Basic.Bytes
 import FlatParse.Basic.Strings
 import FlatParse.Basic.Position
@@ -106,19 +104,13 @@ import FlatParse.Basic.Addr
 
 import Control.Applicative
 
-import Control.Monad
-import Data.Foldable
-import Data.Map (Map)
 import GHC.Exts
-import GHC.Word
-import GHC.ForeignPtr ( ForeignPtr(..) )
-import Language.Haskell.TH
 import System.IO.Unsafe
 
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Unsafe as B
 import qualified Data.ByteString.Internal as B
-import qualified Data.Map.Strict as M
+import GHC.ForeignPtr ( ForeignPtr(..) )
 
 -- | Higher-level boxed data type for parsing results.
 data Result e a =
@@ -135,7 +127,6 @@ instance Functor (Result e) where
   (<$) _ r        = unsafeCoerce# r
   {-# inline (<$) #-}
 
-
 --------------------------------------------------------------------------------
 
 -- | Run a parser.
@@ -152,398 +143,3 @@ runParser (Parser f) b@(B.PS (ForeignPtr _ fp) _ (I# len)) = unsafeDupablePerfor
       Fail# ->
         pure Fail
 {-# inlinable runParser #-}
-
-
---------------------------------------------------------------------------------
-
--- | Throw a parsing error. By default, parser choice `(<|>)` can't backtrack
---   on parser error. Use `try` to convert an error to a recoverable failure.
-err :: e -> Parser e a
-err e = Parser \fp eob s -> Err# e
-{-# inline err #-}
-
--- | Convert a parsing failure to a success.
-fails :: Parser e a -> Parser e ()
-fails (Parser f) = Parser \fp eob s ->
-  case f fp eob s of
-    OK# _ _ -> Fail#
-    Fail#   -> OK# () s
-    Err# e  -> Err# e
-{-# inline fails #-}
-
--- | Convert a parsing error into failure.
-try :: Parser e a -> Parser e a
-try (Parser f) = Parser \fp eob s -> case f fp eob s of
-  Err# _ -> Fail#
-  x      -> x
-{-# inline try #-}
-
--- | Convert a parsing failure to a `()`.
-optional_ :: Parser e a -> Parser e ()
-optional_ p = (() <$ p) <|> pure ()
-{-# inline optional_ #-}
-
--- | CPS'd version of `optional`. This is usually more efficient, since it gets
---   rid of the extra `Maybe` allocation.
-withOption :: Parser e a -> (a -> Parser e r) -> Parser e r -> Parser e r
-withOption (Parser f) just (Parser nothing) = Parser \fp eob s -> case f fp eob s of
-  OK# a s -> runParser# (just a) fp eob s
-  Fail#   -> nothing fp eob s
-  Err# e  -> Err# e
-{-# inline withOption #-}
-
--- | Convert a parsing failure to an error.
-cut :: Parser e a -> e -> Parser e a
-cut (Parser f) e = Parser \fp eob s -> case f fp eob s of
-  Fail# -> Err# e
-  x     -> x
-{-# inline cut #-}
-
--- | Run the parser, if we get a failure, throw the given error, but if we get an error, merge the
---   inner and the newly given errors using the @e -> e -> e@ function. This can be useful for
---   implementing parsing errors which may propagate hints or accummulate contextual information.
-cutting :: Parser e a -> e -> (e -> e -> e) -> Parser e a
-cutting (Parser f) e merge = Parser \fp eob s -> case f fp eob s of
-  Fail#   -> Err# e
-  Err# e' -> let !e'' = merge e' e in Err# e''
-  x       -> x
-{-# inline cutting #-}
-
---------------------------------------------------------------------------------
-
-
--- | Succeed if the input is empty.
-eof :: Parser e ()
-eof = Parser \fp eob s -> case eqAddr# eob s of
-  1# -> OK# () s
-  _  -> Fail#
-{-# inline eof #-}
-
--- | Read the given number of bytes as a 'ByteString'.
---
--- Throws a runtime error if given a negative integer.
-take :: Int -> Parser e B.ByteString
-take (I# n#) = take# n#
-{-# inline take #-}
-
--- | Consume the rest of the input. May return the empty bytestring.
-takeRest :: Parser e B.ByteString
-takeRest = Parser \fp eob s ->
-  let n# = minusAddr# eob s
-  in  OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) eob
-{-# inline takeRest #-}
-
--- | Skip forward @n@ bytes. Fails if fewer than @n@ bytes are available.
---
--- Throws a runtime error if given a negative integer.
-skip :: Int -> Parser e ()
-skip (I# os#) = atSkip# os# (pure ())
-{-# inline skip #-}
-
-{-|
-This is a template function which makes it possible to branch on a collection of string literals in
-an efficient way. By using `switch`, such branching is compiled to a trie of primitive parsing
-operations, which has optimized control flow, vectorized reads and grouped checking for needed input
-bytes.
-
-The syntax is slightly magical, it overloads the usual @case@ expression. An example:
-
-@
-    $(switch [| case _ of
-        "foo" -> pure True
-        "bar" -> pure False |])
-@
-
-The underscore is mandatory in @case _ of@. Each branch must be a string literal, but optionally
-we may have a default case, like in
-
-@
-    $(switch [| case _ of
-        "foo" -> pure 10
-        "bar" -> pure 20
-        _     -> pure 30 |])
-@
-
-All case right hand sides must be parsers with the same type. That type is also the type
-of the whole `switch` expression.
-
-A `switch` has longest match semantics, and the order of cases does not matter, except for
-the default case, which may only appear as the last case.
-
-If a `switch` does not have a default case, and no case matches the input, then it returns with
-failure, \without\ having consumed any input. A fallthrough to the default case also does not
-consume any input.
--}
-switch :: Q Exp -> Q Exp
-switch = switchWithPost Nothing
-
-{-|
-Switch expression with an optional first argument for performing a post-processing action after
-every successful branch matching, not including the default branch. For example, if we have
-@ws :: Parser e ()@ for a whitespace parser, we might want to consume whitespace after matching
-on any of the switch cases. For that case, we can define a "lexeme" version of `switch` as
-follows.
-
-@
-  switch' :: Q Exp -> Q Exp
-  switch' = switchWithPost (Just [| ws |])
-@
-
-Note that this @switch'@ function cannot be used in the same module it's defined in, because of the
-stage restriction of Template Haskell.
--}
-switchWithPost :: Maybe (Q Exp) -> Q Exp -> Q Exp
-switchWithPost postAction exp = do
-  !postAction <- sequence postAction
-  (!cases, !fallback) <- parseSwitch exp
-  genTrie $! genSwitchTrie' postAction cases fallback
-
--- | Version of `switchWithPost` without syntactic sugar. The second argument is the
---   list of cases, the third is the default case.
-rawSwitchWithPost :: Maybe (Q Exp) -> [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp
-rawSwitchWithPost postAction cases fallback = do
-  !postAction <- sequence postAction
-  !cases <- forM cases \(str, rhs) -> (str,) <$> rhs
-  !fallback <- sequence fallback
-  genTrie $! genSwitchTrie' postAction cases fallback
-
---------------------------------------------------------------------------------
-
--- | An analogue of the list `foldl` function: first parse a @b@, then parse zero or more @a@-s,
---   and combine the results in a left-nested way by the @b -> a -> b@ function. Note: this is not
---   the usual `chainl` function from the parsec libraries!
-chainl :: (b -> a -> b) -> Parser e b -> Parser e a -> Parser e b
-chainl f start elem = start >>= go where
-  go b = do {!a <- elem; go $! f b a} <|> pure b
-{-# inline chainl #-}
-
--- | An analogue of the list `foldr` function: parse zero or more @a@-s, terminated by a @b@, and
---   combine the results in a right-nested way using the @a -> b -> b@ function. Note: this is not
---   the usual `chainr` function from the parsec libraries!
-chainr :: (a -> b -> b) -> Parser e a -> Parser e b -> Parser e b
-chainr f (Parser elem) (Parser end) = Parser go where
-  go fp eob s = case elem fp eob s of
-    OK# a s -> case go fp eob s of
-      OK# b s -> let !b' = f a b in OK# b' s
-      x       -> x
-    Fail# -> end fp eob s
-    Err# e -> Err# e
-{-# inline chainr #-}
-
--- | Skip a parser zero or more times.
-many_ :: Parser e a -> Parser e ()
-many_ (Parser f) = Parser go where
-  go fp eob s = case f fp eob s of
-    OK# a s -> go fp eob s
-    Fail#   -> OK# () s
-    Err# e  -> Err# e
-{-# inline many_ #-}
-
--- | Skip a parser one or more times.
-some_ :: Parser e a -> Parser e ()
-some_ pa = pa >> many_ pa
-{-# inline some_ #-}
-
--- | Succeed if the first parser succeeds and the second one fails.
-notFollowedBy :: Parser e a -> Parser e b -> Parser e a
-notFollowedBy p1 p2 = p1 <* fails p2
-{-# inline notFollowedBy #-}
-
---------------------------------------------------------------------------------
-
--- | Parse a given `B.ByteString`. If the bytestring is statically known,
---   consider using 'bytes' instead.
-getByteStringOf :: B.ByteString -> Parser e ()
-getByteStringOf (B.PS (ForeignPtr bs fcontent) _ (I# len)) =
-
-  let go64 :: Addr# -> Addr# -> Addr# -> State# RealWorld -> (# Res# e (), State# RealWorld #)
-      go64 bs bsend s w =
-        let bs' = plusAddr# bs 8# in
-        case gtAddr# bs' bsend of
-          1# -> go8 bs bsend s w
-          _  -> if   W64# (indexWord64OffAddr# bs 0#) == W64# (indexWord64OffAddr# s 0#)
-                then go64 bs' bsend (plusAddr# s 8#) w
-                else (# Fail#, w #)
-
-      go8 :: Addr# -> Addr# -> Addr# -> State# RealWorld -> (# Res# e (), State# RealWorld #)
-      go8 bs bsend s w =
-        case ltAddr# bs bsend of
-          1# -> if   W8# (indexWord8OffAddr# bs 0#) == W8# (indexWord8OffAddr# s 0#)
-                then go8 (plusAddr# bs 1#) bsend (plusAddr# s 1#) w
-                else (# Fail#, w #)
-          _  -> (# OK# () s, w #)
-
-  in Parser \fp eob s -> case len <=# minusAddr# eob s of
-       1# -> runRW# \w -> case go64 bs (plusAddr# bs len) s w of
-               (# res, w #) -> case touch# fcontent w of
-                 w -> res
-       _  -> Fail#
-{-# inline getByteStringOf #-}
-
--- Switching code generation
---------------------------------------------------------------------------------
-
-#if MIN_VERSION_base(4,15,0)
-mkDoE = DoE Nothing
-{-# inline mkDoE #-}
-#else
-mkDoE = DoE
-{-# inline mkDoE #-}
-#endif
-
-genTrie :: (Map (Maybe Int) Exp, Trie' (Rule, Int, Maybe Int)) -> Q Exp
-genTrie (rules, t) = do
-  branches <- traverse (\e -> (,) <$> (newName "rule") <*> pure e) rules
-
-  let ix m k = case M.lookup k m of
-        Nothing -> error ("key not in map: " ++ show k)
-        Just a  -> a
-
-  let ensure :: Maybe Int -> Maybe (Q Exp)
-      ensure = fmap (\n -> [| ensureBytes# n |])
-
-      fallback :: Rule -> Int ->  Q Exp
-      fallback rule 0 = pure $ VarE $ fst $ ix branches rule
-      fallback rule n = [| skipBack# n >> $(pure $ VarE $ fst $ ix branches rule) |]
-
-  let go :: Trie' (Rule, Int, Maybe Int) -> Q Exp
-      go = \case
-        Branch' (r, n, alloc) ts
-          | M.null ts -> pure $ VarE $ fst $ branches M.! r
-          | otherwise -> do
-              !next         <- (traverse . traverse) go (M.toList ts)
-              !defaultCase  <- fallback r (n + 1)
-
-              let cases = mkDoE $
-                    [BindS (VarP (mkName "c")) (VarE 'getWord8Unsafe),
-                      NoBindS (CaseE (VarE (mkName "c"))
-                         (map (\(w, t) ->
-                                 Match (LitP (IntegerL (fromIntegral w)))
-                                       (NormalB t)
-                                       [])
-                              next
-                          ++ [Match WildP (NormalB defaultCase) []]))]
-
-              case ensure alloc of
-                Nothing    -> pure cases
-                Just alloc -> [| branch $alloc $(pure cases) $(fallback r n) |]
-
-        Path (r, n, alloc) ws t ->
-          case ensure alloc of
-            Nothing    -> [| branch $(scanBytes# ws) $(go t) $(fallback r n)|]
-            Just alloc -> [| branch ($alloc >> $(scanBytes# ws)) $(go t) $(fallback r n) |]
-
-  letE
-    (map (\(x, rhs) -> valD (varP x) (normalB (pure rhs)) []) (Data.Foldable.toList branches))
-    (go t)
-
-parseSwitch :: Q Exp -> Q ([(String, Exp)], Maybe Exp)
-parseSwitch exp = exp >>= \case
-  CaseE (UnboundVarE _) []    -> error "switch: empty clause list"
-  CaseE (UnboundVarE _) cases -> do
-    (!cases, !last) <- pure (init cases, last cases)
-    !cases <- forM cases \case
-      Match (LitP (StringL str)) (NormalB rhs) [] -> pure (str, rhs)
-      _ -> error "switch: expected a match clause on a string literal"
-    (!cases, !last) <- case last of
-      Match (LitP (StringL str)) (NormalB rhs) [] -> pure (cases ++ [(str, rhs)], Nothing)
-      Match WildP                (NormalB rhs) [] -> pure (cases, Just rhs)
-      _ -> error "switch: expected a match clause on a string literal or a wildcard"
-    pure (cases, last)
-  _ -> error "switch: expected a \"case _ of\" expression"
-
-genSwitchTrie' :: Maybe Exp -> [(String, Exp)] -> Maybe Exp
-              -> (Map (Maybe Int) Exp, Trie' (Rule, Int, Maybe Int))
-genSwitchTrie' postAction cases fallback =
-
-  let (!branches, !strings) = unzip do
-        (!i, (!str, !rhs)) <- zip [0..] cases
-        case postAction of
-          Nothing    -> pure ((Just i, rhs), (i, str))
-          Just !post -> pure ((Just i, (VarE '(>>)) `AppE` post `AppE` rhs), (i, str))
-
-      !m    = M.fromList ((Nothing, maybe (VarE 'empty) id fallback) : branches)
-      !trie = compileTrie strings
-  in (m , trie)
-
---------------------------------------------------------------------------------
-
--- | Branch on a parser: if the first argument succeeds, continue with the second, else with the third.
---   This can produce slightly more efficient code than `(<|>)`. Moreover, `ḃranch` does not
---   backtrack from the true/false cases.
-branch :: Parser e a -> Parser e b -> Parser e b -> Parser e b
-branch pa pt pf = Parser \fp eob s -> case runParser# pa fp eob s of
-  OK# _ s -> runParser# pt fp eob s
-  Fail#   -> runParser# pf fp eob s
-  Err# e  -> Err# e
-{-# inline branch #-}
-
--- | @isolate n p@ runs the parser @p@ isolated to the next @n@ bytes. All
---   isolated bytes must be consumed.
---
--- Throws a runtime error if given a negative integer.
-isolate :: Int -> Parser e a -> Parser e a
-isolate (I# n#) p = withPosInt# n# (\n'# -> isolateUnsafe# n'# p)
-{-# inline isolate #-}
-
--- | @isolate n p@ runs the parser @p@ isolated to the next @n@ bytes. All
---   isolated bytes must be consumed.
---
--- Undefined behaviour if given a negative integer.
-isolateUnsafe# :: Int# -> Parser e a -> Parser e a
-isolateUnsafe# n# p = Parser \fp eob s ->
-  let s' = plusAddr# s n#
-  in  case n# <=# minusAddr# eob s of
-        1# -> case runParser# p fp s' s of
-          OK# a s'' -> case eqAddr# s' s'' of
-            1# -> OK# a s''
-            _  -> Fail# -- isolated segment wasn't fully consumed
-          Fail#     -> Fail#
-          Err# e    -> Err# e
-        _  -> Fail# -- you tried to isolate more than we have left
-{-# inline isolateUnsafe# #-}
-
---------------------------------------------------------------------------------
--- Low-level boxed combinators
-
--- | Read a null-terminated bytestring (a C-style string).
---
--- Consumes the null terminator.
-getCString :: Parser e B.ByteString
-getCString = Parser \fp eob s -> go' fp eob s
-  where
-    go' fp eob s0 = go 0# s0
-      where
-        go n# s = case eqAddr# eob s of
-          1# -> Fail#
-          _  ->
-            let s' = plusAddr# s 1#
-                w# = indexWord8OffAddr# s 0#
-            in  if   W8# w# == 0x00
-                then OK# (B.PS (ForeignPtr s0 fp) 0 (I# n#)) s'
-                else go (n# +# 1#) s'
-{-# inline getCString #-}
-
--- | Read a null-terminated bytestring (a C-style string), where the bytestring
---   is known to be null-terminated somewhere in the input.
---
--- Undefined behaviour if your bytestring isn't null-terminated somewhere.
--- You almost certainly want 'getCString' instead.
---
--- Fails on GHC versions older than 9.0, since we make use of the
--- 'cstringLength#' primop introduced in GHC 9.0, and we aren't very useful
--- without it.
---
--- Consumes the null terminator.
-getCStringUnsafe :: Parser e B.ByteString
-{-# inline getCStringUnsafe #-}
-#if MIN_VERSION_base(4,15,0)
-getCStringUnsafe = Parser \fp eob s ->
-  case eqAddr# eob s of
-    1# -> Fail#
-    _  -> let n#  = cstringLength# s
-              s'# = plusAddr# s (n# +# 1#)
-           in OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) s'#
-#else
-getCStringUnsafe = error "Flatparse.Basic.getCStringUnsafe: requires GHC 9.0 / base-4.15, not available on this compiler"
-#endif
diff --git a/src/FlatParse/Basic/Addr.hs b/src/FlatParse/Basic/Addr.hs
index a8e0138..0d76c55 100644
--- a/src/FlatParse/Basic/Addr.hs
+++ b/src/FlatParse/Basic/Addr.hs
@@ -20,7 +20,7 @@ functions.)
 module FlatParse.Basic.Addr where
 
 import FlatParse.Basic.Parser
-import FlatParse.Basic.Internal
+import FlatParse.Basic.Combinators ( take#, atSkip#, lookahead )
 
 import GHC.Exts
 
diff --git a/src/FlatParse/Basic/Bytes.hs b/src/FlatParse/Basic/Bytes.hs
index 91eafc4..7c47195 100644
--- a/src/FlatParse/Basic/Bytes.hs
+++ b/src/FlatParse/Basic/Bytes.hs
@@ -52,3 +52,14 @@ scanBytes# bytes = do
       ws              -> let !w = Common.packBytes ws
                              !l = length ws
                          in [| scanPartial64# l w >> $scanw8s |]
+
+scanPartial64# :: Int -> Word -> Parser e ()
+scanPartial64# (I# len) (W# w) = Parser \fp eob s ->
+  case indexWordOffAddr# s 0# of
+    w' -> case uncheckedIShiftL# (8# -# len) 3# of
+      sh -> case uncheckedShiftL# w' sh of
+        w' -> case uncheckedShiftRL# w' sh of
+          w' -> case eqWord# w w' of
+            1# -> OK# () (plusAddr# s len)
+            _  -> Fail#
+{-# inline scanPartial64# #-}
diff --git a/src/FlatParse/Basic/Combinators.hs b/src/FlatParse/Basic/Combinators.hs
new file mode 100644
index 0000000..16a4e5d
--- /dev/null
+++ b/src/FlatParse/Basic/Combinators.hs
@@ -0,0 +1,255 @@
+{-# language UnboxedTuples #-}
+
+module FlatParse.Basic.Combinators where
+
+import FlatParse.Basic.Parser
+import FlatParse.Basic.Integers
+import FlatParse.Basic.Bytes
+
+import FlatParse.Common.Trie
+
+import GHC.Exts
+import GHC.Word
+import Control.Applicative ( empty )
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Internal as B
+import GHC.ForeignPtr ( ForeignPtr(..) )
+import qualified Data.Foldable
+import Control.Monad ( forM )
+import Control.Applicative ( (<|>) )
+
+import Language.Haskell.TH
+import qualified Data.Map.Strict as M
+import           Data.Map.Strict ( Map )
+
+-- | Throw a parsing error. By default, parser choice `(<|>)` can't backtrack
+--   on parser error. Use `try` to convert an error to a recoverable failure.
+err :: e -> Parser e a
+err e = Parser \fp eob s -> Err# e
+{-# inline err #-}
+
+-- | Convert a parsing failure to a success.
+fails :: Parser e a -> Parser e ()
+fails (Parser f) = Parser \fp eob s ->
+  case f fp eob s of
+    OK# _ _ -> Fail#
+    Fail#   -> OK# () s
+    Err# e  -> Err# e
+{-# inline fails #-}
+
+-- | Convert a parsing error into failure.
+try :: Parser e a -> Parser e a
+try (Parser f) = Parser \fp eob s -> case f fp eob s of
+  Err# _ -> Fail#
+  x      -> x
+{-# inline try #-}
+
+-- | Convert a parsing failure to a `()`.
+optional_ :: Parser e a -> Parser e ()
+optional_ p = (() <$ p) <|> pure ()
+{-# inline optional_ #-}
+
+-- | CPS'd version of `optional`. This is usually more efficient, since it gets
+--   rid of the extra `Maybe` allocation.
+withOption :: Parser e a -> (a -> Parser e r) -> Parser e r -> Parser e r
+withOption (Parser f) just (Parser nothing) = Parser \fp eob s -> case f fp eob s of
+  OK# a s -> runParser# (just a) fp eob s
+  Fail#   -> nothing fp eob s
+  Err# e  -> Err# e
+{-# inline withOption #-}
+
+-- | Convert a parsing failure to an error.
+cut :: Parser e a -> e -> Parser e a
+cut (Parser f) e = Parser \fp eob s -> case f fp eob s of
+  Fail# -> Err# e
+  x     -> x
+{-# inline cut #-}
+
+-- | Run the parser, if we get a failure, throw the given error, but if we get an error, merge the
+--   inner and the newly given errors using the @e -> e -> e@ function. This can be useful for
+--   implementing parsing errors which may propagate hints or accummulate contextual information.
+cutting :: Parser e a -> e -> (e -> e -> e) -> Parser e a
+cutting (Parser f) e merge = Parser \fp eob s -> case f fp eob s of
+  Fail#   -> Err# e
+  Err# e' -> let !e'' = merge e' e in Err# e''
+  x       -> x
+{-# inline cutting #-}
+
+--------------------------------------------------------------------------------
+
+-- | Save the parsing state, then run a parser, then restore the state.
+lookahead :: Parser e a -> Parser e a
+lookahead (Parser f) = Parser \fp eob s ->
+  case f fp eob s of
+    OK# a _ -> OK# a s
+    x       -> x
+{-# inline lookahead #-}
+
+-- | @isolate n p@ runs the parser @p@ isolated to the next @n@ bytes. All
+--   isolated bytes must be consumed.
+--
+-- Throws a runtime error if given a negative integer.
+isolate :: Int -> Parser e a -> Parser e a
+isolate (I# n#) p = withPosInt# n# (\n'# -> isolateUnsafe# n'# p)
+{-# inline isolate #-}
+
+-- | @isolate n p@ runs the parser @p@ isolated to the next @n@ bytes. All
+--   isolated bytes must be consumed.
+--
+-- Undefined behaviour if given a negative integer.
+isolateUnsafe# :: Int# -> Parser e a -> Parser e a
+isolateUnsafe# n# p = Parser \fp eob s ->
+  let s' = plusAddr# s n#
+  in  case n# <=# minusAddr# eob s of
+        1# -> case runParser# p fp s' s of
+          OK# a s'' -> case eqAddr# s' s'' of
+            1# -> OK# a s''
+            _  -> Fail# -- isolated segment wasn't fully consumed
+          Fail#     -> Fail#
+          Err# e    -> Err# e
+        _  -> Fail# -- you tried to isolate more than we have left
+{-# inline isolateUnsafe# #-}
+
+-- | An analogue of the list `foldl` function: first parse a @b@, then parse zero or more @a@-s,
+--   and combine the results in a left-nested way by the @b -> a -> b@ function. Note: this is not
+--   the usual `chainl` function from the parsec libraries!
+chainl :: (b -> a -> b) -> Parser e b -> Parser e a -> Parser e b
+chainl f start elem = start >>= go where
+  go b = do {!a <- elem; go $! f b a} <|> pure b
+{-# inline chainl #-}
+
+-- | An analogue of the list `foldr` function: parse zero or more @a@-s, terminated by a @b@, and
+--   combine the results in a right-nested way using the @a -> b -> b@ function. Note: this is not
+--   the usual `chainr` function from the parsec libraries!
+chainr :: (a -> b -> b) -> Parser e a -> Parser e b -> Parser e b
+chainr f (Parser elem) (Parser end) = Parser go where
+  go fp eob s = case elem fp eob s of
+    OK# a s -> case go fp eob s of
+      OK# b s -> let !b' = f a b in OK# b' s
+      x       -> x
+    Fail# -> end fp eob s
+    Err# e -> Err# e
+{-# inline chainr #-}
+
+-- | Branch on a parser: if the first argument succeeds, continue with the second, else with the third.
+--   This can produce slightly more efficient code than `(<|>)`. Moreover, `ḃranch` does not
+--   backtrack from the true/false cases.
+branch :: Parser e a -> Parser e b -> Parser e b -> Parser e b
+branch pa pt pf = Parser \fp eob s -> case runParser# pa fp eob s of
+  OK# _ s -> runParser# pt fp eob s
+  Fail#   -> runParser# pf fp eob s
+  Err# e  -> Err# e
+{-# inline branch #-}
+
+-- | Skip a parser zero or more times.
+many_ :: Parser e a -> Parser e ()
+many_ (Parser f) = Parser go where
+  go fp eob s = case f fp eob s of
+    OK# a s -> go fp eob s
+    Fail#   -> OK# () s
+    Err# e  -> Err# e
+{-# inline many_ #-}
+
+-- | Skip a parser one or more times.
+some_ :: Parser e a -> Parser e ()
+some_ pa = pa >> many_ pa
+{-# inline some_ #-}
+
+-- | Succeed if the first parser succeeds and the second one fails.
+notFollowedBy :: Parser e a -> Parser e b -> Parser e a
+notFollowedBy p1 p2 = p1 <* fails p2
+{-# inline notFollowedBy #-}
+
+--------------------------------------------------------------------------------
+
+-- | Succeed if the input is empty.
+eof :: Parser e ()
+eof = Parser \fp eob s -> case eqAddr# eob s of
+  1# -> OK# () s
+  _  -> Fail#
+{-# inline eof #-}
+
+-- | Read the given number of bytes as a 'ByteString'.
+--
+-- Throws a runtime error if given a negative integer.
+take :: Int -> Parser e B.ByteString
+take (I# n#) = take# n#
+{-# inline take #-}
+
+-- | Read @n@ bytes as a 'ByteString'. Fails if newer than @n@ bytes are
+--   available.
+--
+-- Throws a runtime error if given a negative integer.
+take# :: Int# -> Parser e B.ByteString
+take# n# = withPosInt# n# takeUnsafe#
+{-# inline take# #-}
+
+-- | Read @n@ bytes as a 'ByteString'. Fails if newer than @n@ bytes are
+--   available.
+--
+-- Undefined behaviour if given a negative integer.
+takeUnsafe# :: Int# -> Parser e B.ByteString
+takeUnsafe# n# = Parser \fp eob s -> case n# <=# minusAddr# eob s of
+  1# -> OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) (plusAddr# s n#)
+  _  -> Fail#
+{-# inline takeUnsafe# #-}
+
+-- | Consume the rest of the input. May return the empty bytestring.
+takeRest :: Parser e B.ByteString
+takeRest = Parser \fp eob s ->
+  let n# = minusAddr# eob s
+  in  OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) eob
+{-# inline takeRest #-}
+
+-- | Skip forward @n@ bytes. Fails if fewer than @n@ bytes are available.
+--
+-- Throws a runtime error if given a negative integer.
+skip :: Int -> Parser e ()
+skip (I# os#) = skip# os#
+{-# inline skip #-}
+
+-- | Skip forward @n@ bytes. Fails if fewer than @n@ bytes are available.
+--
+-- Throws a runtime error if given a negative integer.
+skip# :: Int# -> Parser e ()
+skip# os# = atSkip# os# (pure ())
+{-# inline skip# #-}
+
+-- | Skip forward @n@ bytes and run the given parser. Fails if fewer than @n@
+--   bytes are available.
+--
+-- Throws a runtime error if given a negative integer.
+atSkip# :: Int# -> Parser e a -> Parser e a
+atSkip# os# p = withPosInt# os# (\n# -> atSkipUnsafe# n# p)
+{-# inline atSkip# #-}
+
+-- | Skip forward @n@ bytes and run the given parser. Fails if fewer than @n@
+--   bytes are available.
+--
+-- Undefined behaviour if given a negative integer.
+atSkipUnsafe# :: Int# -> Parser e a -> Parser e a
+atSkipUnsafe# os# (Parser p) = Parser \fp eob s ->
+    case os# <=# minusAddr# eob s of
+      1# -> p fp eob (plusAddr# s os#)
+      _  -> Fail#
+{-# inline atSkipUnsafe# #-}
+
+-- | Go back @n@ bytes.
+--
+-- Highly unsafe. Makes no checks.
+skipBack# :: Int -> Parser e ()
+skipBack# (I# i) = Parser \fp eob s -> OK# () (plusAddr# s (negateInt# i))
+{-# inline skipBack# #-}
+
+--------------------------------------------------------------------------------
+-- Helpers for common internal operations
+
+-- | Assert for the given 'Int#' that @n >= 0@, and pass it on to the given
+--   function.
+--
+-- Throws a runtime error if given a negative integer.
+withPosInt# :: Int# -> (Int# -> a) -> a
+withPosInt# n# f = case n# >=# 0# of
+  1# -> f n#
+  _  -> error "FlatParse.Basic.Internal.withPosInt#: negative integer"
+{-# inline withPosInt# #-}
diff --git a/src/FlatParse/Basic/Internal.hs b/src/FlatParse/Basic/Internal.hs
deleted file mode 100644
index 7f3c4f5..0000000
--- a/src/FlatParse/Basic/Internal.hs
+++ /dev/null
@@ -1,107 +0,0 @@
-{-# language UnboxedTuples #-}
-
-module FlatParse.Basic.Internal where
-
-import FlatParse.Basic.Parser
-
-import GHC.Exts
-import GHC.ForeignPtr
-
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Internal as B
-
---------------------------------------------------------------------------------
-
--- | Save the parsing state, then run a parser, then restore the state.
-lookahead :: Parser e a -> Parser e a
-lookahead (Parser f) = Parser \fp eob s ->
-  case f fp eob s of
-    OK# a _ -> OK# a s
-    x       -> x
-{-# inline lookahead #-}
-
---------------------------------------------------------------------------------
-
-scanPartial64# :: Int -> Word -> Parser e ()
-scanPartial64# (I# len) (W# w) = Parser \fp eob s ->
-  case indexWordOffAddr# s 0# of
-    w' -> case uncheckedIShiftL# (8# -# len) 3# of
-      sh -> case uncheckedShiftL# w' sh of
-        w' -> case uncheckedShiftRL# w' sh of
-          w' -> case eqWord# w w' of
-            1# -> OK# () (plusAddr# s len)
-            _  -> Fail#
-{-# inline scanPartial64# #-}
-
---------------------------------------------------------------------------------
--- Helpers for common internal operations
-
--- | Assert for the given 'Int#' that @n >= 0@, and pass it on to the given
---   function.
---
--- Throws a runtime error if given a negative integer.
-withPosInt# :: Int# -> (Int# -> a) -> a
-withPosInt# n# f = case n# >=# 0# of
-  1# -> f n#
-  _  -> error "FlatParse.Basic.Internal.withPosInt#: negative integer"
-{-# inline withPosInt# #-}
-
--- | Run the given parser only if we have not yet reached the end of the buffer.
-withNotEob :: Parser e a -> Parser e a
-withNotEob (Parser p) = Parser \fp eob s -> case eqAddr# eob s of
-  1# -> Fail#
-  _  -> p fp eob s
-{-# inline withNotEob #-}
-
---------------------------------------------------------------------------------
--- Low level unboxed combinators
-
--- | Read @n@ bytes as a 'ByteString'. Fails if newer than @n@ bytes are
---   available.
---
--- Throws a runtime error if given a negative integer.
-take# :: Int# -> Parser e B.ByteString
-take# n# = withPosInt# n# takeUnsafe#
-{-# inline take# #-}
-
--- | Read @n@ bytes as a 'ByteString'. Fails if newer than @n@ bytes are
---   available.
---
--- Undefined behaviour if given a negative integer.
-takeUnsafe# :: Int# -> Parser e B.ByteString
-takeUnsafe# n# = Parser \fp eob s -> case n# <=# minusAddr# eob s of
-  1# -> OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) (plusAddr# s n#)
-  _  -> Fail#
-{-# inline takeUnsafe# #-}
-
--- | Skip forward @n@ bytes and run the given parser. Fails if fewer than @n@
---   bytes are available.
---
--- Throws a runtime error if given a negative integer.
-atSkip# :: Int# -> Parser e a -> Parser e a
-atSkip# os# p = withPosInt# os# (\n# -> atSkipUnsafe# n# p)
-{-# inline atSkip# #-}
-
--- | Skip forward @n@ bytes and run the given parser. Fails if fewer than @n@
---   bytes are available.
---
--- Undefined behaviour if given a negative integer.
-atSkipUnsafe# :: Int# -> Parser e a -> Parser e a
-atSkipUnsafe# os# (Parser p) = Parser \fp eob s -> case os# <=# minusAddr# eob s of
-  1# -> p fp eob (plusAddr# s os#)
-  _  -> Fail#
-{-# inline atSkipUnsafe# #-}
-
--- | Skip forward @n@ bytes. Fails if fewer than @n@ bytes are available.
---
--- Throws a runtime error if given a negative integer.
-skip# :: Int# -> Parser e ()
-skip# os# = atSkip# os# (pure ())
-{-# inline skip# #-}
-
--- | Go back @n@ bytes.
---
--- Highly unsafe. Makes no checks.
-skipBack# :: Int -> Parser e ()
-skipBack# (I# i) = Parser \fp eob s -> OK# () (plusAddr# s (negateInt# i))
-{-# inline skipBack# #-}
diff --git a/src/FlatParse/Basic/Other.hs b/src/FlatParse/Basic/Other.hs
new file mode 100644
index 0000000..4cb108c
--- /dev/null
+++ b/src/FlatParse/Basic/Other.hs
@@ -0,0 +1,251 @@
+{-# language UnboxedTuples #-}
+
+module FlatParse.Basic.Other where
+
+import FlatParse.Basic.Parser
+
+-- TODO TH!!
+import FlatParse.Basic.Combinators ( branch, skipBack# )
+import FlatParse.Basic.Bytes ( scanBytes#, ensureBytes# )
+import FlatParse.Basic.Integers ( getWord8Unsafe )
+
+import FlatParse.Common.Trie
+
+import GHC.Exts
+import GHC.Word
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Internal as B
+import GHC.ForeignPtr ( ForeignPtr(..) )
+import Control.Applicative ( empty )
+import qualified Data.Foldable
+import Control.Monad ( forM )
+
+import Language.Haskell.TH
+import qualified Data.Map.Strict as M
+import           Data.Map.Strict ( Map )
+
+-- | Parse a given `B.ByteString`. If the bytestring is statically known,
+--   consider using 'bytes' instead.
+getByteStringOf :: B.ByteString -> Parser e ()
+getByteStringOf (B.PS (ForeignPtr bs fcontent) _ (I# len)) =
+
+  let go64 :: Addr# -> Addr# -> Addr# -> State# RealWorld -> (# Res# e (), State# RealWorld #)
+      go64 bs bsend s w =
+        let bs' = plusAddr# bs 8# in
+        case gtAddr# bs' bsend of
+          1# -> go8 bs bsend s w
+          _  -> if   W64# (indexWord64OffAddr# bs 0#) == W64# (indexWord64OffAddr# s 0#)
+                then go64 bs' bsend (plusAddr# s 8#) w
+                else (# Fail#, w #)
+
+      go8 :: Addr# -> Addr# -> Addr# -> State# RealWorld -> (# Res# e (), State# RealWorld #)
+      go8 bs bsend s w =
+        case ltAddr# bs bsend of
+          1# -> if   W8# (indexWord8OffAddr# bs 0#) == W8# (indexWord8OffAddr# s 0#)
+                then go8 (plusAddr# bs 1#) bsend (plusAddr# s 1#) w
+                else (# Fail#, w #)
+          _  -> (# OK# () s, w #)
+
+  in Parser \fp eob s -> case len <=# minusAddr# eob s of
+       1# -> runRW# \w -> case go64 bs (plusAddr# bs len) s w of
+               (# res, w #) -> case touch# fcontent w of
+                 w -> res
+       _  -> Fail#
+{-# inline getByteStringOf #-}
+
+-- | Read a null-terminated bytestring (a C-style string).
+--
+-- Consumes the null terminator.
+getCString :: Parser e B.ByteString
+getCString = Parser \fp eob s -> go' fp eob s
+  where
+    go' fp eob s0 = go 0# s0
+      where
+        go n# s = case eqAddr# eob s of
+          1# -> Fail#
+          _  ->
+            let s' = plusAddr# s 1#
+                w# = indexWord8OffAddr# s 0#
+            in  if   W8# w# == 0x00
+                then OK# (B.PS (ForeignPtr s0 fp) 0 (I# n#)) s'
+                else go (n# +# 1#) s'
+{-# inline getCString #-}
+
+-- | Read a null-terminated bytestring (a C-style string), where the bytestring
+--   is known to be null-terminated somewhere in the input.
+--
+-- Undefined behaviour if your bytestring isn't null-terminated somewhere.
+-- You almost certainly want 'getCString' instead.
+--
+-- Fails on GHC versions older than 9.0, since we make use of the
+-- 'cstringLength#' primop introduced in GHC 9.0, and we aren't very useful
+-- without it.
+--
+-- Consumes the null terminator.
+getCStringUnsafe :: Parser e B.ByteString
+{-# inline getCStringUnsafe #-}
+#if MIN_VERSION_base(4,15,0)
+getCStringUnsafe = Parser \fp eob s ->
+  case eqAddr# eob s of
+    1# -> Fail#
+    _  -> let n#  = cstringLength# s
+              s'# = plusAddr# s (n# +# 1#)
+           in OK# (B.PS (ForeignPtr s fp) 0 (I# n#)) s'#
+#else
+getCStringUnsafe = error "Flatparse.Basic.Combinators.getCStringUnsafe: requires GHC 9.0 / base-4.15, not available on this compiler"
+#endif
+
+-- Switching code generation
+--------------------------------------------------------------------------------
+
+#if MIN_VERSION_base(4,15,0)
+mkDoE = DoE Nothing
+{-# inline mkDoE #-}
+#else
+mkDoE = DoE
+{-# inline mkDoE #-}
+#endif
+
+genTrie :: (Map (Maybe Int) Exp, Trie' (Rule, Int, Maybe Int)) -> Q Exp
+genTrie (rules, t) = do
+  branches <- traverse (\e -> (,) <$> (newName "rule") <*> pure e) rules
+
+  let ix m k = case M.lookup k m of
+        Nothing -> error ("key not in map: " ++ show k)
+        Just a  -> a
+
+  let ensure :: Maybe Int -> Maybe (Q Exp)
+      ensure = fmap (\n -> [| ensureBytes# n |])
+
+      fallback :: Rule -> Int ->  Q Exp
+      fallback rule 0 = pure $ VarE $ fst $ ix branches rule
+      fallback rule n = [| skipBack# n >> $(pure $ VarE $ fst $ ix branches rule) |]
+
+  let go :: Trie' (Rule, Int, Maybe Int) -> Q Exp
+      go = \case
+        Branch' (r, n, alloc) ts
+          | M.null ts -> pure $ VarE $ fst $ branches M.! r
+          | otherwise -> do
+              !next         <- (traverse . traverse) go (M.toList ts)
+              !defaultCase  <- fallback r (n + 1)
+
+              let cases = mkDoE $
+                    [BindS (VarP (mkName "c")) (VarE 'getWord8Unsafe),
+                      NoBindS (CaseE (VarE (mkName "c"))
+                         (map (\(w, t) ->
+                                 Match (LitP (IntegerL (fromIntegral w)))
+                                       (NormalB t)
+                                       [])
+                              next
+                          ++ [Match WildP (NormalB defaultCase) []]))]
+
+              case ensure alloc of
+                Nothing    -> pure cases
+                Just alloc -> [| branch $alloc $(pure cases) $(fallback r n) |]
+
+        Path (r, n, alloc) ws t ->
+          case ensure alloc of
+            Nothing    -> [| branch $(scanBytes# ws) $(go t) $(fallback r n)|]
+            Just alloc -> [| branch ($alloc >> $(scanBytes# ws)) $(go t) $(fallback r n) |]
+
+  letE
+    (map (\(x, rhs) -> valD (varP x) (normalB (pure rhs)) []) (Data.Foldable.toList branches))
+    (go t)
+
+parseSwitch :: Q Exp -> Q ([(String, Exp)], Maybe Exp)
+parseSwitch exp = exp >>= \case
+  CaseE (UnboundVarE _) []    -> error "switch: empty clause list"
+  CaseE (UnboundVarE _) cases -> do
+    (!cases, !last) <- pure (init cases, last cases)
+    !cases <- forM cases \case
+      Match (LitP (StringL str)) (NormalB rhs) [] -> pure (str, rhs)
+      _ -> error "switch: expected a match clause on a string literal"
+    (!cases, !last) <- case last of
+      Match (LitP (StringL str)) (NormalB rhs) [] -> pure (cases ++ [(str, rhs)], Nothing)
+      Match WildP                (NormalB rhs) [] -> pure (cases, Just rhs)
+      _ -> error "switch: expected a match clause on a string literal or a wildcard"
+    pure (cases, last)
+  _ -> error "switch: expected a \"case _ of\" expression"
+
+genSwitchTrie' :: Maybe Exp -> [(String, Exp)] -> Maybe Exp
+              -> (Map (Maybe Int) Exp, Trie' (Rule, Int, Maybe Int))
+genSwitchTrie' postAction cases fallback =
+
+  let (!branches, !strings) = unzip do
+        (!i, (!str, !rhs)) <- zip [0..] cases
+        case postAction of
+          Nothing    -> pure ((Just i, rhs), (i, str))
+          Just !post -> pure ((Just i, (VarE '(>>)) `AppE` post `AppE` rhs), (i, str))
+
+      !m    = M.fromList ((Nothing, maybe (VarE 'empty) id fallback) : branches)
+      !trie = compileTrie strings
+  in (m , trie)
+
+--------------------------------------------------------------------------------
+
+{-|
+This is a template function which makes it possible to branch on a collection of string literals in
+an efficient way. By using `switch`, such branching is compiled to a trie of primitive parsing
+operations, which has optimized control flow, vectorized reads and grouped checking for needed input
+bytes.
+
+The syntax is slightly magical, it overloads the usual @case@ expression. An example:
+
+@
+    $(switch [| case _ of
+        "foo" -> pure True
+        "bar" -> pure False |])
+@
+
+The underscore is mandatory in @case _ of@. Each branch must be a string literal, but optionally
+we may have a default case, like in
+
+@
+    $(switch [| case _ of
+        "foo" -> pure 10
+        "bar" -> pure 20
+        _     -> pure 30 |])
+@
+
+All case right hand sides must be parsers with the same type. That type is also the type
+of the whole `switch` expression.
+
+A `switch` has longest match semantics, and the order of cases does not matter, except for
+the default case, which may only appear as the last case.
+
+If a `switch` does not have a default case, and no case matches the input, then it returns with
+failure, \without\ having consumed any input. A fallthrough to the default case also does not
+consume any input.
+-}
+switch :: Q Exp -> Q Exp
+switch = switchWithPost Nothing
+
+{-|
+Switch expression with an optional first argument for performing a post-processing action after
+every successful branch matching, not including the default branch. For example, if we have
+@ws :: Parser e ()@ for a whitespace parser, we might want to consume whitespace after matching
+on any of the switch cases. For that case, we can define a "lexeme" version of `switch` as
+follows.
+
+@
+  switch' :: Q Exp -> Q Exp
+  switch' = switchWithPost (Just [| ws |])
+@
+
+Note that this @switch'@ function cannot be used in the same module it's defined in, because of the
+stage restriction of Template Haskell.
+-}
+switchWithPost :: Maybe (Q Exp) -> Q Exp -> Q Exp
+switchWithPost postAction exp = do
+  !postAction <- sequence postAction
+  (!cases, !fallback) <- parseSwitch exp
+  genTrie $! genSwitchTrie' postAction cases fallback
+
+-- | Version of `switchWithPost` without syntactic sugar. The second argument is the
+--   list of cases, the third is the default case.
+rawSwitchWithPost :: Maybe (Q Exp) -> [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp
+rawSwitchWithPost postAction cases fallback = do
+  !postAction <- sequence postAction
+  !cases <- forM cases \(str, rhs) -> (str,) <$> rhs
+  !fallback <- sequence fallback
+  genTrie $! genSwitchTrie' postAction cases fallback

From 19bd2081dccea5a4e3fba4102a0bb471383f39b6 Mon Sep 17 00:00:00 2001
From: Ben Orchard <thefirstmuffinman@gmail.com>
Date: Thu, 27 Oct 2022 21:38:22 +0100
Subject: [PATCH 09/11] last tweaking

---
 flatparse.cabal                    |   1 -
 src/FlatParse/Basic.hs             |   1 -
 src/FlatParse/Basic/Bytes.hs       |  65 ---------------
 src/FlatParse/Basic/Combinators.hs |  14 +---
 src/FlatParse/Basic/Integers.hs    |   7 +-
 src/FlatParse/Basic/Other.hs       | 124 +++++++++++++++++++++--------
 src/FlatParse/Basic/Strings.hs     |   4 +-
 7 files changed, 102 insertions(+), 114 deletions(-)
 delete mode 100644 src/FlatParse/Basic/Bytes.hs

diff --git a/flatparse.cabal b/flatparse.cabal
index 06147b5..5f20196 100644
--- a/flatparse.cabal
+++ b/flatparse.cabal
@@ -45,7 +45,6 @@ library
   exposed-modules:
       FlatParse.Basic
       FlatParse.Basic.Addr
-      FlatParse.Basic.Bytes
       FlatParse.Basic.Combinators
       FlatParse.Basic.Integers
       FlatParse.Basic.Other
diff --git a/src/FlatParse/Basic.hs b/src/FlatParse/Basic.hs
index 82b316f..954359c 100644
--- a/src/FlatParse/Basic.hs
+++ b/src/FlatParse/Basic.hs
@@ -97,7 +97,6 @@ import FlatParse.Basic.Parser
 import FlatParse.Basic.Integers
 import FlatParse.Basic.Combinators
 import FlatParse.Basic.Other
-import FlatParse.Basic.Bytes
 import FlatParse.Basic.Strings
 import FlatParse.Basic.Position
 import FlatParse.Basic.Addr
diff --git a/src/FlatParse/Basic/Bytes.hs b/src/FlatParse/Basic/Bytes.hs
deleted file mode 100644
index 7c47195..0000000
--- a/src/FlatParse/Basic/Bytes.hs
+++ /dev/null
@@ -1,65 +0,0 @@
-module FlatParse.Basic.Bytes where
-
-import Language.Haskell.TH
-import FlatParse.Basic.Parser
-import qualified FlatParse.Common.Assorted as Common
-import FlatParse.Basic.Integers ( getWord64OfUnsafe
-                                , getWord32OfUnsafe
-                                , getWord16OfUnsafe
-                                , getWord8OfUnsafe )
-import GHC.Exts
-
--- | Check that the input has at least the given number of bytes.
-ensureBytes# :: Int -> Parser e ()
-ensureBytes# (I# len) = Parser \fp eob s ->
-  case len  <=# minusAddr# eob s of
-    1# -> OK# () s
-    _  -> Fail#
-{-# inline ensureBytes# #-}
-
--- | Read a sequence of bytes. This is a template function, you can use it as
---   @$(getBytesOf [3, 4, 5])@, for example, and the splice has type @Parser e
---   ()@.
-getBytesOf :: [Word] -> Q Exp
-getBytesOf bytes = do
-  let !len = length bytes
-  [| ensureBytes# len >> $(scanBytes# bytes) |]
-
--- | Template function, creates a @Parser e ()@ which unsafely scans a given
---   sequence of bytes.
-scanBytes# :: [Word] -> Q Exp
-scanBytes# bytes = do
-  let !(leading, w8s) = Common.splitBytes bytes
-      !scanw8s        = go w8s where
-                         go (w8:[] ) = [| getWord64OfUnsafe w8 |]
-                         go (w8:w8s) = [| getWord64OfUnsafe w8 >> $(go w8s) |]
-                         go []       = [| pure () |]
-  case w8s of
-    [] -> go leading
-          where
-            go (a:b:c:d:[]) = let !w = Common.packBytes [a, b, c, d] in [| getWord32OfUnsafe w |]
-            go (a:b:c:d:ws) = let !w = Common.packBytes [a, b, c, d] in [| getWord32OfUnsafe w >> $(go ws) |]
-            go (a:b:[])     = let !w = Common.packBytes [a, b]       in [| getWord16OfUnsafe w |]
-            go (a:b:ws)     = let !w = Common.packBytes [a, b]       in [| getWord16OfUnsafe w >> $(go ws) |]
-            go (a:[])       = [| getWord8OfUnsafe a |]
-            go []           = [| pure () |]
-    _  -> case leading of
-
-      []              -> scanw8s
-      [a]             -> [| getWord8OfUnsafe a >> $scanw8s |]
-      ws@[a, b]       -> let !w = Common.packBytes ws in [| getWord16OfUnsafe w >> $scanw8s |]
-      ws@[a, b, c, d] -> let !w = Common.packBytes ws in [| getWord32OfUnsafe w >> $scanw8s |]
-      ws              -> let !w = Common.packBytes ws
-                             !l = length ws
-                         in [| scanPartial64# l w >> $scanw8s |]
-
-scanPartial64# :: Int -> Word -> Parser e ()
-scanPartial64# (I# len) (W# w) = Parser \fp eob s ->
-  case indexWordOffAddr# s 0# of
-    w' -> case uncheckedIShiftL# (8# -# len) 3# of
-      sh -> case uncheckedShiftL# w' sh of
-        w' -> case uncheckedShiftRL# w' sh of
-          w' -> case eqWord# w w' of
-            1# -> OK# () (plusAddr# s len)
-            _  -> Fail#
-{-# inline scanPartial64# #-}
diff --git a/src/FlatParse/Basic/Combinators.hs b/src/FlatParse/Basic/Combinators.hs
index 16a4e5d..3da3c1b 100644
--- a/src/FlatParse/Basic/Combinators.hs
+++ b/src/FlatParse/Basic/Combinators.hs
@@ -1,27 +1,17 @@
 {-# language UnboxedTuples #-}
 
+-- | Basic parser building blocks.
+
 module FlatParse.Basic.Combinators where
 
 import FlatParse.Basic.Parser
-import FlatParse.Basic.Integers
-import FlatParse.Basic.Bytes
-
-import FlatParse.Common.Trie
 
 import GHC.Exts
-import GHC.Word
-import Control.Applicative ( empty )
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Internal as B
 import GHC.ForeignPtr ( ForeignPtr(..) )
-import qualified Data.Foldable
-import Control.Monad ( forM )
 import Control.Applicative ( (<|>) )
 
-import Language.Haskell.TH
-import qualified Data.Map.Strict as M
-import           Data.Map.Strict ( Map )
-
 -- | Throw a parsing error. By default, parser choice `(<|>)` can't backtrack
 --   on parser error. Use `try` to convert an error to a recoverable failure.
 err :: e -> Parser e a
diff --git a/src/FlatParse/Basic/Integers.hs b/src/FlatParse/Basic/Integers.hs
index 6e52487..50240b9 100644
--- a/src/FlatParse/Basic/Integers.hs
+++ b/src/FlatParse/Basic/Integers.hs
@@ -53,13 +53,14 @@ module FlatParse.Basic.Integers
   , getSizedOfUnsafe#
   ) where
 
+import FlatParse.Basic.Parser
+
+import FlatParse.Common.Assorted ( word16ToInt16, word32ToInt32, word64ToInt64 )
+
 import GHC.Exts
 import GHC.Word
 import GHC.Int
 
-import FlatParse.Basic.Parser
-import FlatParse.Common.Assorted ( word16ToInt16, word32ToInt32, word64ToInt64 )
-
 import Control.Applicative ( Alternative(empty) )
 
 --------------------------------------------------------------------------------
diff --git a/src/FlatParse/Basic/Other.hs b/src/FlatParse/Basic/Other.hs
index 4cb108c..f6c79a6 100644
--- a/src/FlatParse/Basic/Other.hs
+++ b/src/FlatParse/Basic/Other.hs
@@ -1,15 +1,21 @@
 {-# language UnboxedTuples #-}
 
+-- | Parser building blocks.
+
 module FlatParse.Basic.Other where
 
 import FlatParse.Basic.Parser
 
 -- TODO TH!!
 import FlatParse.Basic.Combinators ( branch, skipBack# )
-import FlatParse.Basic.Bytes ( scanBytes#, ensureBytes# )
-import FlatParse.Basic.Integers ( getWord8Unsafe )
+import FlatParse.Basic.Integers ( getWord64OfUnsafe
+                                , getWord32OfUnsafe
+                                , getWord16OfUnsafe
+                                , getWord8OfUnsafe
+                                , getWord8Unsafe )
 
 import FlatParse.Common.Trie
+import qualified FlatParse.Common.Assorted as Common
 
 import GHC.Exts
 import GHC.Word
@@ -24,35 +30,6 @@ import Language.Haskell.TH
 import qualified Data.Map.Strict as M
 import           Data.Map.Strict ( Map )
 
--- | Parse a given `B.ByteString`. If the bytestring is statically known,
---   consider using 'bytes' instead.
-getByteStringOf :: B.ByteString -> Parser e ()
-getByteStringOf (B.PS (ForeignPtr bs fcontent) _ (I# len)) =
-
-  let go64 :: Addr# -> Addr# -> Addr# -> State# RealWorld -> (# Res# e (), State# RealWorld #)
-      go64 bs bsend s w =
-        let bs' = plusAddr# bs 8# in
-        case gtAddr# bs' bsend of
-          1# -> go8 bs bsend s w
-          _  -> if   W64# (indexWord64OffAddr# bs 0#) == W64# (indexWord64OffAddr# s 0#)
-                then go64 bs' bsend (plusAddr# s 8#) w
-                else (# Fail#, w #)
-
-      go8 :: Addr# -> Addr# -> Addr# -> State# RealWorld -> (# Res# e (), State# RealWorld #)
-      go8 bs bsend s w =
-        case ltAddr# bs bsend of
-          1# -> if   W8# (indexWord8OffAddr# bs 0#) == W8# (indexWord8OffAddr# s 0#)
-                then go8 (plusAddr# bs 1#) bsend (plusAddr# s 1#) w
-                else (# Fail#, w #)
-          _  -> (# OK# () s, w #)
-
-  in Parser \fp eob s -> case len <=# minusAddr# eob s of
-       1# -> runRW# \w -> case go64 bs (plusAddr# bs len) s w of
-               (# res, w #) -> case touch# fcontent w of
-                 w -> res
-       _  -> Fail#
-{-# inline getByteStringOf #-}
-
 -- | Read a null-terminated bytestring (a C-style string).
 --
 -- Consumes the null terminator.
@@ -95,6 +72,91 @@ getCStringUnsafe = Parser \fp eob s ->
 getCStringUnsafe = error "Flatparse.Basic.Combinators.getCStringUnsafe: requires GHC 9.0 / base-4.15, not available on this compiler"
 #endif
 
+-- | Parse a given `B.ByteString`. If the bytestring is statically known,
+--   consider using 'bytes' instead.
+getByteStringOf :: B.ByteString -> Parser e ()
+getByteStringOf (B.PS (ForeignPtr bs fcontent) _ (I# len)) =
+
+  let go64 :: Addr# -> Addr# -> Addr# -> State# RealWorld -> (# Res# e (), State# RealWorld #)
+      go64 bs bsend s w =
+        let bs' = plusAddr# bs 8# in
+        case gtAddr# bs' bsend of
+          1# -> go8 bs bsend s w
+          _  -> if   W64# (indexWord64OffAddr# bs 0#) == W64# (indexWord64OffAddr# s 0#)
+                then go64 bs' bsend (plusAddr# s 8#) w
+                else (# Fail#, w #)
+
+      go8 :: Addr# -> Addr# -> Addr# -> State# RealWorld -> (# Res# e (), State# RealWorld #)
+      go8 bs bsend s w =
+        case ltAddr# bs bsend of
+          1# -> if   W8# (indexWord8OffAddr# bs 0#) == W8# (indexWord8OffAddr# s 0#)
+                then go8 (plusAddr# bs 1#) bsend (plusAddr# s 1#) w
+                else (# Fail#, w #)
+          _  -> (# OK# () s, w #)
+
+  in Parser \fp eob s -> case len <=# minusAddr# eob s of
+       1# -> runRW# \w -> case go64 bs (plusAddr# bs len) s w of
+               (# res, w #) -> case touch# fcontent w of
+                 w -> res
+       _  -> Fail#
+{-# inline getByteStringOf #-}
+
+-- | Check that the input has at least the given number of bytes.
+ensureBytes# :: Int -> Parser e ()
+ensureBytes# (I# len) = Parser \fp eob s ->
+  case len  <=# minusAddr# eob s of
+    1# -> OK# () s
+    _  -> Fail#
+{-# inline ensureBytes# #-}
+
+-- | Read a sequence of bytes. This is a template function, you can use it as
+--   @$(getBytesOf [3, 4, 5])@, for example, and the splice has type @Parser e
+--   ()@.
+getBytesOf :: [Word] -> Q Exp
+getBytesOf bytes = do
+  let !len = length bytes
+  [| ensureBytes# len >> $(scanBytes# bytes) |]
+
+-- | Template function, creates a @Parser e ()@ which unsafely scans a given
+--   sequence of bytes.
+scanBytes# :: [Word] -> Q Exp
+scanBytes# bytes = do
+  let !(leading, w8s) = Common.splitBytes bytes
+      !scanw8s        = go w8s where
+                         go (w8:[] ) = [| getWord64OfUnsafe w8 |]
+                         go (w8:w8s) = [| getWord64OfUnsafe w8 >> $(go w8s) |]
+                         go []       = [| pure () |]
+  case w8s of
+    [] -> go leading
+          where
+            go (a:b:c:d:[]) = let !w = Common.packBytes [a, b, c, d] in [| getWord32OfUnsafe w |]
+            go (a:b:c:d:ws) = let !w = Common.packBytes [a, b, c, d] in [| getWord32OfUnsafe w >> $(go ws) |]
+            go (a:b:[])     = let !w = Common.packBytes [a, b]       in [| getWord16OfUnsafe w |]
+            go (a:b:ws)     = let !w = Common.packBytes [a, b]       in [| getWord16OfUnsafe w >> $(go ws) |]
+            go (a:[])       = [| getWord8OfUnsafe a |]
+            go []           = [| pure () |]
+    _  -> case leading of
+
+      []              -> scanw8s
+      [a]             -> [| getWord8OfUnsafe a >> $scanw8s |]
+      ws@[a, b]       -> let !w = Common.packBytes ws in [| getWord16OfUnsafe w >> $scanw8s |]
+      ws@[a, b, c, d] -> let !w = Common.packBytes ws in [| getWord32OfUnsafe w >> $scanw8s |]
+      ws              -> let !w = Common.packBytes ws
+                             !l = length ws
+                         in [| scanPartial64# l w >> $scanw8s |]
+
+scanPartial64# :: Int -> Word -> Parser e ()
+scanPartial64# (I# len) (W# w) = Parser \fp eob s ->
+  case indexWordOffAddr# s 0# of
+    w' -> case uncheckedIShiftL# (8# -# len) 3# of
+      sh -> case uncheckedShiftL# w' sh of
+        w' -> case uncheckedShiftRL# w' sh of
+          w' -> case eqWord# w w' of
+            1# -> OK# () (plusAddr# s len)
+            _  -> Fail#
+{-# inline scanPartial64# #-}
+
+
 -- Switching code generation
 --------------------------------------------------------------------------------
 
diff --git a/src/FlatParse/Basic/Strings.hs b/src/FlatParse/Basic/Strings.hs
index 2a3bdf0..6953a44 100644
--- a/src/FlatParse/Basic/Strings.hs
+++ b/src/FlatParse/Basic/Strings.hs
@@ -1,11 +1,13 @@
 {-# language UnboxedTuples #-}
 
+-- | Parsers and utilities for parsing (UTF-8) 'Char's and strings.
+
 module FlatParse.Basic.Strings where
 
 import Prelude hiding ( getChar )
 
 import FlatParse.Basic.Parser
-import FlatParse.Basic.Bytes ( getBytesOf )
+import FlatParse.Basic.Other ( getBytesOf )
 
 import FlatParse.Common.Assorted ( derefChar8# )
 

From dde03d7647f8656ffe9764ac49bc85904e22d12b Mon Sep 17 00:00:00 2001
From: Ben Orchard <thefirstmuffinman@gmail.com>
Date: Fri, 28 Oct 2022 13:52:56 +0100
Subject: [PATCH 10/11] more fiddling

---
 flatparse.cabal                               |  2 +-
 src/FlatParse/Basic.hs                        | 99 ++++++-------------
 src/FlatParse/Basic/Addr.hs                   |  2 +-
 .../Basic/{Combinators.hs => Base.hs}         | 93 +++++++++++------
 src/FlatParse/Basic/Integers.hs               | 22 +----
 src/FlatParse/Basic/Other.hs                  | 36 ++++---
 src/FlatParse/Basic/Position.hs               |  2 +
 src/FlatParse/Basic/Strings.hs                | 21 +++-
 8 files changed, 144 insertions(+), 133 deletions(-)
 rename src/FlatParse/Basic/{Combinators.hs => Base.hs} (94%)

diff --git a/flatparse.cabal b/flatparse.cabal
index 5f20196..7eb01c4 100644
--- a/flatparse.cabal
+++ b/flatparse.cabal
@@ -45,7 +45,7 @@ library
   exposed-modules:
       FlatParse.Basic
       FlatParse.Basic.Addr
-      FlatParse.Basic.Combinators
+      FlatParse.Basic.Base
       FlatParse.Basic.Integers
       FlatParse.Basic.Other
       FlatParse.Basic.Parser
diff --git a/src/FlatParse/Basic.hs b/src/FlatParse/Basic.hs
index 954359c..0a1c026 100644
--- a/src/FlatParse/Basic.hs
+++ b/src/FlatParse/Basic.hs
@@ -3,10 +3,21 @@
 {-|
 This module implements a `Parser` supporting custom error types.  If you need
 efficient indentation parsing, use "FlatParse.Stateful" instead.
--}
 
-module FlatParse.Basic (
+The following naming conventions are followed:
+
+  * @withX@ are continuation passing style (CPS) parsers.
+    * These are sprinkled throughout to enable better reasoning about
+      performance.
+  * @getX@ are regular monadic parsers.
+  * @getXOf@ parse and check equality with a provided value.
+  * Definitions ending with @#@ are called with unboxed values.
+  * Definitions ending with @Unsafe@ are unsafe. Read their documentation before
+    using.
+-}
 
+module FlatParse.Basic
+  (
   -- * Parser monad
     type Parser
 
@@ -14,88 +25,37 @@ module FlatParse.Basic (
   , Result(..)
   , runParser
 
-  -- * Errors and failures
-  , err
-  , lookahead
-  , fails
-  , try
-  , Control.Applicative.optional
-  , optional_
-  , withOption
-  , cut
-  , cutting
-
-  -- * Combinators
-  , (Control.Applicative.<|>)
-  , Control.Applicative.empty
-  , branch
-  , chainl
-  , chainr
-  , Control.Applicative.many
-  , many_
-  , Control.Applicative.some
-  , some_
-  , notFollowedBy
-  , isolate
-  , isolateUnsafe#
-
-  -- * Primitive parsers
-  , eof
-  , switch
-  , switchWithPost
-  , rawSwitchWithPost
-
-  -- ** Byte-wise
-  , take
-  , take#
-  , takeRest
-  , skip
-  , atSkip#
-  , skipBack#
-  , getBytesOf
-  , getByteStringOf
-  , getCString
-  , getCStringUnsafe
+  -- * Parsers
+  -- ** Base combinators, byte-wise
+  , module FlatParse.Basic.Base
+  , module FlatParse.Basic.Other
 
   -- ** Machine integers
   , module FlatParse.Basic.Integers
 
   -- ** 'Char', 'String'
-  , getCharOf
-  , getStringOf
-  , getChar
-  , getChar_
-  , getCharASCII
-  , getCharASCII_
-  , getAsciiDecimalInt
-  , getAsciiDecimalInteger
-  , getAsciiHexInt
-  , Common.isDigit
-  , Common.isGreekLetter
-  , Common.isLatinLetter
-  , satisfy
-  , satisfy_
-  , satisfyASCII
-  , satisfyASCII_
-  , fusedSatisfy
-  , fusedSatisfy_
+  , module FlatParse.Basic.Strings
 
   -- ** Positions and spans
   , module FlatParse.Basic.Position
 
-  -- ** Location & address primitives
+  -- ** Address primitives
   , module FlatParse.Basic.Addr
 
+  -- ** Re-exports
+  , (Control.Applicative.<|>)
+  , Control.Applicative.empty
+  , Control.Applicative.many
+  , Control.Applicative.some
+  , Control.Applicative.optional
+
   ) where
 
 import Prelude hiding ( take, getChar )
 
-import qualified FlatParse.Common.Assorted as Common
-import FlatParse.Common.Position
-
 import FlatParse.Basic.Parser
 import FlatParse.Basic.Integers
-import FlatParse.Basic.Combinators
+import FlatParse.Basic.Base
 import FlatParse.Basic.Other
 import FlatParse.Basic.Strings
 import FlatParse.Basic.Position
@@ -113,9 +73,10 @@ import GHC.ForeignPtr ( ForeignPtr(..) )
 
 -- | Higher-level boxed data type for parsing results.
 data Result e a =
-    OK a !(B.ByteString)  -- ^ Contains return value and unconsumed input.
+    OK a !(B.ByteString)  -- ^ Contains return value and unconsumed input
+                          --   (possibly empty, if fully consumed).
   | Fail                  -- ^ Recoverable-by-default failure.
-  | Err !e                -- ^ Unrecoverble-by-default error.
+  | Err !e                -- ^ Unrecoverable-by-default error.
   deriving Show
 
 instance Functor (Result e) where
diff --git a/src/FlatParse/Basic/Addr.hs b/src/FlatParse/Basic/Addr.hs
index 0d76c55..72bba80 100644
--- a/src/FlatParse/Basic/Addr.hs
+++ b/src/FlatParse/Basic/Addr.hs
@@ -20,7 +20,7 @@ functions.)
 module FlatParse.Basic.Addr where
 
 import FlatParse.Basic.Parser
-import FlatParse.Basic.Combinators ( take#, atSkip#, lookahead )
+import FlatParse.Basic.Base ( take#, atSkip#, lookahead )
 
 import GHC.Exts
 
diff --git a/src/FlatParse/Basic/Combinators.hs b/src/FlatParse/Basic/Base.hs
similarity index 94%
rename from src/FlatParse/Basic/Combinators.hs
rename to src/FlatParse/Basic/Base.hs
index 3da3c1b..da88aac 100644
--- a/src/FlatParse/Basic/Combinators.hs
+++ b/src/FlatParse/Basic/Base.hs
@@ -2,7 +2,42 @@
 
 -- | Basic parser building blocks.
 
-module FlatParse.Basic.Combinators where
+module FlatParse.Basic.Base
+  (
+  -- * Errors and failures
+    err
+  , try
+  , fails
+  , cut
+  , cutting
+  , optional_
+  , withOption
+
+  -- * Primitive combinators
+  , lookahead
+  , branch
+  , chainl
+  , chainr
+  , many_
+  , some_
+  , notFollowedBy
+  , isolate
+  , isolateUnsafe#
+
+  -- * Primitive byte-wise parsers
+  , eof
+  , take
+  , take#
+  , takeUnsafe#
+  , takeRest
+  , skip
+  , atSkip#
+  , atSkipUnsafe#
+  , skipBack#
+
+  ) where
+
+import Prelude hiding ( take )
 
 import FlatParse.Basic.Parser
 
@@ -18,6 +53,13 @@ err :: e -> Parser e a
 err e = Parser \fp eob s -> Err# e
 {-# inline err #-}
 
+-- | Convert a parsing error into failure.
+try :: Parser e a -> Parser e a
+try (Parser f) = Parser \fp eob s -> case f fp eob s of
+  Err# _ -> Fail#
+  x      -> x
+{-# inline try #-}
+
 -- | Convert a parsing failure to a success.
 fails :: Parser e a -> Parser e ()
 fails (Parser f) = Parser \fp eob s ->
@@ -27,27 +69,6 @@ fails (Parser f) = Parser \fp eob s ->
     Err# e  -> Err# e
 {-# inline fails #-}
 
--- | Convert a parsing error into failure.
-try :: Parser e a -> Parser e a
-try (Parser f) = Parser \fp eob s -> case f fp eob s of
-  Err# _ -> Fail#
-  x      -> x
-{-# inline try #-}
-
--- | Convert a parsing failure to a `()`.
-optional_ :: Parser e a -> Parser e ()
-optional_ p = (() <$ p) <|> pure ()
-{-# inline optional_ #-}
-
--- | CPS'd version of `optional`. This is usually more efficient, since it gets
---   rid of the extra `Maybe` allocation.
-withOption :: Parser e a -> (a -> Parser e r) -> Parser e r -> Parser e r
-withOption (Parser f) just (Parser nothing) = Parser \fp eob s -> case f fp eob s of
-  OK# a s -> runParser# (just a) fp eob s
-  Fail#   -> nothing fp eob s
-  Err# e  -> Err# e
-{-# inline withOption #-}
-
 -- | Convert a parsing failure to an error.
 cut :: Parser e a -> e -> Parser e a
 cut (Parser f) e = Parser \fp eob s -> case f fp eob s of
@@ -65,8 +86,29 @@ cutting (Parser f) e merge = Parser \fp eob s -> case f fp eob s of
   x       -> x
 {-# inline cutting #-}
 
+-- | Convert a parsing failure to a `()`.
+optional_ :: Parser e a -> Parser e ()
+optional_ p = (() <$ p) <|> pure ()
+{-# inline optional_ #-}
+
+-- | CPS'd version of `optional`. This is usually more efficient, since it gets
+--   rid of the extra `Maybe` allocation.
+withOption :: Parser e a -> (a -> Parser e r) -> Parser e r -> Parser e r
+withOption (Parser f) just (Parser nothing) = Parser \fp eob s -> case f fp eob s of
+  OK# a s -> runParser# (just a) fp eob s
+  Fail#   -> nothing fp eob s
+  Err# e  -> Err# e
+{-# inline withOption #-}
+
 --------------------------------------------------------------------------------
 
+-- | Succeed if the input is empty.
+eof :: Parser e ()
+eof = Parser \fp eob s -> case eqAddr# eob s of
+  1# -> OK# () s
+  _  -> Fail#
+{-# inline eof #-}
+
 -- | Save the parsing state, then run a parser, then restore the state.
 lookahead :: Parser e a -> Parser e a
 lookahead (Parser f) = Parser \fp eob s ->
@@ -152,13 +194,6 @@ notFollowedBy p1 p2 = p1 <* fails p2
 
 --------------------------------------------------------------------------------
 
--- | Succeed if the input is empty.
-eof :: Parser e ()
-eof = Parser \fp eob s -> case eqAddr# eob s of
-  1# -> OK# () s
-  _  -> Fail#
-{-# inline eof #-}
-
 -- | Read the given number of bytes as a 'ByteString'.
 --
 -- Throws a runtime error if given a negative integer.
diff --git a/src/FlatParse/Basic/Integers.hs b/src/FlatParse/Basic/Integers.hs
index 50240b9..eb51ba6 100644
--- a/src/FlatParse/Basic/Integers.hs
+++ b/src/FlatParse/Basic/Integers.hs
@@ -1,22 +1,10 @@
 {- | Machine integer parsers.
 
-The following naming conventions are followed:
-
-  * @withX@ are continuation passing style (CPS) parsers.
-    * These are sprinkled throughout to enable better reasoning about
-      performance.
-  * @getX@ are regular monadic parsers.
-  * @getXOf@ parse and check equality with a provided value.
-  * Definitions ending with @#@ are called with unboxed values.
-  * Definitions ending with @Unsafe@ are unsafe. Read their documentation before
-    using.
-
-Other points:
-
-  * TODO: The endianness code is currently lying. We blindly assume that our
-    host system is little-endian, and parse in big-endian by parsing normally
-    then "reversing" the resulting integer.
-  * Unless otherwise mentioned, native byte order is utilized.
+Parsers use native byte order, unless stated otherwise.
+
+TODO: The endianness code is currently lying. We blindly assume that our host
+system is little-endian, and parse in big-endian by parsing normally then
+"reversing" the resulting integer.
 -}
 
 module FlatParse.Basic.Integers
diff --git a/src/FlatParse/Basic/Other.hs b/src/FlatParse/Basic/Other.hs
index f6c79a6..98a751d 100644
--- a/src/FlatParse/Basic/Other.hs
+++ b/src/FlatParse/Basic/Other.hs
@@ -1,20 +1,25 @@
 {-# language UnboxedTuples #-}
 
--- | Parser building blocks.
+-- | Assorted parser building blocks which may use the more basic combinators,
+--   and don't belong elsewhere.
 
-module FlatParse.Basic.Other where
+module FlatParse.Basic.Other
+  ( getCString, getCStringUnsafe
+  , getByteStringOf, getBytesOf
+  , switch, switchWithPost, rawSwitchWithPost
+  ) where
 
 import FlatParse.Basic.Parser
 
 -- TODO TH!!
-import FlatParse.Basic.Combinators ( branch, skipBack# )
+import FlatParse.Basic.Base ( branch, skipBack# )
 import FlatParse.Basic.Integers ( getWord64OfUnsafe
                                 , getWord32OfUnsafe
                                 , getWord16OfUnsafe
                                 , getWord8OfUnsafe
                                 , getWord8Unsafe )
 
-import FlatParse.Common.Trie
+import FlatParse.Common.Trie hiding ( ensureBytes )
 import qualified FlatParse.Common.Assorted as Common
 
 import GHC.Exts
@@ -101,21 +106,13 @@ getByteStringOf (B.PS (ForeignPtr bs fcontent) _ (I# len)) =
        _  -> Fail#
 {-# inline getByteStringOf #-}
 
--- | Check that the input has at least the given number of bytes.
-ensureBytes# :: Int -> Parser e ()
-ensureBytes# (I# len) = Parser \fp eob s ->
-  case len  <=# minusAddr# eob s of
-    1# -> OK# () s
-    _  -> Fail#
-{-# inline ensureBytes# #-}
-
 -- | Read a sequence of bytes. This is a template function, you can use it as
 --   @$(getBytesOf [3, 4, 5])@, for example, and the splice has type @Parser e
 --   ()@.
 getBytesOf :: [Word] -> Q Exp
 getBytesOf bytes = do
   let !len = length bytes
-  [| ensureBytes# len >> $(scanBytes# bytes) |]
+  [| ensureBytes len >> $(scanBytes# bytes) |]
 
 -- | Template function, creates a @Parser e ()@ which unsafely scans a given
 --   sequence of bytes.
@@ -156,7 +153,6 @@ scanPartial64# (I# len) (W# w) = Parser \fp eob s ->
             _  -> Fail#
 {-# inline scanPartial64# #-}
 
-
 -- Switching code generation
 --------------------------------------------------------------------------------
 
@@ -177,7 +173,7 @@ genTrie (rules, t) = do
         Just a  -> a
 
   let ensure :: Maybe Int -> Maybe (Q Exp)
-      ensure = fmap (\n -> [| ensureBytes# n |])
+      ensure = fmap (\n -> [| ensureBytes n |])
 
       fallback :: Rule -> Int ->  Q Exp
       fallback rule 0 = pure $ VarE $ fst $ ix branches rule
@@ -311,3 +307,13 @@ rawSwitchWithPost postAction cases fallback = do
   !cases <- forM cases \(str, rhs) -> (str,) <$> rhs
   !fallback <- sequence fallback
   genTrie $! genSwitchTrie' postAction cases fallback
+
+--------------------------------------------------------------------------------
+
+-- | Assert that the input has at least the given number of bytes.
+ensureBytes :: Int -> Parser e ()
+ensureBytes (I# len) = Parser \fp eob s ->
+  case len <=# minusAddr# eob s of
+    1# -> OK# () s
+    _  -> Fail#
+{-# inline ensureBytes #-}
diff --git a/src/FlatParse/Basic/Position.hs b/src/FlatParse/Basic/Position.hs
index 3b1c473..3527811 100644
--- a/src/FlatParse/Basic/Position.hs
+++ b/src/FlatParse/Basic/Position.hs
@@ -1,3 +1,5 @@
+-- | Parsers and utilities concerning position in input.
+
 module FlatParse.Basic.Position
   ( module FlatParse.Common.Position
   , module FlatParse.Basic.Position
diff --git a/src/FlatParse/Basic/Strings.hs b/src/FlatParse/Basic/Strings.hs
index 6953a44..c908f82 100644
--- a/src/FlatParse/Basic/Strings.hs
+++ b/src/FlatParse/Basic/Strings.hs
@@ -2,7 +2,26 @@
 
 -- | Parsers and utilities for parsing (UTF-8) 'Char's and strings.
 
-module FlatParse.Basic.Strings where
+module FlatParse.Basic.Strings
+  ( getCharOf
+  , getStringOf
+  , getChar
+  , getChar_
+  , getCharASCII
+  , getCharASCII_
+  , getAsciiDecimalInt
+  , getAsciiDecimalInteger
+  , getAsciiHexInt
+  , Common.isDigit
+  , Common.isGreekLetter
+  , Common.isLatinLetter
+  , satisfy
+  , satisfy_
+  , satisfyASCII
+  , satisfyASCII_
+  , fusedSatisfy
+  , fusedSatisfy_
+  ) where
 
 import Prelude hiding ( getChar )
 

From e29f98b2ea0d0a190a182770e7d8281113d23781 Mon Sep 17 00:00:00 2001
From: Ben Orchard <thefirstmuffinman@gmail.com>
Date: Fri, 28 Oct 2022 18:11:25 +0100
Subject: [PATCH 11/11] add efficient varint (protobuf) parser

---
 flatparse.cabal                         |  1 +
 src/FlatParse/Basic/Integers/Varints.hs | 27 +++++++++++++++++++++++++
 src/FlatParse/Common/Numbers.hs         | 22 +++++++++++++++++++-
 3 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 src/FlatParse/Basic/Integers/Varints.hs

diff --git a/flatparse.cabal b/flatparse.cabal
index 7eb01c4..79e64d8 100644
--- a/flatparse.cabal
+++ b/flatparse.cabal
@@ -47,6 +47,7 @@ library
       FlatParse.Basic.Addr
       FlatParse.Basic.Base
       FlatParse.Basic.Integers
+      FlatParse.Basic.Integers.Varints
       FlatParse.Basic.Other
       FlatParse.Basic.Parser
       FlatParse.Basic.Position
diff --git a/src/FlatParse/Basic/Integers/Varints.hs b/src/FlatParse/Basic/Integers/Varints.hs
new file mode 100644
index 0000000..495bca0
--- /dev/null
+++ b/src/FlatParse/Basic/Integers/Varints.hs
@@ -0,0 +1,27 @@
+{-# language UnboxedTuples #-}
+
+module FlatParse.Basic.Integers.Varints where
+
+import FlatParse.Basic.Parser
+
+import FlatParse.Common.Assorted ( shortInteger )
+import FlatParse.Common.Numbers ( getVarintProtobuf# )
+
+import GHC.Exts
+
+getVarintProtobuf :: Parser e Int
+getVarintProtobuf = Parser \fp eob s ->
+    case getVarintProtobuf# eob s of
+      (# (##) | #) -> Fail#
+      (# | (# i#, s#, _n# #) #) -> OK# (I# i#) s#
+{-# inline getVarintProtobuf #-}
+
+getVarintProtobufInteger :: Parser e Integer
+getVarintProtobufInteger = Parser \fp eob s ->
+    case getVarintProtobuf# eob s of
+      (# (##) | #) -> Fail#
+      (# | (# i#, s#, n# #) #) ->
+        case n# >=# 62# of -- TODO unsure if 62 or 63
+          1# -> error "TODO overflow"
+          _  -> OK# (shortInteger i#) s#
+{-# inline getVarintProtobufInteger #-}
diff --git a/src/FlatParse/Common/Numbers.hs b/src/FlatParse/Common/Numbers.hs
index fbf6826..22c61db 100644
--- a/src/FlatParse/Common/Numbers.hs
+++ b/src/FlatParse/Common/Numbers.hs
@@ -1,4 +1,4 @@
-{-# language UnboxedTuples #-}
+{-# language UnboxedTuples, BinaryLiterals #-}
 
 module FlatParse.Common.Numbers where
 
@@ -68,3 +68,23 @@ readIntHex :: Addr# -> Addr# -> (# (##) | (# Int#, Addr# #) #)
 readIntHex eob s = case readIntHex' 0# s eob of
     (# n, s' #) | 1# <- eqAddr# s s' -> (# (##) | #)
                 | otherwise          -> (# | (# n, s' #) #)
+
+-- | protobuf style (LE, redundant, on continues)
+getVarintProtobuf# :: Addr# -> Addr# -> (# (##) | (# Int#, Addr#, Int# #) #)
+-- v TODO not working??
+#if !MIN_VERSION_base(4,17,0)
+getVarintProtobuf# = error "TODO not supported on this GHC bud"
+#else
+getVarintProtobuf# end# = go 0# 0#
+  where
+    go :: Int# -> Int# -> Addr# -> (# (##) | (# Int#, Addr#, Int# #) #)
+    go i# n# s# = case eqAddr# s# end# of
+      1# -> (# (##) | #)
+      _  ->
+        let w# = indexWord8OffAddr# s# 0#
+            w'# = word2Int# (word8ToWord# (w# `andWord8#` (wordToWord8# 0b01111111##)))
+            i'# = i# `orI#` (w'# `uncheckedIShiftL#` n#)
+        in  case w# `geWord8#` wordToWord8# 0b10000000## of
+              1# -> go i'# (n# +# 7#) (s# `plusAddr#` 1#)
+              _  -> (# | (# i'#, s#, n# #) #)
+#endif