# Chapter 24: Parser Combinators

In [1]:
:!stack install trifecta parsers



In [2]:
import Text.Trifecta

In [3]:
-- | A parser that always fails, reporting @unexpected "stop"@.
-- Polymorphic in its result because it never produces one.
stop :: Parser a
stop = unexpected "stop"

In [4]:
-- | Match the single character @'1'@ and return it.
one :: Parser Char
one = char '1'

In [5]:
-- | Match @'1'@, discard it, then fail via 'stop'.
one' :: Parser a
one' = one *> stop

In [6]:
-- | Match @'1'@ then @'2'@; the result is the second character.
oneTwo = char '1' >> char '2'

-- | Match @"12"@ via 'oneTwo', then fail via 'stop'.
oneTwo' = oneTwo >> stop

-- | Run a 'Char' parser over the given input string, starting from an
-- empty ('mempty') source position.
testParse' :: String -> Parser Char -> Result Char
testParse' input parser = parseString parser mempty input

-- | Run a 'Char' parser over the fixed sample input @"123"@.
testParse :: Parser Char -> Result Char
testParse parser = testParse' "123" parser

In [7]:
:t parseString

In [8]:
-- `stop` fails before consuming anything: the error is reported at column 1.
testParse stop

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m1[0m: [91merror[0m: unexpected
    stop
123[1m[94m<EOF>[0;1m[0m 
[92m^[0m        , _errDeltas = [Columns 0 0]})

In [9]:
-- `one` matches the leading '1' of "123".
testParse one

Success '1'

In [10]:
-- The '1' is consumed first, so the `stop` failure is reported at column 2.
testParse one'

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m2[0m: [91merror[0m: unexpected
    stop
123[1m[94m<EOF>[0;1m[0m 
 [92m^[0m       , _errDeltas = [Columns 1 1]})

In [11]:
-- Both characters match; only the result of the last parser ('2') is returned.
testParse oneTwo

Success '2'

In [12]:
-- "12" is consumed first, so the `stop` failure is reported at column 3.
testParse oneTwo'

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m3[0m: [91merror[0m: unexpected
    stop
123[1m[94m<EOF>[0;1m[0m 
  [92m^[0m      , _errDeltas = [Columns 2 2]})

In [13]:
:t eof

In [14]:
-- `one` followed by end-of-input: keeps the '1' but requires nothing to follow it.
one'' = one <* eof
-- Fails on "123" because input remains after the '1'.
testParse one''

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m2[0m: [91merror[0m: expected: end of input
123[1m[94m<EOF>[0;1m[0m 
 [92m^[0m       , _errDeltas = [Columns 1 1]})

In [15]:
-- With "1" as the whole input, the end-of-input requirement is satisfied.
testParse' "1" one''

Success '1'

In [16]:
-- `oneTwo` followed by end-of-input: requires "12" to be the entire input.
oneTwo'' = oneTwo <* eof
-- Fails on "123" because the '3' remains after "12".
testParse oneTwo''

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m3[0m: [91merror[0m: expected: end of input
123[1m[94m<EOF>[0;1m[0m 
  [92m^[0m      , _errDeltas = [Columns 2 2]})

In [17]:
-- With "12" as the whole input, the end-of-input requirement is satisfied.
testParse' "12" oneTwo''

Success '2'

In [18]:
:t string

In [19]:
-- `string` matches the whole literal and returns it.
parseString (string "123") mempty "123"

Success "123"

In [20]:
-- A prefix match is enough: without `eof` the trailing "23" is simply left over.
parseString (string "1") mempty "123"

Success "1"

In [21]:
-- Again a prefix match; the trailing "3" is left over.
parseString (string "12") mempty "123"

Success "12"

In [22]:
-- Same expression as cell In [19]: the full literal matches.
parseString (string "123") mempty "123"

Success "123"

In [23]:
-- | Hand-rolled literal matcher: match each character of the argument in
-- order and return the matched characters.
string' :: String -> Parser String
string' [] = pure []
string' (c : cs) = (:) <$> char c <*> string' cs

In [24]:
-- On matching input the hand-rolled `string'` gives the same result as `string`.
parseString (string' "123") mempty "123"

Success "123"

In [25]:
import Control.Applicative

In [26]:
:info Alternative

In [27]:
:info Parser

In [28]:
-- `stop` fails without consuming input, so `<|>` falls through to `one`.
parseString (stop <|> one) mempty "123"

Success '1'

In [29]:
-- The left alternative succeeds, so `stop` is never run.
parseString (one <|> stop) mempty "123"

Success '1'

In [30]:
-- | Accept exactly "123", "12" or "1", each required to span the whole input.
-- Alternatives are tried left to right.
parser = exactly "123" <|> exactly "12" <|> exactly "1"
  where
    -- The literal immediately followed by end-of-input.
    exactly literal = string literal <* eof

In [31]:
-- The first alternative matches the whole input.
parseString parser mempty "123"

Success "123"

In [32]:
-- "123" cannot match, yet the "12" alternative still succeeds, so the failed
-- first attempt did not commit (presumably `string` backtracks on failure).
parseString parser mempty "12"

Success "12"

In [33]:
-- Only the last alternative matches.
parseString parser mempty "1"

Success "1"

In [34]:
-- "123" matches but '4' follows, so the `eof` check fails at column 4.
parseString parser mempty "1234"

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m4[0m: [91merror[0m: expected: end of input
1234[1m[94m<EOF>[0;1m[0m 
   [92m^[0m      , _errDeltas = [Columns 3 3]})

In [35]:
-- No alternative can start with '2'; the error lists all three expected literals.
parseString parser mempty "2"

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m1[0m: [91merror[0m: expected: "1",
    "12", "123"
2[1m[94m<EOF>[0;1m[0m 
[92m^[0m      , _errDeltas = [Columns 0 0]})

---

In [36]:
-- Sample inputs for the fraction parsers defined in the following cells.
-- Zero denominator.
badFraction = "1/0"
-- No '/' separator at all.
alsoBad = "10"
-- Well-formed fractions.
shouldWork = "1/2"
shouldAlsoWork = "2/1"

In [37]:
import Data.Ratio ((%))

-- | Parse @numerator/denominator@ into a 'Rational'.
--
-- The denominator is not validated: an input such as @"1/0"@ parses, but
-- the resulting value is built with '%' and a zero denominator.
parseFraction :: Parser Rational
parseFraction = (%) <$> (decimal <* char '/') <*> decimal

In [38]:
-- Well-formed fractions parse to the expected ratios.
parseString parseFraction mempty shouldWork
parseString parseFraction mempty shouldAlsoWork

Success (1 % 2)

Success (2 % 1)

In [39]:
-- "1/0" reaches `1 % 0`; Data.Ratio rejects a zero denominator with an
-- exception instead of a parser `Failure` (presumably why the captured output
-- below is truncated).
parseString parseFraction mempty badFraction

: 

In [40]:
-- "10" has no '/': the parser hits end-of-input while expecting "/" or another digit.
parseString parseFraction mempty alsoBad

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m3[0m: [91merror[0m: unexpected
    EOF, expected: "/", digit
10[1m[94m<EOF>[0;1m[0m 
  [92m^[0m     , _errDeltas = [Columns 2 2]})

In [41]:
-- | Outcome of validating a value: accepted, or rejected with a reason.
data EnsureResult
    = EnsureSuccess
    | EnsureFailure String

-- | Run a validator on a value inside a failable monad: return the value
-- unchanged when the validator accepts it, otherwise 'fail' with the reason.
--
-- The constraint is 'MonadFail' (not 'Monad') because 'fail' has not been a
-- method of 'Monad' since GHC 8.8; with a plain 'Monad' constraint this
-- definition no longer type-checks.
ensureM' :: MonadFail m => (a -> EnsureResult) -> a -> m a
ensureM' validate a = case validate a of
    EnsureSuccess -> pure a
    EnsureFailure reason -> fail reason

-- | Turn a boolean predicate into a validator that reports @reason@ whenever
-- the predicate does not hold.
mkEnsureP :: String -> (a -> Bool) -> (a -> EnsureResult)
mkEnsureP reason p a
    | p a = EnsureSuccess
    | otherwise = EnsureFailure reason

-- | Keep a value only if it satisfies the predicate; otherwise 'fail' with
-- the given reason.
ensureM :: MonadFail m => String -> (a -> Bool) -> a -> m a
ensureM reason p = ensureM' (mkEnsureP reason p)

-- | Reject zero with the message @"must not be zero"@.
ensureNotZero :: MonadFail m => Integer -> m Integer
ensureNotZero = ensureM "must not be zero" (/= 0)

-- | Parse an unsigned decimal number and fail the parse if it is zero.
decimalNonZero :: Parser Integer
decimalNonZero = do
    n <- decimal
    ensureNotZero n

In [42]:
-- | Parse @numerator/denominator@ into a 'Rational', failing the parse
-- (rather than building an invalid ratio) when the denominator is zero.
virtuousFraction :: Parser Rational
virtuousFraction = (%) <$> (decimal <* char '/') <*> decimalNonZero
    
-- The zero denominator is now a parse failure ("must not be zero").
parseString virtuousFraction mempty badFraction
-- Well-formed input is unaffected.
parseString virtuousFraction mempty shouldWork

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m4[0m: [91merror[0m: must
    not be zero, expected: digit
1/0[1m[94m<EOF>[0;1m[0m 
   [92m^[0m     , _errDeltas = [Columns 3 3]})

Success (1 % 2)

In [43]:
-- `integer <* eof` keeps the number but requires it to be the whole input.
parseString (integer <* eof) mempty "123"
-- Fails at column 4: "abc" follows the digits, so `eof` does not hold.
parseString (integer <* eof) mempty "123abc"

Success 123

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m4[0m: [91merror[0m: expected: digit,
    end of input
123abc[1m[94m<EOF>[0;1m[0m 
   [92m^[0m        , _errDeltas = [Columns 3 3]})

In [44]:
:t skipMany

In [45]:
:info skipMany

In [46]:
:t oneOf

In [47]:
-- The fraction branch consumes "123/0" before failing, so `<|>` does not fall
-- back to the `char '1'` branch: the zero-denominator error is what is reported.
parseString ((Left <$> virtuousFraction) <|> (Right <$> (char '1'))) mempty "123/0a"

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m6[0m: [91merror[0m: must
    not be zero, expected: digit
123/0a[1m[94m<EOF>[0;1m[0m 
     [92m^[0m      , _errDeltas = [Columns 5 5]})

In [48]:
-- The fraction branch fails on 'a' without consuming anything, so the
-- `char 'a'` alternative runs and succeeds.
parseString ((Left <$> virtuousFraction) <|> (Right <$> (char 'a'))) mempty "a1/a123"

Success (Right 'a')

In [49]:
-- The left branch consumes '1' and then fails; `<|>` does not backtrack, so
-- `decimal` is never tried and the error is reported at column 2.
parseString ((Left <$> (char '1' >> unexpected "hello")) <|> (Right <$> decimal)) mempty "123"

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m2[0m: [91merror[0m: unexpected
    hello
123[1m[94m<EOF>[0;1m[0m 
 [92m^[0m       , _errDeltas = [Columns 1 1]})

In [50]:
-- Same effect with `decimal` on the left: all of "123" is consumed before the
-- failure (column 4), so the right-hand `decimal` is never tried.
parseString ((decimal >> unexpected "hello") <|> decimal) mempty "123"

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m4[0m: [91merror[0m: unexpected
    hello, expected: digit
123[1m[94m<EOF>[0;1m[0m 
   [92m^[0m     , _errDeltas = [Columns 3 3]})

In [51]:
:t try

In [52]:
-- `try` wraps the whole alternation here, not just the failing left branch,
-- so it cannot rescue the right-hand `decimal`; the failure is merely rewound
-- to column 1 and reported as an unspecified error.
parseString (try ((decimal >> unexpected "wtf") <|> decimal)) mempty "123"

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m1[0m: [91merror[0m: unspecified
    error
123[1m[94m<EOF>[0;1m[0m 
[92m^[0m        , _errDeltas = [Columns 0 0]})

In [53]:
-- Precedence gotcha: `<|>` (infixl 3) binds tighter than `>>` (infixl 1), so
-- this parses as `try (char '1' >> (unexpected "wtf" <|> decimal))`.
-- The '1' is consumed and `decimal` reads the remaining "23", hence `Success 23`.
parseString (try $ char '1' >> unexpected "wtf" <|> decimal) mempty "123"

Success 23

In [54]:
-- Hooray! `try` to the rescue: wrapping only the fraction branch lets a failed
-- fraction attempt rewind so that `decimal` can be tried instead.
-- On "123/1" the fraction branch itself succeeds.
parseString ((Left <$> try virtuousFraction) <|> (Right <$> decimal)) mempty "123/1"

Success (Left (123 % 1))

---

In [55]:
-- | A single pre-release identifier: numeric or textual.
-- 'IOSI' is declared first, so the derived 'Ord' places every numeric
-- identifier below every textual one.
data IntegerOrString
    = IOSI Integer
    | IOSS String
    deriving (Eq, Ord, Show)

-- | The three components of the version core.
type Major = Integer
type Minor = Integer
type Patch = Integer

-- | One dot-separated item of the pre-release part.
newtype ReleaseItem = ReleaseItem IntegerOrString
    deriving (Eq, Show)

-- | The pre-release part: a list of items, empty when absent.
newtype Release = Release [ReleaseItem]
    deriving (Eq, Show)

-- | One dot-separated item of the build-metadata part.
type MetadataItem = String

-- | The build-metadata part, empty when absent.
type Metadata = [MetadataItem]

-- | A semantic version: major, minor, patch, pre-release and build metadata.
data SemVer = SemVer Major Minor Patch Release Metadata
    deriving (Eq, Show)

In [56]:
-- | Parse the major version number.
parseMajor :: Parser Major
parseMajor = parseCoreNumber

-- | Parse the minor version number.
parseMinor :: Parser Minor
parseMinor = parseCoreNumber

-- | Parse the patch version number.
parsePatch :: Parser Patch
parsePatch = parseCoreNumber

-- | Parse one component of the version core: a non-negative decimal number
-- with no leading zeros, as SemVer requires.
--
-- 'integer' is deliberately not used: it accepts a sign and skips trailing
-- whitespace, so inputs such as @"-1.2.3"@ or @"1 .2.3"@ would be accepted.
parseCoreNumber :: Parser Integer
parseCoreNumber =
    -- A lone "0" is fine. "0" followed by another digit is a leading zero;
    -- the '0' has been consumed by then, so the failure is committed and
    -- 'decimal' is not tried as a fallback.
    (0 <$ char '0' <* notFollowedBy digit) <|> decimal

-- | Match the @'.'@ separator and discard it.
matchSep :: Parser ()
matchSep = const () <$> char '.'

-- | Match the @'-'@ that introduces the pre-release part and discard it.
matchReleaseSep :: Parser ()
matchReleaseSep = const () <$> char '-'

-- | Match the @'+'@ that introduces the build metadata and discard it.
matchMetadataSep :: Parser ()
matchMetadataSep = const () <$> char '+'

-- | Recognise a @'0'@ immediately followed by something 'integer' accepts,
-- discarding the result. Wrapped in 'try', so a non-match consumes nothing.
matchZeroLeadingInteger :: Parser ()
matchZeroLeadingInteger = const () <$> try (char '0' *> integer)

-- | Recognise a @'-'@ immediately followed by something 'integer' accepts,
-- discarding the result. Wrapped in 'try', so a non-match consumes nothing.
matchNegativeInteger :: Parser ()
matchNegativeInteger = const () <$> try (char '-' *> integer)

-- | One character allowed inside a release or metadata identifier:
-- anything 'alphaNum' accepts, or a hyphen.
parseValidChar :: Parser Char
parseValidChar = alphaNum <|> char '-'

-- | Parse a numeric pre-release identifier.
--
-- An identifier is numeric only when it consists solely of digits, i.e. the
-- digits are not followed by another identifier character. In that case a
-- leading zero is an error. Mixed identifiers such as @"07a"@, @"0-1"@ or
-- @"1-23"@ are not numeric: this parser fails on them without consuming
-- input, so the caller can fall back to parsing them as text.
--
-- Digits are read directly rather than through 'integer', which would also
-- accept a sign and skip trailing whitespace.
parseValidInteger :: Parser Integer
parseValidInteger = do
    -- 'try' rewinds when the digits turn out to be the prefix of a longer
    -- alphanumeric identifier.
    digits <- try (some digit <* notFollowedBy parseValidChar)
    case digits of
        -- Input has been consumed at this point, so this failure is
        -- committed: an all-digit identifier with a leading zero is
        -- rejected instead of being re-read as text.
        ('0' : _ : _) -> unexpected "zero leading integer"
        _ -> pure (digitsToInteger digits)
  where
    digitsToInteger = go 0
    -- Strict left-to-right accumulation of the decimal value.
    go acc [] = acc
    go acc (d : ds) =
        let acc' = 10 * acc + toInteger (fromEnum d - fromEnum '0')
         in acc' `seq` go acc' ds

-- | Parse one pre-release item: a valid integer if possible, otherwise a
-- non-empty run of identifier characters.
parseReleaseItem :: Parser ReleaseItem
parseReleaseItem = ReleaseItem <$> (numericItem <|> textItem)
  where
    numericItem = IOSI <$> parseValidInteger
    textItem = IOSS <$> some parseValidChar

-- | Parse one or more pre-release items separated by @'.'@.
parseRelease :: Parser Release
parseRelease = fmap Release (parseReleaseItem `sepBy1` matchSep)

-- | Parse one build-metadata item: a non-empty run of identifier characters.
-- No numeric interpretation is applied, so leading zeros are kept as text.
parseMetadataItem :: Parser MetadataItem
parseMetadataItem = some parseValidChar

-- | Parse one or more build-metadata items separated by @'.'@.
parseMetadata :: Parser Metadata
parseMetadata = parseMetadataItem `sepBy1` matchSep

-- | Parse @MAJOR.MINOR.PATCH@ followed by an optional @-release@ part and an
-- optional @+metadata@ part. A missing part yields an empty 'Release' or an
-- empty metadata list. End-of-input is not required.
parseSemVer :: Parser SemVer
parseSemVer =
    SemVer
        <$> (parseMajor <* matchSep)
        <*> (parseMinor <* matchSep)
        <*> parsePatch
        <*> releasePart
        <*> metadataPart
  where
    releasePart = (matchReleaseSep *> parseRelease) <|> pure (Release [])
    metadataPart = (matchMetadataSep *> parseMetadata) <|> pure []

-- | Numeric items order below textual items; two items of the same kind
-- compare by value. This is exactly the derived ordering of
-- 'IntegerOrString' (whose 'IOSI' constructor is declared before 'IOSS'),
-- so the comparison is delegated to it.
instance Ord ReleaseItem where
    compare (ReleaseItem lhs) (ReleaseItem rhs) = compare lhs rhs

-- | An empty pre-release part ranks above any non-empty one. Two non-empty
-- parts compare item by item; when one is a prefix of the other, the
-- shorter ranks lower, which is the standard lexicographic list ordering.
instance Ord Release where
    compare (Release lhs) (Release rhs) = case (lhs, rhs) of
        ([], []) -> EQ
        ([], _) -> GT
        (_, []) -> LT
        _ -> compare lhs rhs

-- | Compare by major, then minor, then patch, then pre-release part.
-- Build metadata is ignored. The 'Ordering' monoid returns the first
-- non-'EQ' result.
instance Ord SemVer where
    compare (SemVer maj1 min1 pat1 rel1 _) (SemVer maj2 min2 pat2 rel2 _) =
        mconcat
            [ compare maj1 maj2
            , compare min1 min2
            , compare pat1 pat2
            , compare rel1 rel2
            ]

In [57]:
-- Plain version core, no pre-release or metadata.
parseString parseSemVer mempty "2.1.1"
-- A letter inside the major component is rejected.
parseString parseSemVer mempty "2a.1.1"
-- Mixed pre-release items: "92qwer", "1-23" and "-123" are kept as text.
parseString parseSemVer mempty "1.0.0-x.7.0.92qwer.92.1-23.-123"
-- A numeric pre-release item with a leading zero ("07") is rejected.
parseString parseSemVer mempty "1.0.0-x.07.z.92"
-- Metadata items are kept verbatim, including "0123" and "-123".
parseString parseSemVer mempty "1.0.0-x.7.z.92+hello.world.0123.-123"
-- Ordering check: a higher patch number ranks higher.
SemVer 2 1 1 (Release []) [] > SemVer 2 1 0 (Release []) []

Success (SemVer 2 1 1 (Release []) [])

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m2[0m: [91merror[0m: expected: ".",
    digit
2a.1.1[1m[94m<EOF>[0;1m[0m 
 [92m^[0m          , _errDeltas = [Columns 1 1]})

Success (SemVer 1 0 0 (Release [ReleaseItem (IOSS "x"),ReleaseItem (IOSI 7),ReleaseItem (IOSI 0),ReleaseItem (IOSS "92qwer"),ReleaseItem (IOSI 92),ReleaseItem (IOSS "1-23"),ReleaseItem (IOSS "-123")]) [])

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m11[0m: [91merror[0m: unexpected
    zero leading
    integer, expected: digit
1.0.0-x.07.z.92[1m[94m<EOF>[0;1m[0m 
          [92m^[0m          , _errDeltas = [Columns 10 10]})

Success (SemVer 1 0 0 (Release [ReleaseItem (IOSS "x"),ReleaseItem (IOSI 7),ReleaseItem (IOSS "z"),ReleaseItem (IOSI 92)]) ["hello","world","0123","-123"])

True

---

In [58]:
-- | Match a single decimal digit character; labelled "digit" in error messages.
parseDigit :: Parser Char
parseDigit = oneOf ['0' .. '9'] <?> "digit"

-- | Parse one or more decimal digits into a non-negative 'Integer';
-- labelled "integer" in error messages. No sign is accepted.
--
-- The value is accumulated left to right in one strict pass, and the digit
-- value is computed arithmetically, so there is no partial lookup function.
base10Integer :: Parser Integer
base10Integer = digitsToInteger <$> some parseDigit <?> "integer" where
    digitsToInteger = go 0
    go acc [] = acc
    go acc (c : cs) = let acc' = acc * 10 + digitValue c in acc' `seq` go acc' cs
    -- 'parseDigit' only yields '0'..'9', so the offset from '0' is the value.
    digitValue c = toInteger (fromEnum c - fromEnum '0')
    
-- A single digit matches.
parseString parseDigit mempty "1"
-- A letter is not a digit.
parseString parseDigit mempty "a"
-- No digits at all: reported as "expected: integer".
parseString base10Integer mempty "a"
parseString base10Integer mempty "1"
parseString base10Integer mempty "123"
-- Parsing stops at the first non-digit; the trailing "asdf" is left over.
parseString base10Integer mempty "123asdf"
-- A leading '-' is not handled by this parser.
parseString base10Integer mempty "-123asdf"

Success '1'

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m1[0m: [91merror[0m: expected: digit
a[1m[94m<EOF>[0;1m[0m 
[92m^[0m      , _errDeltas = [Columns 0 0]})

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m1[0m: [91merror[0m: expected: integer
a[1m[94m<EOF>[0;1m[0m 
[92m^[0m      , _errDeltas = [Columns 0 0]})

Success 1

Success 123

Success 123

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m1[0m: [91merror[0m: expected: integer
-123asdf[1m[94m<EOF>[0;1m[0m 
[92m^[0m             , _errDeltas = [Columns 0 0]})

In [59]:
-- | Like 'base10Integer', but also accepts a single leading @'-'@ and
-- returns the negated value.
base10Integer' :: Parser Integer
base10Integer' = base10Integer <|> (negated <?> "integer")
  where
    negated = negate <$> (char '-' *> base10Integer)
    
-- Neither a digit nor '-': fails at column 1.
parseString base10Integer' mempty "a"
parseString base10Integer' mempty "1"
parseString base10Integer' mempty "123"
-- Trailing non-digits are left over.
parseString base10Integer' mempty "123asdf"
-- The sign is now handled.
parseString base10Integer' mempty "-123asdf"
-- '-' must be followed by a digit: fails at column 2, after the sign.
parseString base10Integer' mempty "-a123asdf"

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m1[0m: [91merror[0m: expected: integer
a[1m[94m<EOF>[0;1m[0m 
[92m^[0m      , _errDeltas = [Columns 0 0]})

Success 1

Success 123

Success 123

Success (-123)

Failure (ErrInfo {_errDoc = [1m(interactive)[0m:[1m1[0m:[1m2[0m: [91merror[0m: expected: integer
-a123asdf[1m[94m<EOF>[0;1m[0m 
 [92m^[0m             , _errDeltas = [Columns 1 1]})