Skip to content
Browse files

Add a CFG parser.

  • Loading branch information...
1 parent 3d8af0c commit 3bd8f142e3625ca1d2df8ee2ab6c2ee23225dff4 @23Skidoo committed Apr 9, 2013
Showing with 206 additions and 73 deletions.
  1. +14 −24 src/CFG/Examples.hs
  2. +61 −6 src/CFG/Helpers/CFG.hs
  3. +1 −13 src/CFG/Helpers/CNF.hs
  4. +128 −29 src/CFG/Read.hs
  5. +1 −1 src/CFG/Types.hs
  6. +1 −0 src/cfg.cabal
View
38 src/CFG/Examples.hs
@@ -1,33 +1,23 @@
module CFG.Examples (balancedParentheses, sillyGrammar, sillyGrammar2)
where
-import CFG.Helpers.CNF
-import CFG.Types
+import CFG.Types
--- | Example grammar: balanced parenthese.
+import qualified CFG.Helpers.CNF as CNF
+import CFG.Read
+
+fromRight :: Either String a -> a
+fromRight = either error id
+
+-- | Example grammar: balanced parentheses.
balancedParentheses :: CompiledCNFGrammar
-balancedParentheses =
- -- S -> SS | LH | LR
- -- H -> SR
- -- L -> '('
- -- R -> ')'
- listToGrammar
- [ ruleNonTerminal "S" [ ("S","S"), ("L","H"), ("L","R")]
- , ruleNonTerminal "H" [("S","R")]
- , ruleTerminal "L" '('
- , ruleTerminal "R" ')'
- ]
+balancedParentheses = CNF.compileGrammar . fromRight . readCNFGrammar $
+ "S -> SS, S -> LH, S -> LR, H -> SR, L -> (, R -> )"
sillyGrammar :: CompiledCNFGrammar
-sillyGrammar =
- listToGrammar
- [ ruleNonTerminal "S" [("S1", "S2")]
- , ruleTerminal "S1" 'o'
- , ruleTerminal "S2" 'o' ]
+sillyGrammar = CNF.compileGrammar . fromRight . readCNFGrammar $
+ "S -> S1S2, S1 -> o, S2 -> o"
sillyGrammar2 :: CompiledCNFGrammar
-sillyGrammar2 =
- listToGrammar
- [ ruleNonTerminal "S" [("S", "S"), ("S1", "S1")]
- , ruleTerminal "S1" '1'
- , ruleTerminal "S1" '0' ]
+sillyGrammar2 = CNF.compileGrammar . fromRight . readCNFGrammar $
+ "S -> SS, S -> S1S1, S1 -> 1, S1 -> 0"
View
67 src/CFG/Helpers/CFG.hs
@@ -1,14 +1,69 @@
-- | Helpers for working with general context-free grammars.
module CFG.Helpers.CFG (
- -- * Helpers for constructing the grammar.
- ruleTerminal, ruleNonTerminal
+ -- * Basic helpers.
+ ruleName, ruleNumber, isNonTerminalRule, nonTerminalRuleProductions
+ -- * Monad for fresh names.
+ ,NameMonad, runNameMonad, freshName, rememberName
)
where
import CFG.Types
-ruleTerminal :: RuleName -> Char -> NamedCFGRule
-ruleTerminal name prod = CFGTerminalRule name (charToSymbol prod)
+import Control.Monad.State
+import qualified Data.Set as S
-ruleNonTerminal :: RuleName -> [[RuleName]] -> NamedCFGRule
-ruleNonTerminal name prods = CFGNonTerminalRule name prods
+-- Basic helpers
+ruleName :: NamedCFGRule -> RuleName
+ruleName (CFGTerminalRule name _) = name
+ruleName (CFGNonTerminalRule name _) = name
+
+ruleNumber :: NumberedCFGRule -> RuleNumber
+ruleNumber (CFGTerminalRule num _) = num
+ruleNumber (CFGNonTerminalRule num _) = num
+
+isNonTerminalRule :: CFGRule a -> Bool
+isNonTerminalRule (CFGNonTerminalRule _ _) = True
+isNonTerminalRule _ = False
+
+nonTerminalRuleProductions :: CFGRule a -> [[a]]
+nonTerminalRuleProductions (CFGNonTerminalRule _ prods) = prods
+nonTerminalRuleProductions _ = error "Non-terminal rule expected!"
+
+
+-- Monad for generating fresh names.
+data NameState = NameState { nameStateCounter :: !Int
+ , nameStateSet :: !(S.Set RuleName) }
+ deriving Show
+type NameMonad = State NameState
+
+-- | Initial state, for using in conjunction with 'runState'.
+runNameMonad :: S.Set RuleName -> NameMonad a -> a
+runNameMonad s act = fst $ runState act (NameState 0 s)
+
+-- | Generate a fresh name.
+freshName :: NameMonad RuleName
+freshName = do
+ c <- getCounter
+ incCounter
+ let n = "Z" ++ (show c)
+ hasSeen <- nameSetContains n
+ if hasSeen
+ then freshName
+ else do rememberName n
+ return n
+ where
+ getCounter :: NameMonad Int
+ getCounter = gets nameStateCounter
+
+ incCounter :: NameMonad ()
+ incCounter = modify
+ (\s -> s { nameStateCounter = (+1) . nameStateCounter $ s })
+
+ nameSetContains :: RuleName -> NameMonad Bool
+ nameSetContains n = S.member n `fmap` gets nameStateSet
+
+-- | Remember a given name. A remembered name will never be produced by
+-- 'freshName'.
+rememberName :: RuleName -> NameMonad ()
+rememberName n = modify
+ (\s -> s { nameStateSet = S.insert n . nameStateSet $ s})
View
14 src/CFG/Helpers/CNF.hs
@@ -1,8 +1,7 @@
-- | Helpers for working with context-free grammars in Chomsky normal form.
module CFG.Helpers.CNF (
-- * Helpers for constructing the grammar.
- ruleTerminal, ruleNonTerminal
- ,compileGrammar, listToGrammar
+ compileGrammar
-- * Misc. helper functions.
,ruleName, ruleNumber
@@ -16,13 +15,6 @@ import CFG.Types
import qualified Data.Map as M
import Data.Maybe (fromJust)
-
-ruleTerminal :: RuleName -> Char -> NamedCNFRule
-ruleTerminal name prod = CNFTerminalRule name (charToSymbol prod)
-
-ruleNonTerminal :: RuleName -> [(RuleName, RuleName)] -> NamedCNFRule
-ruleNonTerminal name prods = CNFNonTerminalRule name prods
-
compileGrammar :: NamedCNFGrammar -> CompiledCNFGrammar
compileGrammar (CNFGrammar rules start e) =
CNFGrammar (map compileRule rules) (lookupName start) e
@@ -40,10 +32,6 @@ compileGrammar (CNFGrammar rules start e) =
CNFNonTerminalRule (lookupName name)
[(lookupName a, lookupName b) | (a,b) <- prods]
--- NB: this assumes that the grammar doesn't generate the empty string.
-listToGrammar :: [NamedCNFRule] -> CompiledCNFGrammar
-listToGrammar rules = compileGrammar $ CNFGrammar rules "S" False
-
-- Misc. helper functions.
ruleName :: NamedCNFRule -> RuleName
View
157 src/CFG/Read.hs
@@ -1,42 +1,134 @@
module CFG.Read (readCFGrammar, readCNFGrammar)
where
-import CFG.Helpers.CNF
-import CFG.Types
-
-import Control.Applicative hiding (many, (<|>))
-import Control.Monad (unless)
-import Data.Bifunctor (first)
-import Data.Char (isPunctuation)
-import Data.Either ()
-import Data.List (find, groupBy, sortBy)
-import Data.Maybe (isJust)
-import Data.Ord (comparing)
-import Text.Parsec
+import qualified CFG.Helpers.CFG as CFG
+import qualified CFG.Helpers.CNF as CNF
+import CFG.Types
+import Control.Applicative hiding (many, (<|>))
+import Control.Monad (foldM, unless)
+import Data.Bifunctor (first)
+import Data.Either (partitionEithers)
+import Data.List (find, groupBy, partition, sortBy)
+import qualified Data.Map as M
+import Data.Maybe (fromJust, isJust)
+import Data.Ord (comparing)
+import qualified Data.Set as S
+import Data.Tuple (swap)
+import Text.Parsec
+
+type SymbolMap = M.Map Symbol RuleName
+type SymOrName = Either Symbol RuleName
+
+-- | Parse a comma-delimited string representing a context-free grammar
+-- in. Uppercase letters followed by zero or more digits act as nonterminals and
+-- lowercase letters are terminals. The initial nonterminal is always called
+-- S. Empty productions are allowed.
readCFGrammar :: String -> Either String NamedCFGrammar
-readCFGrammar _input = undefined
+readCFGrammar input = do
+ rules <- first show $ parse cfgRulesP "<stdin>" input
+ validateCFGrammar rules
+
+-- | A list of CFG rule names separated by commas.
+cfgRulesP :: Parsec String () [(RuleName, [SymOrName])]
+cfgRulesP = sepBy cfgRuleP (char ',' >> spaces)
+
+-- | A single CFG rule. Sum type representation makes it easier to extract
+-- non-terminals (see 'processRules').
+cfgRuleP :: Parsec String () (RuleName, [SymOrName])
+cfgRuleP = do
+ name <- ruleNameP
+ _ <- spaces >> string "->" >> spaces
+ rhs <- many ruleNameOrTerminalP
+ return $ (name, rhs)
+ where
+ ruleNameOrTerminalP :: Parsec String () (SymOrName)
+ ruleNameOrTerminalP = (Left <$> terminalP) <|> (Right <$> ruleNameP)
+
+-- | Given a bunch of rules, perform various checks and produce a CFGrammar.
+validateCFGrammar :: [(RuleName, [SymOrName])] ->
+ Either String NamedCFGrammar
+validateCFGrammar g = do
+ -- Replace all Lefts with Rights in non-terminal productions. We don't want to
+ -- mix symbols and rule names in non-terminal rules.
+ let (terms, nonterms) = partition isTerm g
+ allNames = S.fromList . map fst $ g
+ allSyms = map snd namedSyms ++ concatMap extractSyms nonterms
+ namedSyms = map (\(nam, [Left sym]) -> (nam, sym)) terms
+
+ allSymsMap = CFG.runNameMonad allNames $
+ foldM bindSym M.empty allSyms
+
+ termRules = map toTermRule namedSyms
+ ++ map (toTermRule . swap) (M.toList allSymsMap)
+ nontermRules = map (toNonTermRule allSymsMap) nonterms
+ allRules = termRules ++ nontermRules
+
+ -- Merge all non-terminal productions with the same name. TODO: remove
+ -- duplication.
+ sorted = sortBy (comparing CFG.ruleName) allRules
+ grouped = groupBy
+ (\r0 r1 ->
+ CFG.isNonTerminalRule r0
+ && CFG.isNonTerminalRule r1
+ && CFG.ruleName r0 == CFG.ruleName r1) sorted
+ merged = foldr mergeProductions [] grouped
+
+ -- Check that the grammar contains the start rule.
+ unless (S.member "S" allNames) $ (Left "No start rule found!")
+
+ return (CFGrammar merged "S")
+ where
+ isTerm :: (RuleName, [SymOrName]) -> Bool
+ isTerm (_,[Left _]) = True
+ isTerm _ = False
+
+ extractSyms :: (RuleName, [SymOrName]) -> [Symbol]
+ extractSyms (_, l) = fst . partitionEithers $ l
+
+ bindSym :: SymbolMap -> Symbol -> CFG.NameMonad SymbolMap
+ bindSym m sym = do n <- CFG.freshName
+ return $ M.insert sym n m
+
+ toTermRule :: (RuleName, Symbol) -> NamedCFGRule
+ toTermRule (nam, sym) = CFGTerminalRule nam sym
+
+ toNonTermRule :: SymbolMap -> (RuleName, [SymOrName]) -> NamedCFGRule
+ toNonTermRule symMap (nam, prods) = CFGNonTerminalRule nam
+ [(map toName prods)]
+ where
+ toName :: SymOrName -> RuleName
+ toName (Left sym) = fromJust . M.lookup sym $ symMap
+ toName (Right n) = n
+
+ mergeProductions :: [NamedCFGRule] -> [NamedCFGRule] -> [NamedCFGRule]
+ mergeProductions [] rest = rest
+ mergeProductions [rule] rest = rule:rest
+ mergeProductions rules rest =
+ let name = CFG.ruleName . head $ rules
+ prods = concatMap CFG.nonTerminalRuleProductions rules
+ in (CFGNonTerminalRule name prods) : rest
+
-- | Parse a comma-delimited string representing a context-free grammar in
-- CNF. Uppercase letters followed by zero or more digits act as nonterminals
-- and lowercase letters are terminals. The initial nonterminal is always called
-- S.
readCNFGrammar :: String -> Either String NamedCNFGrammar
-readCNFGrammar input = do g <- first show $ parse rulesP "<stdin>" input
- validateCNFGrammar g
+readCNFGrammar input = do rules <- first show $ parse cnfRulesP "<stdin>" input
+ validateCNFGrammar rules
--- | A list of rule names separated by commas.
-rulesP :: Parsec String () [NamedCNFRule]
-rulesP = sepBy ruleP (char ',' >> spaces)
+-- | A list of CNF rule names separated by commas.
+cnfRulesP :: Parsec String () [NamedCNFRule]
+cnfRulesP = sepBy cnfRuleP (char ',' >> spaces)
-- | A rule name, followed by '<-', followed either by a terminal (lowercase
-- letter, digit or a punctuation symbol) or by two names of non-terminals.
-ruleP :: Parsec String () NamedCNFRule
-ruleP = do
+cnfRuleP :: Parsec String () NamedCNFRule
+cnfRuleP = do
name <- ruleNameP
_ <- spaces >> string "->" >> spaces
- mTerm <- optionMaybe (charToSymbol <$>
- (lower <|> satisfy isPunctuation <|> digit))
+ mTerm <- optionMaybe terminalP
case mTerm of
Just t -> return $ CNFTerminalRule name t
Nothing -> do
@@ -47,28 +139,35 @@ ruleP = do
ruleNameP :: Parsec String () String
ruleNameP = (:) <$> upper <*> (many digit)
+-- | A terminal: a digit, lowercase letter or a special character.
+terminalP :: Parsec String () Symbol
+terminalP = charToSymbol <$>
+ (lower <|> digit <|> satisfy (flip elem "()+-."))
+
+-- | Given a bunch of CNF rules, perform various checks on them.
validateCNFGrammar :: [NamedCNFRule] -> Either String NamedCNFGrammar
validateCNFGrammar g = do
-- Group productions of non-terminals together (not actually necessary).
- let sorted = sortBy (comparing ruleName) g
- grouped = groupBy (\r0 r1 -> isNonTerminalRule r0
- && isNonTerminalRule r1
- && ruleName r0 == ruleName r1) sorted
+ let sorted = sortBy (comparing CNF.ruleName) g
+ grouped = groupBy (\r0 r1 -> CNF.isNonTerminalRule r0
+ && CNF.isNonTerminalRule r1
+ && CNF.ruleName r0 == CNF.ruleName r1) sorted
merged = foldr mergeProductions [] grouped
-- Check that the start rule exists.
- unless (isJust $ find (\r -> ruleName r == "S") merged) $
+ unless (isJust $ find (\r -> CNF.ruleName r == "S") merged) $
(Left "No start rule found!")
-- TODO: Add more validity checks. Check whether the grammar produces an empty
-- string.
+ -- Check that terminal rules are unique and don't intersect with nonterminals.
return (CNFGrammar merged "S" False)
where
mergeProductions :: [NamedCNFRule] -> [NamedCNFRule] -> [NamedCNFRule]
mergeProductions [] rest = rest
mergeProductions [rule] rest = rule:rest
mergeProductions rules rest =
- let (CNFNonTerminalRule name _) = head rules
- prods = concatMap nonTerminalRuleProductions rules
+ let name = CNF.ruleName . head $ rules
+ prods = concatMap CNF.nonTerminalRuleProductions rules
in (CNFNonTerminalRule name prods) : rest
View
2 src/CFG/Types.hs
@@ -20,7 +20,7 @@ module CFG.Types (
-- Basic types.
newtype Symbol = SymChar Char
- deriving (Show, Eq)
+ deriving (Show, Eq, Ord)
type Symbols = [Symbol]
type RuleName = String
type RuleNumber = Int
View
1 src/cfg.cabal
@@ -25,6 +25,7 @@ library
CFG.Examples
-- other-modules:
build-depends: base < 5,
+ mtl >= 2 && < 3,
array < 0.5,
containers < 0.6,
parsec >= 3 && < 3.2,

0 comments on commit 3bd8f14

Please sign in to comment.
Something went wrong with that request. Please try again.