Skip to content

Commit

Permalink
Add a CFG parser.
Browse files Browse the repository at this point in the history
  • Loading branch information
23Skidoo committed Apr 9, 2013
1 parent 3d8af0c commit 3bd8f14
Show file tree
Hide file tree
Showing 6 changed files with 206 additions and 73 deletions.
38 changes: 14 additions & 24 deletions src/CFG/Examples.hs
@@ -1,33 +1,23 @@
module CFG.Examples (balancedParentheses, sillyGrammar, sillyGrammar2)
where

import CFG.Helpers.CNF
import CFG.Types
import CFG.Types

-- | Example grammar: balanced parenthese.
import qualified CFG.Helpers.CNF as CNF
import CFG.Read

fromRight :: Either String a -> a
fromRight = either error id

-- | Example grammar: balanced parentheses.
balancedParentheses :: CompiledCNFGrammar
balancedParentheses =
-- S -> SS | LH | LR
-- H -> SR
-- L -> '('
-- R -> ')'
listToGrammar
[ ruleNonTerminal "S" [ ("S","S"), ("L","H"), ("L","R")]
, ruleNonTerminal "H" [("S","R")]
, ruleTerminal "L" '('
, ruleTerminal "R" ')'
]
balancedParentheses = CNF.compileGrammar . fromRight . readCNFGrammar $
"S -> SS, S -> LH, S -> LR, H -> SR, L -> (, R -> )"

sillyGrammar :: CompiledCNFGrammar
sillyGrammar =
listToGrammar
[ ruleNonTerminal "S" [("S1", "S2")]
, ruleTerminal "S1" 'o'
, ruleTerminal "S2" 'o' ]
sillyGrammar = CNF.compileGrammar . fromRight . readCNFGrammar $
"S -> S1S2, S1 -> o, S2 -> o"

sillyGrammar2 :: CompiledCNFGrammar
sillyGrammar2 =
listToGrammar
[ ruleNonTerminal "S" [("S", "S"), ("S1", "S1")]
, ruleTerminal "S1" '1'
, ruleTerminal "S1" '0' ]
sillyGrammar2 = CNF.compileGrammar . fromRight . readCNFGrammar $
"S -> SS, S -> S1S1, S1 -> 1, S1 -> 0"
67 changes: 61 additions & 6 deletions src/CFG/Helpers/CFG.hs
@@ -1,14 +1,69 @@
-- | Helpers for working with general context-free grammars.
module CFG.Helpers.CFG (
-- * Helpers for constructing the grammar.
ruleTerminal, ruleNonTerminal
-- * Basic helpers.
ruleName, ruleNumber, isNonTerminalRule, nonTerminalRuleProductions
-- * Monad for fresh names.
,NameMonad, runNameMonad, freshName, rememberName
)
where

import CFG.Types

ruleTerminal :: RuleName -> Char -> NamedCFGRule
ruleTerminal name prod = CFGTerminalRule name (charToSymbol prod)
import Control.Monad.State
import qualified Data.Set as S

ruleNonTerminal :: RuleName -> [[RuleName]] -> NamedCFGRule
ruleNonTerminal name prods = CFGNonTerminalRule name prods
-- Basic helpers
ruleName :: NamedCFGRule -> RuleName
ruleName (CFGTerminalRule name _) = name
ruleName (CFGNonTerminalRule name _) = name

ruleNumber :: NumberedCFGRule -> RuleNumber
ruleNumber (CFGTerminalRule num _) = num
ruleNumber (CFGNonTerminalRule num _) = num

isNonTerminalRule :: CFGRule a -> Bool
isNonTerminalRule (CFGNonTerminalRule _ _) = True
isNonTerminalRule _ = False

nonTerminalRuleProductions :: CFGRule a -> [[a]]
nonTerminalRuleProductions (CFGNonTerminalRule _ prods) = prods
nonTerminalRuleProductions _ = error "Non-terminal rule expected!"


-- Monad for generating fresh names.
data NameState = NameState { nameStateCounter :: !Int
, nameStateSet :: !(S.Set RuleName) }
deriving Show
type NameMonad = State NameState

-- | Initial state, for using in conjunction with 'runState'.
runNameMonad :: S.Set RuleName -> NameMonad a -> a
runNameMonad s act = fst $ runState act (NameState 0 s)

-- | Generate a fresh name.
freshName :: NameMonad RuleName
freshName = do
c <- getCounter
incCounter
let n = "Z" ++ (show c)
hasSeen <- nameSetContains n
if hasSeen
then freshName
else do rememberName n
return n
where
getCounter :: NameMonad Int
getCounter = gets nameStateCounter

incCounter :: NameMonad ()
incCounter = modify
(\s -> s { nameStateCounter = (+1) . nameStateCounter $ s })

nameSetContains :: RuleName -> NameMonad Bool
nameSetContains n = S.member n `fmap` gets nameStateSet

-- | Remember a given name. A remembered name will never be produced by
-- 'freshName'.
rememberName :: RuleName -> NameMonad ()
rememberName n = modify
(\s -> s { nameStateSet = S.insert n . nameStateSet $ s})
14 changes: 1 addition & 13 deletions src/CFG/Helpers/CNF.hs
@@ -1,8 +1,7 @@
-- | Helpers for working with context-free grammars in Chomsky normal form.
module CFG.Helpers.CNF (
-- * Helpers for constructing the grammar.
ruleTerminal, ruleNonTerminal
,compileGrammar, listToGrammar
compileGrammar

-- * Misc. helper functions.
,ruleName, ruleNumber
Expand All @@ -16,13 +15,6 @@ import CFG.Types
import qualified Data.Map as M
import Data.Maybe (fromJust)


ruleTerminal :: RuleName -> Char -> NamedCNFRule
ruleTerminal name prod = CNFTerminalRule name (charToSymbol prod)

ruleNonTerminal :: RuleName -> [(RuleName, RuleName)] -> NamedCNFRule
ruleNonTerminal name prods = CNFNonTerminalRule name prods

compileGrammar :: NamedCNFGrammar -> CompiledCNFGrammar
compileGrammar (CNFGrammar rules start e) =
CNFGrammar (map compileRule rules) (lookupName start) e
Expand All @@ -40,10 +32,6 @@ compileGrammar (CNFGrammar rules start e) =
CNFNonTerminalRule (lookupName name)
[(lookupName a, lookupName b) | (a,b) <- prods]

-- NB: this assumes that the grammar doesn't generate the empty string.
listToGrammar :: [NamedCNFRule] -> CompiledCNFGrammar
listToGrammar rules = compileGrammar $ CNFGrammar rules "S" False

-- Misc. helper functions.

ruleName :: NamedCNFRule -> RuleName
Expand Down
157 changes: 128 additions & 29 deletions src/CFG/Read.hs
@@ -1,42 +1,134 @@
module CFG.Read (readCFGrammar, readCNFGrammar)
where

import CFG.Helpers.CNF
import CFG.Types

import Control.Applicative hiding (many, (<|>))
import Control.Monad (unless)
import Data.Bifunctor (first)
import Data.Char (isPunctuation)
import Data.Either ()
import Data.List (find, groupBy, sortBy)
import Data.Maybe (isJust)
import Data.Ord (comparing)
import Text.Parsec
import qualified CFG.Helpers.CFG as CFG
import qualified CFG.Helpers.CNF as CNF
import CFG.Types

import Control.Applicative hiding (many, (<|>))
import Control.Monad (foldM, unless)
import Data.Bifunctor (first)
import Data.Either (partitionEithers)
import Data.List (find, groupBy, partition, sortBy)
import qualified Data.Map as M
import Data.Maybe (fromJust, isJust)
import Data.Ord (comparing)
import qualified Data.Set as S
import Data.Tuple (swap)
import Text.Parsec

type SymbolMap = M.Map Symbol RuleName
type SymOrName = Either Symbol RuleName

-- | Parse a comma-delimited string representing a context-free grammar
-- in. Uppercase letters followed by zero or more digits act as nonterminals and
-- lowercase letters are terminals. The initial nonterminal is always called
-- S. Empty productions are allowed.
readCFGrammar :: String -> Either String NamedCFGrammar
readCFGrammar _input = undefined
readCFGrammar input = do
rules <- first show $ parse cfgRulesP "<stdin>" input
validateCFGrammar rules

-- | A list of CFG rule names separated by commas.
cfgRulesP :: Parsec String () [(RuleName, [SymOrName])]
cfgRulesP = sepBy cfgRuleP (char ',' >> spaces)

-- | A single CFG rule. Sum type representation makes it easier to extract
-- non-terminals (see 'processRules').
cfgRuleP :: Parsec String () (RuleName, [SymOrName])
cfgRuleP = do
name <- ruleNameP
_ <- spaces >> string "->" >> spaces
rhs <- many ruleNameOrTerminalP
return $ (name, rhs)
where
ruleNameOrTerminalP :: Parsec String () (SymOrName)
ruleNameOrTerminalP = (Left <$> terminalP) <|> (Right <$> ruleNameP)

-- | Given a bunch of rules, perform various checks and produce a CFGrammar.
validateCFGrammar :: [(RuleName, [SymOrName])] ->
Either String NamedCFGrammar
validateCFGrammar g = do
-- Replace all Lefts with Rights in non-terminal productions. We don't want to
-- mix symbols and rule names in non-terminal rules.
let (terms, nonterms) = partition isTerm g
allNames = S.fromList . map fst $ g
allSyms = map snd namedSyms ++ concatMap extractSyms nonterms
namedSyms = map (\(nam, [Left sym]) -> (nam, sym)) terms

allSymsMap = CFG.runNameMonad allNames $
foldM bindSym M.empty allSyms

termRules = map toTermRule namedSyms
++ map (toTermRule . swap) (M.toList allSymsMap)
nontermRules = map (toNonTermRule allSymsMap) nonterms
allRules = termRules ++ nontermRules

-- Merge all non-terminal productions with the same name. TODO: remove
-- duplication.
sorted = sortBy (comparing CFG.ruleName) allRules
grouped = groupBy
(\r0 r1 ->
CFG.isNonTerminalRule r0
&& CFG.isNonTerminalRule r1
&& CFG.ruleName r0 == CFG.ruleName r1) sorted
merged = foldr mergeProductions [] grouped

-- Check that the grammar contains the start rule.
unless (S.member "S" allNames) $ (Left "No start rule found!")

return (CFGrammar merged "S")
where
isTerm :: (RuleName, [SymOrName]) -> Bool
isTerm (_,[Left _]) = True
isTerm _ = False

extractSyms :: (RuleName, [SymOrName]) -> [Symbol]
extractSyms (_, l) = fst . partitionEithers $ l

bindSym :: SymbolMap -> Symbol -> CFG.NameMonad SymbolMap
bindSym m sym = do n <- CFG.freshName
return $ M.insert sym n m

toTermRule :: (RuleName, Symbol) -> NamedCFGRule
toTermRule (nam, sym) = CFGTerminalRule nam sym

toNonTermRule :: SymbolMap -> (RuleName, [SymOrName]) -> NamedCFGRule
toNonTermRule symMap (nam, prods) = CFGNonTerminalRule nam
[(map toName prods)]
where
toName :: SymOrName -> RuleName
toName (Left sym) = fromJust . M.lookup sym $ symMap
toName (Right n) = n

mergeProductions :: [NamedCFGRule] -> [NamedCFGRule] -> [NamedCFGRule]
mergeProductions [] rest = rest
mergeProductions [rule] rest = rule:rest
mergeProductions rules rest =
let name = CFG.ruleName . head $ rules
prods = concatMap CFG.nonTerminalRuleProductions rules
in (CFGNonTerminalRule name prods) : rest


-- | Parse a comma-delimited string representing a context-free grammar in
-- CNF. Uppercase letters followed by zero or more digits act as nonterminals
-- and lowercase letters are terminals. The initial nonterminal is always called
-- S.
readCNFGrammar :: String -> Either String NamedCNFGrammar
readCNFGrammar input = do g <- first show $ parse rulesP "<stdin>" input
validateCNFGrammar g
readCNFGrammar input = do rules <- first show $ parse cnfRulesP "<stdin>" input
validateCNFGrammar rules

-- | A list of rule names separated by commas.
rulesP :: Parsec String () [NamedCNFRule]
rulesP = sepBy ruleP (char ',' >> spaces)
-- | A list of CNF rule names separated by commas.
cnfRulesP :: Parsec String () [NamedCNFRule]
cnfRulesP = sepBy cnfRuleP (char ',' >> spaces)

-- | A rule name, followed by '<-', followed either by a terminal (lowercase
-- letter, digit or a punctuation symbol) or by two names of non-terminals.
ruleP :: Parsec String () NamedCNFRule
ruleP = do
cnfRuleP :: Parsec String () NamedCNFRule
cnfRuleP = do
name <- ruleNameP
_ <- spaces >> string "->" >> spaces
mTerm <- optionMaybe (charToSymbol <$>
(lower <|> satisfy isPunctuation <|> digit))
mTerm <- optionMaybe terminalP
case mTerm of
Just t -> return $ CNFTerminalRule name t
Nothing -> do
Expand All @@ -47,28 +139,35 @@ ruleP = do
ruleNameP :: Parsec String () String
ruleNameP = (:) <$> upper <*> (many digit)

-- | A terminal: a digit, lowercase letter or a special character.
terminalP :: Parsec String () Symbol
terminalP = charToSymbol <$>
(lower <|> digit <|> satisfy (flip elem "()+-."))

-- | Given a bunch of CNF rules, perform various checks on them.
validateCNFGrammar :: [NamedCNFRule] -> Either String NamedCNFGrammar
validateCNFGrammar g = do
-- Group productions of non-terminals together (not actually necessary).
let sorted = sortBy (comparing ruleName) g
grouped = groupBy (\r0 r1 -> isNonTerminalRule r0
&& isNonTerminalRule r1
&& ruleName r0 == ruleName r1) sorted
let sorted = sortBy (comparing CNF.ruleName) g
grouped = groupBy (\r0 r1 -> CNF.isNonTerminalRule r0
&& CNF.isNonTerminalRule r1
&& CNF.ruleName r0 == CNF.ruleName r1) sorted
merged = foldr mergeProductions [] grouped

-- Check that the start rule exists.
unless (isJust $ find (\r -> ruleName r == "S") merged) $
unless (isJust $ find (\r -> CNF.ruleName r == "S") merged) $
(Left "No start rule found!")

-- TODO: Add more validity checks. Check whether the grammar produces an empty
-- string.
-- Check that terminal rules are unique and don't intersect with nonterminals.
return (CNFGrammar merged "S" False)

where
mergeProductions :: [NamedCNFRule] -> [NamedCNFRule] -> [NamedCNFRule]
mergeProductions [] rest = rest
mergeProductions [rule] rest = rule:rest
mergeProductions rules rest =
let (CNFNonTerminalRule name _) = head rules
prods = concatMap nonTerminalRuleProductions rules
let name = CNF.ruleName . head $ rules
prods = concatMap CNF.nonTerminalRuleProductions rules
in (CNFNonTerminalRule name prods) : rest
2 changes: 1 addition & 1 deletion src/CFG/Types.hs
Expand Up @@ -20,7 +20,7 @@ module CFG.Types (
-- Basic types.

newtype Symbol = SymChar Char
deriving (Show, Eq)
deriving (Show, Eq, Ord)
type Symbols = [Symbol]
type RuleName = String
type RuleNumber = Int
Expand Down
1 change: 1 addition & 0 deletions src/cfg.cabal
Expand Up @@ -25,6 +25,7 @@ library
CFG.Examples
-- other-modules:
build-depends: base < 5,
mtl >= 2 && < 3,
array < 0.5,
containers < 0.6,
parsec >= 3 && < 3.2,
Expand Down

0 comments on commit 3bd8f14

Please sign in to comment.