Permalink
Browse files

Add the CFGrammar type and refactor a bit.

  • Loading branch information...
1 parent 111f320 commit 387be0498ba1f7f4152c8bdb2ec09b8708f7a1ad @23Skidoo committed Apr 8, 2013
Showing with 163 additions and 119 deletions.
  1. +6 −1 src/CFG/CNF.hs
  2. +76 −0 src/CFG/CNFHelpers.hs
  3. +10 −9 src/CFG/Examples.hs
  4. +8 −7 src/CFG/Parse.hs
  5. +27 −23 src/CFG/Read.hs
  6. +33 −77 src/CFG/Types.hs
  7. +1 −0 src/cfg.cabal
  8. +1 −1 src/exe/cykAlgorithm/Main.hs
  9. +1 −1 src/tests/Tests.hs
View
@@ -1,2 +1,7 @@
-module CFG.CNF ()
+module CFG.CNF (convertToCNF)
where
+
+import CFG.Types
+
+convertToCNF :: (Eq a) => CFGrammar a -> CNFGrammar a
+convertToCNF = undefined
View
@@ -0,0 +1,76 @@
+module CFG.CNFHelpers (
+ -- * Helpers for constructing the grammar.
+ ruleTerminal, ruleNonTerminal
+ ,compileGrammar, listToGrammar
+
+ -- * Misc. helper functions.
+ ,ruleName, ruleNumber
+ ,isTerminalRule, isNonTerminalRule, isStartRule
+ ,terminalRuleProduces, nonTerminalRuleProductions
+ )
+ where
+
+import CFG.Types
+
+import qualified Data.Map as M
+import Data.Maybe (fromJust)
+
+-- Helpers for constructing the grammar.
+
+ruleTerminal :: RuleName -> Char -> NamedCNFRule
+ruleTerminal name prod = CNFTerminalRule name (charToSymbol prod)
+
+ruleNonTerminal :: RuleName -> [(RuleName, RuleName)] -> NamedCNFRule
+ruleNonTerminal name prods = CNFNonTerminalRule name prods
+
+compileGrammar :: NamedCNFGrammar -> CompiledCNFGrammar
+compileGrammar (CNFGrammar rules start e) =
+ CNFGrammar (map compileRule rules) (lookupName start) e
+ where
+ idxMap :: M.Map RuleName RuleNumber
+ idxMap = M.fromList (zip (map ruleName rules) [1..])
+
+ lookupName :: RuleName -> RuleNumber
+ lookupName k = fromJust $ M.lookup k idxMap
+
+ compileRule :: NamedCNFRule -> NumberedCNFRule
+ compileRule (CNFTerminalRule name symbol) =
+ CNFTerminalRule (lookupName name) symbol
+ compileRule (CNFNonTerminalRule name prods) =
+ CNFNonTerminalRule (lookupName name)
+ [(lookupName a, lookupName b) | (a,b) <- prods]
+
+-- NB: this assumes that the grammar doesn't generate the empty string.
+listToGrammar :: [NamedCNFRule] -> CompiledCNFGrammar
+listToGrammar rules = compileGrammar $ CNFGrammar rules "S" False
+
+-- Misc. helper functions.
+
+ruleName :: NamedCNFRule -> RuleName
+ruleName (CNFTerminalRule name _) = name
+ruleName (CNFNonTerminalRule name _) = name
+
+ruleNumber :: NumberedCNFRule -> RuleNumber
+ruleNumber (CNFTerminalRule num _) = num
+ruleNumber (CNFNonTerminalRule num _) = num
+
+isTerminalRule :: CNFRule a -> Bool
+isTerminalRule (CNFTerminalRule _ _) = True
+isTerminalRule _ = False
+
+isNonTerminalRule :: CNFRule a -> Bool
+isNonTerminalRule (CNFNonTerminalRule _ _) = True
+isNonTerminalRule _ = False
+
+isStartRule :: (Eq a) => CNFGrammar a -> CNFRule a -> Bool
+isStartRule g (CNFNonTerminalRule name _) | cnfStartRule g == name = True
+ | otherwise = False
+isStartRule _ (CNFTerminalRule _ _) = False
+
+terminalRuleProduces :: CNFRule a -> Symbol -> Bool
+terminalRuleProduces (CNFTerminalRule _ s) s' = (s == s')
+terminalRuleProduces _ _ = error "Terminal rule expected!"
+
+nonTerminalRuleProductions :: CNFRule a -> [(a, a)]
+nonTerminalRuleProductions (CNFNonTerminalRule _ prods) = prods
+nonTerminalRuleProductions _ = error "Non-terminal rule expected!"
View
@@ -2,31 +2,32 @@ module CFG.Examples (balancedParentheses, sillyGrammar, sillyGrammar2)
where
import CFG.Types
+import CFG.CNFHelpers
-- | Example grammar: balanced parenthese.
-balancedParentheses :: CNFGrammar
+balancedParentheses :: CompiledCNFGrammar
balancedParentheses =
-- S -> SS | LH | LR
-- H -> SR
-- L -> '('
-- R -> ')'
- compileGrammar
- [ ruleStart "S" [ ("S","S"), ("L","H"), ("L","R")]
+ listToGrammar
+ [ ruleNonTerminal "S" [ ("S","S"), ("L","H"), ("L","R")]
, ruleNonTerminal "H" [("S","R")]
, ruleTerminal "L" '('
, ruleTerminal "R" ')'
]
-sillyGrammar :: CNFGrammar
+sillyGrammar :: CompiledCNFGrammar
sillyGrammar =
- compileGrammar
- [ ruleStart "S" [("S1", "S2")]
+ listToGrammar
+ [ ruleNonTerminal "S" [("S1", "S2")]
, ruleTerminal "S1" 'o'
, ruleTerminal "S2" 'o' ]
-sillyGrammar2 :: CNFGrammar
+sillyGrammar2 :: CompiledCNFGrammar
sillyGrammar2 =
- compileGrammar
- [ ruleStart "S" [("S", "S"), ("S1", "S1")]
+ listToGrammar
+ [ ruleNonTerminal "S" [("S", "S"), ("S1", "S1")]
, ruleTerminal "S1" '1'
, ruleTerminal "S1" '0' ]
View
@@ -8,32 +8,33 @@ import Data.Array.MArray
import Data.Array.ST
import CFG.Types
+import CFG.CNFHelpers
-- The CYK algorithm.
-cykAlgorithm :: CNFGrammar -> String -> Bool
+cykAlgorithm :: CompiledCNFGrammar -> String -> Bool
cykAlgorithm grammar input' = if n == 0
- -- TODO: Decide emptiness.
- then error "empty string"
+ then cnfProducesEmpty grammar
else or [arr ! (1,n,x) | x <- startIndices]
where
n = length input
- r = length grammar
- startIndices = map ruleNumber . filter isStartRule $ grammar
+ r = length rules
+ rules = cnfRules grammar
+ startIndices = map ruleNumber . filter (isStartRule grammar) $ rules
input = stringToSymbols input'
arr = runSTUArray $ do
marr <- newArray ((1,1,1),(n,n,r)) False
forM_ (zip [1..] input) $ \(i, ci) ->
- forM_ (filter isTerminalRule grammar) $ \rule -> do
+ forM_ (filter isTerminalRule rules) $ \rule -> do
let j = ruleNumber rule
when (terminalRuleProduces rule ci) $
writeArray marr (i,1,j) True
forM_ [2..n] $ \i ->
forM_ [1..(n-i+1)] $ \j ->
forM_ [1..(i-1)] $ \k ->
- forM_ (filter isNonTerminalRule grammar) $ \rule -> do
+ forM_ (filter isNonTerminalRule rules) $ \rule -> do
let a = ruleNumber rule
forM_ (nonTerminalRuleProductions rule) $ \(b,c) -> do
e0 <- readArray marr (j,k,b)
View
@@ -1,24 +1,29 @@
-module CFG.Read (readCNFGrammar)
+module CFG.Read (readCFGrammar, readCNFGrammar)
where
import CFG.Types
+import CFG.CNFHelpers
import Control.Applicative hiding (many, (<|>))
-import Control.Monad (when)
+import Control.Monad (unless)
import Data.Bifunctor (first)
import Data.Char (isPunctuation)
import Data.Either ()
-import Data.List (groupBy, sortBy)
+import Data.List (find, groupBy, sortBy)
+import Data.Maybe (isJust)
import Data.Ord (comparing)
import Text.Parsec
+readCFGrammar :: String -> Either String NamedCFGrammar
+readCFGrammar _input = undefined
+
-- | Parse a comma-delimited string representing a context-free grammar in
-- CNF. Uppercase letters followed by zero or more digits act as nonterminals
-- and lowercase letters are terminals. The initial nonterminal is always called
-- S.
readCNFGrammar :: String -> Either String NamedCNFGrammar
readCNFGrammar input = do g <- first show $ parse rulesP "<stdin>" input
- validateGrammar g
+ validateCNFGrammar g
-- | A list of rule names separated by commas.
rulesP :: Parsec String () [NamedCNFRule]
@@ -33,38 +38,37 @@ ruleP = do
mTerm <- optionMaybe (charToSymbol <$>
(lower <|> satisfy isPunctuation <|> digit))
case mTerm of
- Just t -> return $ TerminalRule name t
+ Just t -> return $ CNFTerminalRule name t
Nothing -> do
rhs <- pure (,) <*> ruleNameP <*> ruleNameP
- let isStart = if name == "S" then StartRule else NormalRule
- return $ NonTerminalRule name [rhs] isStart
+ return $ CNFNonTerminalRule name [rhs]
--- | A rule name: an upper-case letter followed by zero or more lower-case
--- letters.
+-- | A rule name: an upper-case letter followed by zero or more digits.
ruleNameP :: Parsec String () String
ruleNameP = (:) <$> upper <*> (many digit)
-validateGrammar :: NamedCNFGrammar -> Either String NamedCNFGrammar
-validateGrammar g = do
+validateCNFGrammar :: [NamedCNFRule] -> Either String NamedCNFGrammar
+validateCNFGrammar g = do
-- Group productions of non-terminals together (not actually necessary).
let sorted = sortBy (comparing ruleName) g
grouped = groupBy (\r0 r1 -> isNonTerminalRule r0
&& isNonTerminalRule r1
&& ruleName r0 == ruleName r1) sorted
- merged = foldr merge [] grouped
- numStartRules = length . filter isStartRule $ merged
+ merged = foldr mergeProductions [] grouped
- -- Check that there is only one start rule.
- when (numStartRules > 1) $ (Left "More than one start rule!")
+ -- Check that the start rule exists.
+ unless (isJust $ find (\r -> ruleName r == "S") merged) $
+ (Left "No start rule found!")
- -- TODO: Add more validity checks.
- return merged
+ -- TODO: Add more validity checks. Check whether the grammar produces an empty
+ -- string.
+ return (CNFGrammar merged "S" False)
where
- merge :: [NamedCNFRule] -> [NamedCNFRule] -> [NamedCNFRule]
- merge [] rest = rest
- merge [rule] rest = rule:rest
- merge rules rest =
- let (NonTerminalRule name _ isStart) = head rules
+ mergeProductions :: [NamedCNFRule] -> [NamedCNFRule] -> [NamedCNFRule]
+ mergeProductions [] rest = rest
+ mergeProductions [rule] rest = rule:rest
+ mergeProductions rules rest =
+ let (CNFNonTerminalRule name _) = head rules
prods = concatMap nonTerminalRuleProductions rules
- in (NonTerminalRule name prods isStart) : rest
+ in (CNFNonTerminalRule name prods) : rest
View
@@ -3,105 +3,61 @@ module CFG.Types (
-- * Types.
Symbol
,RuleName, RuleNumber
- ,IsStartRule(..)
- ,CNFRule(..)
- ,NamedCNFRule, NumberedCNFRule
- ,NamedCNFGrammar, CNFGrammar
- -- * Helpers for constructing the grammar.
- ,ruleTerminal, ruleNonTerminal, ruleStart
- ,compileGrammar
+ ,CFGRule(..), CFGrammar(..)
+ ,NamedCFGRule, NumberedCFGRule
+ ,NamedCFGrammar, CompiledCFGrammar
+
+ ,CNFRule(..), CNFGrammar(..)
+ ,NamedCNFRule, NumberedCNFRule
+ ,NamedCNFGrammar, CompiledCNFGrammar
-- * Misc. helper functions.
- ,ruleName, ruleNumber
- ,isTerminalRule, isNonTerminalRule, isStartRule
- ,terminalRuleProduces, nonTerminalRuleProductions
,charToSymbol, stringToSymbols, symbolsToString
)
- where
-
-import qualified Data.Map as M
-import Data.Maybe (fromJust)
-
+ where
--- Grammar ADT definition.
+-- Basic types.
newtype Symbol = SymChar Char
deriving (Show, Eq)
type Symbols = [Symbol]
type RuleName = String
type RuleNumber = Int
-data IsStartRule = StartRule | NormalRule
- deriving (Eq,Show)
-
-data CNFRule a = TerminalRule !a !Symbol
- | NonTerminalRule !a ![(a, a)] !IsStartRule
+-- General CFG. A rule that generates the empty string is represented as
+-- @CFGNonTerminalRule "RuleName" [[]]@
+data CFGRule a = CFGTerminalRule !a !Symbol
+ | CFGNonTerminalRule !a ![[a]]
deriving (Eq, Show)
-type NamedCNFRule = CNFRule RuleName
-type NumberedCNFRule = CNFRule RuleNumber
-
-type NamedCNFGrammar = [NamedCNFRule]
-type CNFGrammar = [NumberedCNFRule]
-
--- Helpers for constructing the grammar.
-
-ruleTerminal :: RuleName -> Char -> NamedCNFRule
-ruleTerminal name prod = TerminalRule name (SymChar prod)
+type NamedCFGRule = CFGRule RuleName
+type NumberedCFGRule = CFGRule RuleNumber
-ruleNonTerminal :: RuleName -> [(RuleName, RuleName)] -> NamedCNFRule
-ruleNonTerminal name prods = NonTerminalRule name prods NormalRule
+data CFGrammar a = CFGrammar { cfgRules :: [CFGRule a]
+ , cfgStartRule :: a
+ } deriving Show
-ruleStart :: RuleName -> [(RuleName, RuleName)] -> NamedCNFRule
-ruleStart name prods = NonTerminalRule name prods StartRule
+type NamedCFGrammar = CFGrammar RuleName
+type CompiledCFGrammar = CFGrammar RuleNumber
-compileGrammar :: NamedCNFGrammar -> CNFGrammar
-compileGrammar grammar = map compileRule grammar
- where
- idxMap :: M.Map RuleName RuleNumber
- idxMap = M.fromList (zip (map ruleName grammar) [1..])
-
- lookupName :: RuleName -> RuleNumber
- lookupName k = fromJust $ M.lookup k idxMap
-
- compileRule :: NamedCNFRule -> NumberedCNFRule
- compileRule (TerminalRule name symbol) =
- TerminalRule (lookupName name) symbol
- compileRule (NonTerminalRule name prods isStart) =
- NonTerminalRule (lookupName name)
- [(lookupName a, lookupName b) | (a,b) <- prods] isStart
-
--- Misc. helper functions.
-
-ruleName :: NamedCNFRule -> RuleName
-ruleName (TerminalRule name _) = name
-ruleName (NonTerminalRule name _ _) = name
-
-ruleNumber :: NumberedCNFRule -> RuleNumber
-ruleNumber (TerminalRule num _) = num
-ruleNumber (NonTerminalRule num _ _) = num
-
-isTerminalRule :: CNFRule a -> Bool
-isTerminalRule (TerminalRule _ _) = True
-isTerminalRule _ = False
-
-isNonTerminalRule :: CNFRule a -> Bool
-isNonTerminalRule (NonTerminalRule _ _ _) = True
-isNonTerminalRule _ = False
+-- A context-free grammar in CNF form.
+data CNFRule a = CNFTerminalRule !a !Symbol
+ | CNFNonTerminalRule !a ![(a, a)]
+ deriving (Eq, Show)
-isStartRule :: CNFRule a -> Bool
-isStartRule (NonTerminalRule _ _ StartRule) = True
-isStartRule _ = False
+type NamedCNFRule = CNFRule RuleName
+type NumberedCNFRule = CNFRule RuleNumber
-terminalRuleProduces :: CNFRule a -> Symbol -> Bool
-terminalRuleProduces (TerminalRule _ s) s' = (s == s')
-terminalRuleProduces _ _ = error "Terminal rule expected!"
+data CNFGrammar a = CNFGrammar { cnfRules :: [CNFRule a]
+ , cnfStartRule :: a
+ , cnfProducesEmpty :: Bool
+ } deriving Show
-nonTerminalRuleProductions :: CNFRule a -> [(a, a)]
-nonTerminalRuleProductions (NonTerminalRule _ prods _) = prods
-nonTerminalRuleProductions _ = error "Non-terminal rule expected!"
+type NamedCNFGrammar = CNFGrammar RuleName
+type CompiledCNFGrammar = CNFGrammar RuleNumber
+-- Helpers for working with the 'Symbol' type.
charToSymbol :: Char -> Symbol
charToSymbol = SymChar
Oops, something went wrong.

0 comments on commit 387be04

Please sign in to comment.