Skip to content

Commit

Permalink
New implementation of patterns using globs.
Browse files Browse the repository at this point in the history
Closes gh-18
  • Loading branch information
jaspervdj committed Mar 29, 2011
1 parent e9666f7 commit 8bd45b9
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 62 deletions.
8 changes: 4 additions & 4 deletions src/Hakyll/Core/Identifier.hs
Expand Up @@ -29,15 +29,15 @@ module Hakyll.Core.Identifier

import Control.Arrow (second)
import Data.Monoid (Monoid)
import System.FilePath (joinPath)
import Data.List (intercalate)

import Data.Binary (Binary)
import GHC.Exts (IsString, fromString)
import Data.Typeable (Typeable)

-- | An identifier used to uniquely identify a value
--
newtype Identifier = Identifier {unIdentifier :: [String]}
newtype Identifier = Identifier {unIdentifier :: String}
deriving (Eq, Ord, Monoid, Binary, Typeable)

instance Show Identifier where
Expand All @@ -49,7 +49,7 @@ instance IsString Identifier where
-- | Parse an identifier from a string
--
parseIdentifier :: String -> Identifier
parseIdentifier = Identifier . filter (not . null) . split'
parseIdentifier = Identifier . intercalate "/" . filter (not . null) . split'
where
split' [] = [[]]
split' str = let (pre, post) = second (drop 1) $ break (== '/') str
Expand All @@ -58,4 +58,4 @@ parseIdentifier = Identifier . filter (not . null) . split'
-- | Convert an identifier to a relative 'FilePath'
--
toFilePath :: Identifier -> FilePath
toFilePath = joinPath . unIdentifier
toFilePath = unIdentifier
91 changes: 41 additions & 50 deletions src/Hakyll/Core/Identifier/Pattern.hs
Expand Up @@ -6,27 +6,22 @@
-- To match more than one identifier, there are different captures that one can
-- use:
--
-- * @*@: matches exactly one element of an identifier;
-- * @*@: matches at most one element of an identifier;
--
-- * @**@: matches one or more elements of an identifier.
--
-- Some examples:
--
-- * @foo\/*@ will match @foo\/bar@ and @foo\/foo@, but not @foo\/bar\/qux@ nor
-- @foo@;
-- * @foo\/*@ will match @foo\/bar@ and @foo\/foo@, but not @foo\/bar\/qux@;
--
-- * @**@ will match any non-empty identifier;
-- * @**@ will match any identifier;
--
-- * @foo\/**@ will match @foo\/bar@ and @foo\/bar\/qux@, but not @bar\/foo@ nor
-- @foo@;
-- * @foo\/**@ will match @foo\/bar@ and @foo\/bar\/qux@, but not @bar\/foo@;
--
-- A small warning: patterns are not globs. Using @foo\/*.markdown@ will not do
-- what you probably intended, as it will only match the file which is literally
-- called @foo\/*.markdown@. Remember that these captures only work on elements
-- of identifiers as a whole; not on parts of these elements.
-- * @foo\/*.html@ will match all HTML files in the @foo\/@ directory.
--
-- Furthermore, the 'match' function allows the user to get access to the
-- elements captured by the capture elements in the pattern.
-- The 'match' function allows the user to get access to the elements captured
-- by the capture elements in the pattern.
--
module Hakyll.Core.Identifier.Pattern
( Pattern
Expand All @@ -39,7 +34,8 @@ module Hakyll.Core.Identifier.Pattern
, fromCaptures
) where

import Data.List (intercalate)
import Data.List (isPrefixOf, inits, tails)
import Control.Arrow ((&&&), (>>>))
import Control.Monad (msum)
import Data.Maybe (isJust)
import Data.Monoid (mempty, mappend)
Expand All @@ -50,40 +46,36 @@ import Hakyll.Core.Identifier

-- | One base element of a pattern
--
data PatternComponent = CaptureOne
data PatternComponent = Capture
| CaptureMany
| Literal String
deriving (Eq)

instance Show PatternComponent where
show CaptureOne = "*"
show CaptureMany = "**"
show (Literal s) = s
deriving (Eq, Show)

-- | Type that allows matching on identifiers
--
newtype Pattern = Pattern {unPattern :: [PatternComponent]}
deriving (Eq)

instance Show Pattern where
show = intercalate "/" . map show . unPattern
deriving (Eq, Show)

instance IsString Pattern where
fromString = parsePattern

-- | Parse a pattern from a string
--
parsePattern :: String -> Pattern
parsePattern = Pattern . map toPattern . unIdentifier . parseIdentifier
parsePattern = Pattern . parse' -- undefined -- Pattern . map toPattern . unIdentifier . parseIdentifier
where
toPattern x | x == "*" = CaptureOne
| x == "**" = CaptureMany
| otherwise = Literal x
parse' str =
let (chunk, rest) = break (`elem` "\\*") str
in case rest of
('\\' : x : xs) -> Literal (chunk ++ [x]) : parse' xs
('*' : '*' : xs) -> Literal chunk : CaptureMany : parse' xs
('*' : xs) -> Literal chunk : Capture : parse' xs
xs -> Literal chunk : Literal xs : []

-- | Match an identifier against a pattern, generating a list of captures
--
match :: Pattern -> Identifier -> Maybe [Identifier]
match (Pattern p) (Identifier i) = fmap (map Identifier) $ match' p i
match p (Identifier i) = fmap (map Identifier) $ match' (unPattern p) i

-- | Check if an identifier matches a pattern
--
Expand All @@ -95,31 +87,30 @@ doesMatch p = isJust . match p
matches :: Pattern -> [Identifier] -> [Identifier]
matches p = filter (doesMatch p)

-- | Split a list at every possible point, generate a list of (init, tail) cases
-- | Split a list at every possible point, generate a list of (init, tail)
-- cases. The result is sorted with inits decreasing in length.
--
splits :: [a] -> [([a], [a])]
splits ls = reverse $ splits' [] ls
where
splits' lx ly = (lx, ly) : case ly of
[] -> []
(y : ys) -> splits' (lx ++ [y]) ys
splits = inits &&& tails >>> uncurry zip >>> reverse

-- | Internal verion of 'match'
--
match' :: [PatternComponent] -> [String] -> Maybe [[String]]
match' :: [PatternComponent] -> String -> Maybe [String]
match' [] [] = Just [] -- An empty match
match' [] _ = Nothing -- No match
match' _ [] = Nothing -- No match
match' (m : ms) (s : ss) = case m of
-- Take one string and one literal, fail on mismatch
Literal l -> if s == l then match' ms ss else Nothing
-- Take one string and one capture
CaptureOne -> fmap ([s] :) $ match' ms ss
-- Take one string, and one or many captures
CaptureMany ->
let take' (i, t) = fmap (i :) $ match' ms t
in msum $ map take' $ splits (s : ss)

match' [] _ = Nothing -- No match
-- match' _ [] = Nothing -- No match
match' (Literal l : ms) str
-- Match the literal against the string
| l `isPrefixOf` str = match' ms $ drop (length l) str
| otherwise = Nothing
match' (Capture : ms) str =
-- Match until the next /
let (chunk, rest) = break (== '/') str
in msum $ [ fmap (i :) (match' ms (t ++ rest)) | (i, t) <- splits chunk ]
match' (CaptureMany : ms) str =
-- Match everything
msum $ [ fmap (i :) (match' ms t) | (i, t) <- splits str ]

-- | Create an identifier from a pattern by filling in the captures with a given
-- string
--
Expand Down Expand Up @@ -152,9 +143,9 @@ fromCaptureString pattern = fromCapture pattern . parseIdentifier
fromCaptures :: Pattern -> [Identifier] -> Identifier
fromCaptures (Pattern []) _ = mempty
fromCaptures (Pattern (m : ms)) [] = case m of
Literal l -> Identifier [l] `mappend` fromCaptures (Pattern ms) []
Literal l -> Identifier l `mappend` fromCaptures (Pattern ms) []
_ -> error $ "Hakyll.Core.Identifier.Pattern.fromCaptures: "
++ "identifier list exhausted"
fromCaptures (Pattern (m : ms)) ids@(i : is) = case m of
Literal l -> Identifier [l] `mappend` fromCaptures (Pattern ms) ids
Literal l -> Identifier l `mappend` fromCaptures (Pattern ms) ids
_ -> i `mappend` fromCaptures (Pattern ms) is
21 changes: 13 additions & 8 deletions tests/Hakyll/Core/Identifier/Tests.hs
Expand Up @@ -11,12 +11,17 @@ import TestSuite.Util

tests :: [Test]
tests = fromAssertions "match"
[ Just ["bar"] @=? match "foo/**" "foo/bar"
, Just ["foo/bar"] @=? match "**" "foo/bar"
, Nothing @=? match "*" "foo/bar"
, Just [] @=? match "foo" "foo"
, Just ["foo"] @=? match "*/bar" "foo/bar"
, Just ["foo/bar"] @=? match "**/qux" "foo/bar/qux"
, Just ["foo/bar", "qux"] @=? match "**/*" "foo/bar/qux"
, Just ["foo", "bar/qux"] @=? match "*/**" "foo/bar/qux"
[ Just ["bar"] @=? match "foo/**" "foo/bar"
, Just ["foo/bar"] @=? match "**" "foo/bar"
, Nothing @=? match "*" "foo/bar"
, Just [] @=? match "foo" "foo"
, Just ["foo"] @=? match "*/bar" "foo/bar"
, Just ["foo/bar"] @=? match "**/qux" "foo/bar/qux"
, Just ["foo/bar", "qux"] @=? match "**/*" "foo/bar/qux"
, Just ["foo", "bar/qux"] @=? match "*/**" "foo/bar/qux"
, Just ["foo"] @=? match "*.html" "foo.html"
, Nothing @=? match "*.html" "foo/bar.html"
, Just ["foo/bar"] @=? match "**.html" "foo/bar.html"
, Just ["foo/bar", "wut"] @=? match "**/qux/*" "foo/bar/qux/wut"
, Just ["lol", "fun/large"] @=? match "*cat/**.jpg" "lolcat/fun/large.jpg"
]

0 comments on commit 8bd45b9

Please sign in to comment.