|
| 1 | +module Main where |
| 2 | + |
| 3 | +import qualified Text.BibTeX.Format as Format |
| 4 | +import qualified Text.BibTeX.Entry as Entry |
| 5 | +import qualified Text.LaTeX.Character as LaTeX |
| 6 | + |
| 7 | +import qualified Distribution.PackageDescription.Parse as PkgP |
| 8 | +import qualified Distribution.PackageDescription as PkgD |
| 9 | +import qualified Distribution.Package as Pkg |
| 10 | +import qualified Distribution.Verbosity as Verbosity |
| 11 | +import Distribution.PackageDescription |
| 12 | + (PackageDescription, ) |
| 13 | +import Distribution.Package |
| 14 | + (PackageIdentifier(..), ) |
| 15 | +import Distribution.PackageDescription.Parse |
| 16 | + (parsePackageDescription, |
| 17 | + readPackageDescription, ) |
| 18 | +import System.Time (ClockTime(TOD), getClockTime, |
| 19 | + toCalendarTime, toUTCTime, |
| 20 | + CalendarTime, ctYear, ctMonth, ) |
| 21 | + |
| 22 | +import qualified Codec.Archive.Tar as Tar |
| 23 | +import qualified Codec.Archive.Tar.Entry as TarEnt |
| 24 | +import qualified Data.ByteString.Lazy.UTF8 as UTF8 |
| 25 | +import qualified Data.ByteString.Lazy as B |
| 26 | +import qualified System.IO as IO |
| 27 | + |
| 28 | +import Distribution.Text (display, ) |
| 29 | + |
| 30 | +import Data.String.HT (trim, ) |
| 31 | +import Data.Tuple.HT (mapFst, ) |
| 32 | +import Data.List.HT (switchL, switchR, ) |
| 33 | +import Data.Char (toLower, isSpace, isAlpha, chr, ) |
| 34 | +import Data.Version (showVersion, ) |
| 35 | +import qualified Data.List as List |
| 36 | + |
| 37 | + |
| 38 | +{- | |
| 39 | +See hackage-server:Distribution/Server/Pages/Package.hs |
| 40 | +-} |
| 41 | +packageURL :: PackageIdentifier -> String |
| 42 | +packageURL pkgid = "/package/" ++ display pkgid |
| 43 | + |
| 44 | + |
| 45 | +{- | |
| 46 | +Filter out parts in parentheses and e-mails addresses |
| 47 | +-} |
| 48 | +removeAnnotations :: String -> String |
| 49 | +removeAnnotations "" = "" |
| 50 | +removeAnnotations (c:cs) = |
| 51 | + case c of |
| 52 | + '(' -> removeAnnotations $ drop 1 $ dropWhile (')'/=) cs |
| 53 | + '<' -> removeAnnotations $ drop 1 $ dropWhile ('>'/=) cs |
| 54 | + _ -> c : removeAnnotations cs |
| 55 | + |
| 56 | +splitList :: String -> [String] |
| 57 | +splitList = |
| 58 | + let separate rest = ([], uncurry (:) $ recourse rest) |
| 59 | + continue c rest = mapFst (c:) $ recourse rest |
| 60 | + recourse str = |
| 61 | + case str of |
| 62 | + '/' : rest -> separate rest |
| 63 | + '&' : rest -> separate rest |
| 64 | + ',' : rest -> separate rest |
| 65 | + c0:rest0@('a':'n':'d':c1:rest) -> |
| 66 | + if isSpace c0 && isSpace c1 |
| 67 | + then separate rest |
| 68 | + else continue c0 rest0 |
| 69 | + c:rest -> continue c rest |
| 70 | + "" -> ([], []) |
| 71 | + in uncurry (:) . recourse |
| 72 | + |
| 73 | +splitAuthorList :: String -> [String] |
| 74 | +splitAuthorList = |
| 75 | + map (\author -> |
| 76 | + case author of |
| 77 | + "..." -> "others" |
| 78 | + "et al." -> "others" |
| 79 | + _ -> author) . |
| 80 | + filter (not . null) . |
| 81 | + map trim . |
| 82 | + splitList . |
| 83 | + -- remove numbers, quotation marks ... |
| 84 | + filter (\c -> isAlpha c || isSpace c || elem c ".-'@/&,") . |
| 85 | + removeAnnotations |
| 86 | + |
| 87 | +{- authors must be split with respect to ',', '/', '&' and ' and ' -} |
| 88 | +fromPackage :: CalendarTime -> PackageDescription -> Entry.T |
| 89 | +fromPackage time pkg = |
| 90 | + let authors = |
| 91 | + splitAuthorList $ PkgD.author pkg |
| 92 | + surname = |
| 93 | + switchL "unknown" (\firstAuthor _ -> |
| 94 | + switchR "" (\_ -> filter isAlpha) $ |
| 95 | + words firstAuthor) $ |
| 96 | + authors |
| 97 | + pkgId = PkgD.package pkg |
| 98 | + Pkg.PackageName name = Pkg.pkgName pkgId |
| 99 | + year = ctYear time |
| 100 | + versionStr = showVersion (Pkg.pkgVersion pkgId) |
| 101 | + bibId = |
| 102 | + map toLower surname ++ show year ++ |
| 103 | + name ++ "-" ++ versionStr |
| 104 | + in Entry.Cons "Misc" bibId $ |
| 105 | + ("author", |
| 106 | + if null authors |
| 107 | + then "unknown" |
| 108 | + else Format.authorList $ |
| 109 | + map LaTeX.fromUnicodeString authors) : |
| 110 | + ("title", |
| 111 | + "{" ++ name ++ ": " ++ |
| 112 | + LaTeX.fromUnicodeString (PkgD.synopsis pkg) ++ "}") : |
| 113 | + ("howpublished", |
| 114 | + "\\url{http://hackage.haskell.org" ++ |
| 115 | + packageURL (PkgD.package pkg) ++ "}") : |
| 116 | + ("year", show year) : |
| 117 | + ("month", show (ctMonth time)) : |
| 118 | + ("version", versionStr) : |
| 119 | + ("keywords", "Haskell, " ++ PkgD.category pkg ) : |
| 120 | + ("subtype", "program") : |
| 121 | + [] |
| 122 | + |
| 123 | +example :: IO () |
| 124 | +example = |
| 125 | + do now <- toCalendarTime =<< getClockTime |
| 126 | + pkg <- readPackageDescription Verbosity.silent "example.cabal" |
| 127 | + putStrLn (Format.entry $ fromPackage now $ PkgD.packageDescription pkg) |
| 128 | + |
| 129 | + |
| 130 | +{- | |
| 131 | +This decodes UTF-8 but in contrast to UTF8.toString |
| 132 | +it handles invalid characters like Latin-1 ones. |
| 133 | +This way we can also cope with many texts that contain actually Latin-1. |
| 134 | +-} |
| 135 | +decodeUTF8orLatin :: B.ByteString -> String |
| 136 | +decodeUTF8orLatin = |
| 137 | + List.unfoldr (\bstr -> |
| 138 | + flip fmap (UTF8.uncons bstr) $ \(c, rest) -> |
| 139 | + if c==UTF8.replacement_char |
| 140 | + then (chr $ fromIntegral $ B.head bstr, B.tail bstr) |
| 141 | + else (c,rest)) |
| 142 | + |
| 143 | + |
| 144 | +fromTarEntry :: Tar.Entry -> String |
| 145 | +fromTarEntry ent = |
| 146 | + case (List.isSuffixOf ".cabal" $ TarEnt.entryPath ent, |
| 147 | + TarEnt.entryContent ent) of |
| 148 | + (True, TarEnt.NormalFile txt _size) -> |
| 149 | + case parsePackageDescription (decodeUTF8orLatin txt) of |
| 150 | + PkgP.ParseOk _ pkg -> |
| 151 | + Format.entry $ |
| 152 | + fromPackage |
| 153 | + (toUTCTime (TOD (fromIntegral $ TarEnt.entryTime ent) 0)) |
| 154 | + (PkgD.packageDescription pkg) |
| 155 | + PkgP.ParseFailed msg -> show msg |
| 156 | + _ -> "" |
| 157 | + |
| 158 | +main :: IO () |
| 159 | +main = |
| 160 | + Tar.foldEntries |
| 161 | + (\entry cont -> putStrLn (fromTarEntry entry) >> cont) |
| 162 | + (return ()) (IO.hPutStr IO.stderr . show) . |
| 163 | + Tar.read =<< |
| 164 | + B.getContents |
| 165 | + |
0 commit comments