This repository has been archived by the owner. It is now read-only.
Permalink
Browse files

attempt at starting publication automation

  • Loading branch information...
Tim DuBois
Tim DuBois committed Mar 14, 2015
1 parent ec4b571 commit 28a471d594f31a80995ee06101fd6a2375b684ad
Showing with 210 additions and 0 deletions.
  1. +165 −0 Includes/Hackage.hs
  2. BIN Includes/Publications.exe
  3. +45 −0 Includes/Publications.hs
@@ -0,0 +1,165 @@
module Main where
import qualified Text.BibTeX.Format as Format
import qualified Text.BibTeX.Entry as Entry
import qualified Text.LaTeX.Character as LaTeX
import qualified Distribution.PackageDescription.Parse as PkgP
import qualified Distribution.PackageDescription as PkgD
import qualified Distribution.Package as Pkg
import qualified Distribution.Verbosity as Verbosity
import Distribution.PackageDescription
(PackageDescription, )
import Distribution.Package
(PackageIdentifier(..), )
import Distribution.PackageDescription.Parse
(parsePackageDescription,
readPackageDescription, )
import System.Time (ClockTime(TOD), getClockTime,
toCalendarTime, toUTCTime,
CalendarTime, ctYear, ctMonth, )
import qualified Codec.Archive.Tar as Tar
import qualified Codec.Archive.Tar.Entry as TarEnt
import qualified Data.ByteString.Lazy.UTF8 as UTF8
import qualified Data.ByteString.Lazy as B
import qualified System.IO as IO
import Distribution.Text (display, )
import Data.String.HT (trim, )
import Data.Tuple.HT (mapFst, )
import Data.List.HT (switchL, switchR, )
import Data.Char (toLower, isSpace, isAlpha, chr, )
import Data.Version (showVersion, )
import qualified Data.List as List
{- |
See hackage-server:Distribution/Server/Pages/Package.hs
-}
packageURL :: PackageIdentifier -> String
packageURL pkgid = "/package/" ++ display pkgid
{- |
Filter out parts in parentheses and e-mails addresses
-}
removeAnnotations :: String -> String
removeAnnotations "" = ""
removeAnnotations (c:cs) =
case c of
'(' -> removeAnnotations $ drop 1 $ dropWhile (')'/=) cs
'<' -> removeAnnotations $ drop 1 $ dropWhile ('>'/=) cs
_ -> c : removeAnnotations cs
splitList :: String -> [String]
splitList =
let separate rest = ([], uncurry (:) $ recourse rest)
continue c rest = mapFst (c:) $ recourse rest
recourse str =
case str of
'/' : rest -> separate rest
'&' : rest -> separate rest
',' : rest -> separate rest
c0:rest0@('a':'n':'d':c1:rest) ->
if isSpace c0 && isSpace c1
then separate rest
else continue c0 rest0
c:rest -> continue c rest
"" -> ([], [])
in uncurry (:) . recourse
splitAuthorList :: String -> [String]
splitAuthorList =
map (\author ->
case author of
"..." -> "others"
"et al." -> "others"
_ -> author) .
filter (not . null) .
map trim .
splitList .
-- remove numbers, quotation marks ...
filter (\c -> isAlpha c || isSpace c || elem c ".-'@/&,") .
removeAnnotations
{- authors must be split with respect to ',', '/', '&' and ' and ' -}
fromPackage :: CalendarTime -> PackageDescription -> Entry.T
fromPackage time pkg =
let authors =
splitAuthorList $ PkgD.author pkg
surname =
switchL "unknown" (\firstAuthor _ ->
switchR "" (\_ -> filter isAlpha) $
words firstAuthor) $
authors
pkgId = PkgD.package pkg
Pkg.PackageName name = Pkg.pkgName pkgId
year = ctYear time
versionStr = showVersion (Pkg.pkgVersion pkgId)
bibId =
map toLower surname ++ show year ++
name ++ "-" ++ versionStr
in Entry.Cons "Misc" bibId $
("author",
if null authors
then "unknown"
else Format.authorList $
map LaTeX.fromUnicodeString authors) :
("title",
"{" ++ name ++ ": " ++
LaTeX.fromUnicodeString (PkgD.synopsis pkg) ++ "}") :
("howpublished",
"\\url{http://hackage.haskell.org" ++
packageURL (PkgD.package pkg) ++ "}") :
("year", show year) :
("month", show (ctMonth time)) :
("version", versionStr) :
("keywords", "Haskell, " ++ PkgD.category pkg ) :
("subtype", "program") :
[]
example :: IO ()
example =
do now <- toCalendarTime =<< getClockTime
pkg <- readPackageDescription Verbosity.silent "example.cabal"
putStrLn (Format.entry $ fromPackage now $ PkgD.packageDescription pkg)
{- |
This decodes UTF-8 but in contrast to UTF8.toString
it handles invalid characters like Latin-1 ones.
This way we can also cope with many texts that contain actually Latin-1.
-}
decodeUTF8orLatin :: B.ByteString -> String
decodeUTF8orLatin =
List.unfoldr (\bstr ->
flip fmap (UTF8.uncons bstr) $ \(c, rest) ->
if c==UTF8.replacement_char
then (chr $ fromIntegral $ B.head bstr, B.tail bstr)
else (c,rest))
fromTarEntry :: Tar.Entry -> String
fromTarEntry ent =
case (List.isSuffixOf ".cabal" $ TarEnt.entryPath ent,
TarEnt.entryContent ent) of
(True, TarEnt.NormalFile txt _size) ->
case parsePackageDescription (decodeUTF8orLatin txt) of
PkgP.ParseOk _ pkg ->
Format.entry $
fromPackage
(toUTCTime (TOD (fromIntegral $ TarEnt.entryTime ent) 0))
(PkgD.packageDescription pkg)
PkgP.ParseFailed msg -> show msg
_ -> ""
main :: IO ()
main =
Tar.foldEntries
(\entry cont -> putStrLn (fromTarEntry entry) >> cont)
(return ()) (IO.hPutStr IO.stderr . show) .
Tar.read =<<
B.getContents
Binary file not shown.
@@ -0,0 +1,45 @@
module Main where
import qualified Text.BibTeX.Parse as Parse
import qualified Text.BibTeX.Entry as Entry
import qualified Text.ParserCombinators.Parsec as Parsec
import qualified Data.Char as Char
import System.IO (hPutStrLn, stderr, )
typeTable :: [((String, Maybe String), String)]
typeTable =
(("article", Just "reviewed"), "reviewedjournal") :
(("article", Just "popular"), "popular") :
(("article", Nothing), "journal") :
(("inproceedings", Just "reviewed"), "reviewedconference") :
(("inproceedings", Nothing), "conference") :
(("techreport", Nothing), "techreport") :
(("inbook", Just "program"), "program") :
(("misc", Just "program"), "program") :
(("misc", Just "talk"), "talk") :
(("mastersthesis", Nothing), "thesis") :
(("phdthesis", Nothing), "thesis") :
[]
cite :: Entry.T -> String
cite entry =
maybe
"% \\nocite"
("\\nocite" ++ )
(lookup
(map Char.toLower (Entry.entryType entry),
lookup "subtype" (Entry.fields (Entry.lowerCaseFieldNames entry)))
typeTable) ++
"{" ++ Entry.identifier entry ++ "}"
main :: IO ()
main =
do bib <- getContents
case Parsec.parse (Parsec.skipMany Parsec.space >> Parse.file) "stdin" bib of
Left errMsg -> hPutStrLn stderr (show errMsg)
Right entries ->
mapM_ (putStrLn . cite) entries

0 comments on commit 28a471d

Please sign in to comment.