Skip to content
This repository has been archived by the owner on Mar 17, 2018. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
attempt at starting publication automation
  • Loading branch information
Tim DuBois committed Mar 14, 2015
1 parent ec4b571 commit 28a471d
Show file tree
Hide file tree
Showing 3 changed files with 210 additions and 0 deletions.
165 changes: 165 additions & 0 deletions Includes/Hackage.hs
@@ -0,0 +1,165 @@
module Main where

import qualified Text.BibTeX.Format as Format
import qualified Text.BibTeX.Entry as Entry
import qualified Text.LaTeX.Character as LaTeX

import qualified Distribution.PackageDescription.Parse as PkgP
import qualified Distribution.PackageDescription as PkgD
import qualified Distribution.Package as Pkg
import qualified Distribution.Verbosity as Verbosity
import Distribution.PackageDescription
(PackageDescription, )
import Distribution.Package
(PackageIdentifier(..), )
import Distribution.PackageDescription.Parse
(parsePackageDescription,
readPackageDescription, )
import System.Time (ClockTime(TOD), getClockTime,
toCalendarTime, toUTCTime,
CalendarTime, ctYear, ctMonth, )

import qualified Codec.Archive.Tar as Tar
import qualified Codec.Archive.Tar.Entry as TarEnt
import qualified Data.ByteString.Lazy.UTF8 as UTF8
import qualified Data.ByteString.Lazy as B
import qualified System.IO as IO

import Distribution.Text (display, )

import Data.String.HT (trim, )
import Data.Tuple.HT (mapFst, )
import Data.List.HT (switchL, switchR, )
import Data.Char (toLower, isSpace, isAlpha, chr, )
import Data.Version (showVersion, )
import qualified Data.List as List


{- |
See hackage-server:Distribution/Server/Pages/Package.hs
-}
packageURL :: PackageIdentifier -> String
packageURL pkgid = "/package/" ++ display pkgid


{- |
Filter out parts in parentheses and e-mails addresses
-}
removeAnnotations :: String -> String
removeAnnotations "" = ""
removeAnnotations (c:cs) =
case c of
'(' -> removeAnnotations $ drop 1 $ dropWhile (')'/=) cs
'<' -> removeAnnotations $ drop 1 $ dropWhile ('>'/=) cs
_ -> c : removeAnnotations cs

splitList :: String -> [String]
splitList =
let separate rest = ([], uncurry (:) $ recourse rest)
continue c rest = mapFst (c:) $ recourse rest
recourse str =
case str of
'/' : rest -> separate rest
'&' : rest -> separate rest
',' : rest -> separate rest
c0:rest0@('a':'n':'d':c1:rest) ->
if isSpace c0 && isSpace c1
then separate rest
else continue c0 rest0
c:rest -> continue c rest
"" -> ([], [])
in uncurry (:) . recourse

splitAuthorList :: String -> [String]
splitAuthorList =
map (\author ->
case author of
"..." -> "others"
"et al." -> "others"
_ -> author) .
filter (not . null) .
map trim .
splitList .
-- remove numbers, quotation marks ...
filter (\c -> isAlpha c || isSpace c || elem c ".-'@/&,") .
removeAnnotations

{- authors must be split with respect to ',', '/', '&' and ' and ' -}
fromPackage :: CalendarTime -> PackageDescription -> Entry.T
fromPackage time pkg =
let authors =
splitAuthorList $ PkgD.author pkg
surname =
switchL "unknown" (\firstAuthor _ ->
switchR "" (\_ -> filter isAlpha) $
words firstAuthor) $
authors
pkgId = PkgD.package pkg
Pkg.PackageName name = Pkg.pkgName pkgId
year = ctYear time
versionStr = showVersion (Pkg.pkgVersion pkgId)
bibId =
map toLower surname ++ show year ++
name ++ "-" ++ versionStr
in Entry.Cons "Misc" bibId $
("author",
if null authors
then "unknown"
else Format.authorList $
map LaTeX.fromUnicodeString authors) :
("title",
"{" ++ name ++ ": " ++
LaTeX.fromUnicodeString (PkgD.synopsis pkg) ++ "}") :
("howpublished",
"\\url{http://hackage.haskell.org" ++
packageURL (PkgD.package pkg) ++ "}") :
("year", show year) :
("month", show (ctMonth time)) :
("version", versionStr) :
("keywords", "Haskell, " ++ PkgD.category pkg ) :
("subtype", "program") :
[]

example :: IO ()
example =
do now <- toCalendarTime =<< getClockTime
pkg <- readPackageDescription Verbosity.silent "example.cabal"
putStrLn (Format.entry $ fromPackage now $ PkgD.packageDescription pkg)


{- |
This decodes UTF-8 but in contrast to UTF8.toString
it handles invalid characters like Latin-1 ones.
This way we can also cope with many texts that contain actually Latin-1.
-}
decodeUTF8orLatin :: B.ByteString -> String
decodeUTF8orLatin =
List.unfoldr (\bstr ->
flip fmap (UTF8.uncons bstr) $ \(c, rest) ->
if c==UTF8.replacement_char
then (chr $ fromIntegral $ B.head bstr, B.tail bstr)
else (c,rest))


fromTarEntry :: Tar.Entry -> String
fromTarEntry ent =
case (List.isSuffixOf ".cabal" $ TarEnt.entryPath ent,
TarEnt.entryContent ent) of
(True, TarEnt.NormalFile txt _size) ->
case parsePackageDescription (decodeUTF8orLatin txt) of
PkgP.ParseOk _ pkg ->
Format.entry $
fromPackage
(toUTCTime (TOD (fromIntegral $ TarEnt.entryTime ent) 0))
(PkgD.packageDescription pkg)
PkgP.ParseFailed msg -> show msg
_ -> ""

main :: IO ()
main =
Tar.foldEntries
(\entry cont -> putStrLn (fromTarEntry entry) >> cont)
(return ()) (IO.hPutStr IO.stderr . show) .
Tar.read =<<
B.getContents

Binary file added Includes/Publications.exe
Binary file not shown.
45 changes: 45 additions & 0 deletions Includes/Publications.hs
@@ -0,0 +1,45 @@
module Main where

import qualified Text.BibTeX.Parse as Parse
import qualified Text.BibTeX.Entry as Entry
import qualified Text.ParserCombinators.Parsec as Parsec

import qualified Data.Char as Char
import System.IO (hPutStrLn, stderr, )


typeTable :: [((String, Maybe String), String)]
typeTable =
(("article", Just "reviewed"), "reviewedjournal") :
(("article", Just "popular"), "popular") :
(("article", Nothing), "journal") :
(("inproceedings", Just "reviewed"), "reviewedconference") :
(("inproceedings", Nothing), "conference") :
(("techreport", Nothing), "techreport") :
(("inbook", Just "program"), "program") :
(("misc", Just "program"), "program") :
(("misc", Just "talk"), "talk") :
(("mastersthesis", Nothing), "thesis") :
(("phdthesis", Nothing), "thesis") :
[]


cite :: Entry.T -> String
cite entry =
maybe
"% \\nocite"
("\\nocite" ++ )
(lookup
(map Char.toLower (Entry.entryType entry),
lookup "subtype" (Entry.fields (Entry.lowerCaseFieldNames entry)))
typeTable) ++
"{" ++ Entry.identifier entry ++ "}"


main :: IO ()
main =
do bib <- getContents
case Parsec.parse (Parsec.skipMany Parsec.space >> Parse.file) "stdin" bib of
Left errMsg -> hPutStrLn stderr (show errMsg)
Right entries ->
mapM_ (putStrLn . cite) entries

0 comments on commit 28a471d

Please sign in to comment.