Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

speedup encode and decode #1

Merged
merged 3 commits into from Nov 17, 2011
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
81 changes: 62 additions & 19 deletions Data/ByteString/Base16.hs
@@ -1,4 +1,4 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE BangPatterns, MagicHash #-}

-- |
-- Module : Data.ByteString.Base16
Expand All @@ -17,16 +17,16 @@ module Data.ByteString.Base16
, decode
) where

import Control.Monad (forM_)
import Data.Bits ((.&.), shiftL, shiftR)
import Data.ByteString.Char8 (empty)
import Data.ByteString.Internal (ByteString(..), createAndTrim', unsafeCreate)
import Data.Word (Word8)
import Data.Bits (shiftL)
import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
import Foreign.Marshal.Alloc (mallocBytes)
import Foreign.Ptr (Ptr, minusPtr, plusPtr)
import Foreign.Storable (peek, poke)
import System.IO.Unsafe (unsafePerformIO)
import GHC.Prim
import GHC.Types
import GHC.Word

-- | Encode a string into base16 form. The result will always be a
-- multiple of 2 bytes in length.
Expand All @@ -47,16 +47,45 @@ encode (PS sfp soff slen)
go s d | s == e = return ()
| otherwise = do
x <- peek8 s
poke d =<< (peek (digits `plusPtr` (x `shiftR` 4)) :: IO Word8)
poke (d `plusPtr` 1) =<< (peek (digits `plusPtr` (x .&. 0xf)) :: IO Word8)
poke d (tlookup tableHi x)
poke (d `plusPtr` 1) (tlookup tableLo x)
go (s `plusPtr` 1) (d `plusPtr` 2)
digits :: Ptr Word8
!digits = unsafePerformIO $ do
ptr <- mallocBytes 16
forM_ (zip [0..] "0123456789abcdef") $ \(i,c) ->
poke (ptr `plusPtr` i) ((fromIntegral (fromEnum c)) :: Word8)
return ptr
{-# NOINLINE digits #-}
tlookup :: Addr# -> Int -> Word8
tlookup table (I# index) = W8# (indexWord8OffAddr# table index)
!tableLo =
"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
\\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66"#
!tableHi =
"\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\
\\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\
\\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\
\\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\
\\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\
\\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\
\\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\
\\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\
\\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\
\\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\
\\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\
\\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\
\\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\
\\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\
\\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\
\\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66"#

-- | Decode a string from base16 form. The first element of the
-- returned tuple contains the decoded data. The second element starts
Expand All @@ -78,11 +107,6 @@ decode (PS sfp soff slen) =
go s d | s == e = let len = e `minusPtr` sptr
in return (0, len `div` 2, ps sfp (soff+len) (slen-len))
| otherwise = do
let hex w
| w >= 48 && w <= 57 = w - 48
| w >= 97 && w <= 102 = w - 97 + 10
| w >= 65 && w <= 70 = w - 65 + 10
| otherwise = 0xff
hi <- hex `fmap` peek8 s
lo <- hex `fmap` peek8 (s `plusPtr` 1)
if lo == 0xff || hi == 0xff
Expand All @@ -92,6 +116,25 @@ decode (PS sfp soff slen) =
poke d . fromIntegral $ lo + (hi `shiftL` 4)
go (s `plusPtr` 2) (d `plusPtr` 1)

hex (I# index) = W8# $ indexWord8OffAddr# table index
!table =
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff\
\\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#

peek8 :: Ptr Word8 -> IO Int
peek8 p = fromIntegral `fmap` peek p

Expand Down
3 changes: 2 additions & 1 deletion base16-bytestring.cabal
Expand Up @@ -23,7 +23,8 @@ library

build-depends:
base == 4.*,
bytestring == 0.9.*
bytestring == 0.9.*,
ghc-prim

ghc-options: -Wall -funbox-strict-fields
ghc-prof-options: -auto-all
Expand Down
7 changes: 7 additions & 0 deletions benchmarks/Benchmarks.hs
Expand Up @@ -13,4 +13,11 @@ main = defaultMain [
, bench "1024" $ whnf B16.encode (generate 1024)
, bench "65536" $ whnf B16.encode (generate 65536)
]
, bgroup "decode" [
bench "8" $ whnf (B16.decode . B16.encode) (generate 8)
, bench "32" $ whnf (B16.decode . B16.encode) (generate 32)
, bench "128" $ whnf (B16.decode . B16.encode) (generate 128)
, bench "1024" $ whnf (B16.decode . B16.encode) (generate 1024)
, bench "65536" $ whnf (B16.decode . B16.encode) (generate 65536)
]
]