Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lsm-tree.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ library
Database.LSMTree.Internal.BloomFilter
Database.LSMTree.Internal.BloomFilterQuery1
Database.LSMTree.Internal.ByteString
Database.LSMTree.Internal.ChecksumHandle
Database.LSMTree.Internal.Chunk
Database.LSMTree.Internal.Config
Database.LSMTree.Internal.CRC32C
Expand Down
13 changes: 13 additions & 0 deletions src-extras/Database/LSMTree/Extras/NoThunks.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import Data.Word
import Database.LSMTree.Internal as Internal
import Database.LSMTree.Internal.BlobFile
import Database.LSMTree.Internal.BlobRef
import Database.LSMTree.Internal.ChecksumHandle
import Database.LSMTree.Internal.Config
import Database.LSMTree.Internal.CRC32C
import Database.LSMTree.Internal.Entry
Expand Down Expand Up @@ -206,6 +207,18 @@ deriving anyclass instance NoThunks SessionRoot
deriving stock instance Generic RunFsPaths
deriving anyclass instance NoThunks RunFsPaths

deriving stock instance Generic (ForKOps a)
deriving anyclass instance NoThunks a => NoThunks (ForKOps a)

deriving stock instance Generic (ForBlob a)
deriving anyclass instance NoThunks a => NoThunks (ForBlob a)

deriving stock instance Generic (ForFilter a)
deriving anyclass instance NoThunks a => NoThunks (ForFilter a)

deriving stock instance Generic (ForIndex a)
deriving anyclass instance NoThunks a => NoThunks (ForIndex a)

deriving stock instance Generic (ForRunFiles a)
deriving anyclass instance NoThunks a => NoThunks (ForRunFiles a)

Expand Down
1 change: 0 additions & 1 deletion src/Database/LSMTree/Internal/CRC32C.hs
Original file line number Diff line number Diff line change
Expand Up @@ -348,4 +348,3 @@ formatChecksumsFile checksums =
<> BS.word32HexFixed crc
<> BS.char8 '\n'
| (ChecksumsFileName name, CRC32C crc) <- Map.toList checksums ]

247 changes: 247 additions & 0 deletions src/Database/LSMTree/Internal/ChecksumHandle.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
module Database.LSMTree.Internal.ChecksumHandle
(
-- * Checksum handles
-- $checksum-handles
ChecksumHandle (..),
makeHandle,
readChecksum,
dropCache,
closeHandle,
writeToHandle,
-- * Specialised writers
writeRawPage,
writeRawOverflowPages,
writeBlob,
copyBlob,
writeFilter,
writeIndexHeader,
writeIndexChunk,
writeIndexFinal,
) where

import Control.Monad.Class.MonadSTM (MonadSTM (..))
import Control.Monad.Class.MonadThrow (MonadThrow)
import Control.Monad.Primitive
import Data.BloomFilter (Bloom)
import qualified Data.ByteString.Lazy as BSL
import Data.Primitive.PrimVar
import Data.Word (Word64)
import Database.LSMTree.Internal.BlobRef (BlobSpan (..), RawBlobRef)
import qualified Database.LSMTree.Internal.BlobRef as BlobRef
import Database.LSMTree.Internal.BloomFilter (bloomFilterToLBS)
import Database.LSMTree.Internal.Chunk (Chunk)
import qualified Database.LSMTree.Internal.Chunk as Chunk (toByteString)
import Database.LSMTree.Internal.CRC32C (CRC32C)
import qualified Database.LSMTree.Internal.CRC32C as CRC
import Database.LSMTree.Internal.Entry
import Database.LSMTree.Internal.IndexCompact (IndexCompact)
import qualified Database.LSMTree.Internal.IndexCompact as Index
import Database.LSMTree.Internal.Paths (ForBlob (..), ForFilter (..),
ForIndex (..), ForKOps (..))
import qualified Database.LSMTree.Internal.RawBytes as RB
import Database.LSMTree.Internal.RawOverflowPage (RawOverflowPage)
import qualified Database.LSMTree.Internal.RawOverflowPage as RawOverflowPage
import Database.LSMTree.Internal.RawPage (RawPage)
import qualified Database.LSMTree.Internal.RawPage as RawPage
import Database.LSMTree.Internal.Serialise
import qualified System.FS.API as FS
import System.FS.API
import qualified System.FS.BlockIO.API as FS
import System.FS.BlockIO.API (HasBlockIO)

{-------------------------------------------------------------------------------
ChecksumHandle
-------------------------------------------------------------------------------}

{- $checksum-handles
A handle ('ChecksumHandle') that maintains a running CRC32 checksum.
-}

-- | Tracks the checksum of a (write mode) file handle.
data ChecksumHandle s h = ChecksumHandle !(FS.Handle h) !(PrimVar s CRC32C)

{-# SPECIALISE makeHandle ::
HasFS IO h
-> FS.FsPath
-> IO (ChecksumHandle RealWorld h) #-}
makeHandle ::
(MonadSTM m, PrimMonad m)
=> HasFS m h
-> FS.FsPath
-> m (ChecksumHandle (PrimState m) h)
makeHandle fs path =
ChecksumHandle
<$> FS.hOpen fs path (FS.WriteMode FS.MustBeNew)
<*> newPrimVar CRC.initialCRC32C

{-# SPECIALISE readChecksum ::
ChecksumHandle RealWorld h
-> IO CRC32C #-}
readChecksum ::
PrimMonad m
=> ChecksumHandle (PrimState m) h
-> m CRC32C
readChecksum (ChecksumHandle _h checksum) = readPrimVar checksum

dropCache :: HasBlockIO m h -> ChecksumHandle (PrimState m) h -> m ()
dropCache hbio (ChecksumHandle h _) = FS.hDropCacheAll hbio h

closeHandle :: HasFS m h -> ChecksumHandle (PrimState m) h -> m ()
closeHandle fs (ChecksumHandle h _checksum) = FS.hClose fs h

{-# SPECIALISE writeToHandle ::
HasFS IO h
-> ChecksumHandle RealWorld h
-> BSL.ByteString
-> IO () #-}
writeToHandle ::
(MonadSTM m, PrimMonad m)
=> HasFS m h
-> ChecksumHandle (PrimState m) h
-> BSL.ByteString
-> m ()
writeToHandle fs (ChecksumHandle h checksum) lbs = do
crc <- readPrimVar checksum
(_, crc') <- CRC.hPutAllChunksCRC32C fs h lbs crc
writePrimVar checksum crc'

{-------------------------------------------------------------------------------
Specialised Writers for ChecksumHandle
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where functions like writeRawPage would previously select the correct handle from the RunBuilder such as forRunKOps runBuilderHandles, the new newRawPage could in theory be passed any ChecksumHandle. It's probably not a problem in practice, as the tests would catch incorrect usage. Still, if you think it's a good idea we could introduce some type safety by adding some newtypes, such as newtype KOpsHandle s h = KOpsHandle (ChecksumHandle s h)

Thinking along those same lines, writeBlob and copyBlob are currently passed a variable holding the offset into the file: this could also be a bogus variable with no connection to the blob file. Maybe for the blob handle, we can introduce data BlobHandle s h = BlobHandle !(PrimVar s Word64) !(ChecksumHandle s h) instead of a newtype around just the ChecksumHandle part

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That might be useful. I took them out of the wrapper because otherwise I'd need additional handles for two nonexistent files. Wrapping them up individually would indeed solve that. Would you like me to implement that?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can implement it, or leave a TODO, what you prefer

-------------------------------------------------------------------------------}

{-# SPECIALISE writeRawPage ::
HasFS IO h
-> ForKOps (ChecksumHandle RealWorld h)
-> RawPage
-> IO () #-}
writeRawPage ::
(MonadSTM m, PrimMonad m)
=> HasFS m h
-> ForKOps (ChecksumHandle (PrimState m) h)
-> RawPage
-> m ()
writeRawPage hfs kOpsHandle =
writeToHandle hfs (unForKOps kOpsHandle)
. BSL.fromStrict
. RB.unsafePinnedToByteString -- 'RawPage' is guaranteed to be pinned
. RawPage.rawPageRawBytes

{-# SPECIALISE writeRawOverflowPages ::
HasFS IO h
-> ForKOps (ChecksumHandle RealWorld h)
-> [RawOverflowPage]
-> IO () #-}
writeRawOverflowPages ::
(MonadSTM m, PrimMonad m)
=> HasFS m h
-> ForKOps (ChecksumHandle (PrimState m) h)
-> [RawOverflowPage]
-> m ()
writeRawOverflowPages hfs kOpsHandle =
writeToHandle hfs (unForKOps kOpsHandle)
. BSL.fromChunks
. map (RawOverflowPage.rawOverflowPageToByteString)

{-# SPECIALISE writeBlob ::
HasFS IO h
-> PrimVar RealWorld Word64
-> ForBlob (ChecksumHandle RealWorld h)
-> SerialisedBlob
-> IO BlobSpan #-}
writeBlob ::
(MonadSTM m, PrimMonad m)
=> HasFS m h
-> PrimVar (PrimState m) Word64
-> ForBlob (ChecksumHandle (PrimState m) h)
-> SerialisedBlob
-> m BlobSpan
writeBlob hfs blobOffset blobHandle blob = do
-- NOTE: This is different from BlobFile.writeBlob. This is because BlobFile
-- internalises a regular Handle, rather than a ChecksumHandle. These two
-- functions cannot be easily unified, because BlobFile.writeBlob permits
-- writing blobs to arbitrary positions in the blob file, whereas, by the
-- very nature of CRC32 checksums, ChecksumHandle.writeBlob only supports
-- sequential writes.
let size = sizeofBlob64 blob
offset <- readPrimVar blobOffset
modifyPrimVar blobOffset (+size)
let SerialisedBlob rb = blob
let lbs = BSL.fromStrict $ RB.toByteString rb
writeToHandle hfs (unForBlob blobHandle) lbs
return (BlobSpan offset (fromIntegral size))

{-# SPECIALISE copyBlob ::
HasFS IO h
-> PrimVar RealWorld Word64
-> ForBlob (ChecksumHandle RealWorld h)
-> RawBlobRef IO h
-> IO BlobSpan #-}
copyBlob ::
(MonadSTM m, MonadThrow m, PrimMonad m)
=> HasFS m h
-> PrimVar (PrimState m) Word64
-> ForBlob (ChecksumHandle (PrimState m) h)
-> RawBlobRef m h
-> m BlobSpan
copyBlob hfs blobOffset blobHandle blobref = do
blob <- BlobRef.readRawBlobRef hfs blobref
writeBlob hfs blobOffset blobHandle blob

{-# SPECIALISE writeFilter ::
HasFS IO h
-> ForFilter (ChecksumHandle RealWorld h)
-> Bloom SerialisedKey
-> IO () #-}
writeFilter ::
(MonadSTM m, PrimMonad m)
=> HasFS m h
-> ForFilter (ChecksumHandle (PrimState m) h)
-> Bloom SerialisedKey
-> m ()
writeFilter hfs filterHandle bf =
writeToHandle hfs (unForFilter filterHandle) (bloomFilterToLBS bf)

{-# SPECIALISE writeIndexHeader ::
HasFS IO h
-> ForIndex (ChecksumHandle RealWorld h)
-> IO () #-}
writeIndexHeader ::
(MonadSTM m, PrimMonad m)
=> HasFS m h
-> ForIndex (ChecksumHandle (PrimState m) h)
-> m ()
writeIndexHeader hfs indexHandle =
writeToHandle hfs (unForIndex indexHandle) $
Index.headerLBS

{-# SPECIALISE writeIndexChunk ::
HasFS IO h
-> ForIndex (ChecksumHandle RealWorld h)
-> Chunk
-> IO () #-}
writeIndexChunk ::
(MonadSTM m, PrimMonad m)
=> HasFS m h
-> ForIndex (ChecksumHandle (PrimState m) h)
-> Chunk
-> m ()
writeIndexChunk hfs indexHandle chunk =
writeToHandle hfs (unForIndex indexHandle) $
BSL.fromStrict $ Chunk.toByteString chunk

{-# SPECIALISE writeIndexFinal ::
HasFS IO h
-> ForIndex (ChecksumHandle RealWorld h)
-> NumEntries
-> IndexCompact
-> IO () #-}
writeIndexFinal ::
(MonadSTM m, PrimMonad m)
=> HasFS m h
-> ForIndex (ChecksumHandle (PrimState m) h)
-> NumEntries
-> IndexCompact
-> m ()
writeIndexFinal hfs indexHandle numEntries index =
writeToHandle hfs (unForIndex indexHandle) $
Index.finalLBS numEntries index
Loading