-
Notifications
You must be signed in to change notification settings - Fork 4
/
Core.hs
307 lines (284 loc) · 12.3 KB
/
Core.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
-- |
-- Copyright: (c) 2022 Andrew Lelechenko
-- Licence: BSD3
-- Maintainer: Andrew Lelechenko <andrew.lelechenko@gmail.com>
--
-- Low-level routines for 'Buffer' manipulations.
module Data.Text.Builder.Linear.Core (
Buffer,
runBuffer,
runBufferBS,
dupBuffer,
consumeBuffer,
eraseBuffer,
byteSizeOfBuffer,
lengthOfBuffer,
dropBuffer,
takeBuffer,
appendBounded,
appendExact,
prependBounded,
prependExact,
(><),
) where
import Data.ByteString.Internal (ByteString (..))
import Data.Text qualified as T
import Data.Text.Array (Array (..), MArray (..))
import Data.Text.Array qualified as A
import Data.Text.Internal (Text (..))
import GHC.Exts (Int (..), Levity (..), RuntimeRep (..), TYPE, byteArrayContents#, isByteArrayPinned#, isTrue#, plusAddr#, sizeofByteArray#, unsafeCoerce#)
import GHC.ForeignPtr (ForeignPtr (..), ForeignPtrContents (..))
import GHC.ST (ST (..), runST)
-- | Internally 'Buffer' is a mutable buffer.
-- If a client gets hold of a variable of type 'Buffer',
-- they'd be able to pass a mutable buffer to concurrent threads.
-- That's why API below is carefully designed to prevent such possibility:
-- clients always work with linear functions 'Buffer' ⊸ 'Buffer' instead
-- and run them on an empty 'Buffer' to extract results.
--
-- In terms of [@linear-base@](https://hackage.haskell.org/package/linear-base)
-- 'Buffer' is [@Consumable@](https://hackage.haskell.org/package/linear-base/docs/Prelude-Linear.html#t:Consumable)
-- (see 'consumeBuffer')
-- and [@Dupable@](https://hackage.haskell.org/package/linear-base/docs/Prelude-Linear.html#t:Dupable)
-- (see 'dupBuffer'),
-- but not [@Movable@](https://hackage.haskell.org/package/linear-base/docs/Prelude-Linear.html#t:Movable).
--
-- >>> :set -XOverloadedStrings -XLinearTypes
-- >>> import Data.Text.Builder.Linear.Buffer
-- >>> runBuffer (\b -> '!' .<| "foo" <| (b |> "bar" |>. '.'))
-- "!foobar."
--
-- Remember: this is a strict builder, so on contrary to "Data.Text.Lazy.Builder"
-- for optimal performance you should use strict left folds instead of lazy right ones.
--
-- 'Buffer' is an unlifted datatype,
-- so you can put it into an unboxed tuple @(# ..., ... #)@,
-- but not into @(..., ...)@.
data Buffer ∷ TYPE ('BoxedRep 'Unlifted) where
Buffer ∷ {-# UNPACK #-} !Text → Buffer
-- | Unwrap 'Buffer', no-op.
-- Most likely, this is not the function you're looking for
-- and you need 'runBuffer' instead.
unBuffer ∷ Buffer ⊸ Text
unBuffer (Buffer x) = x
-- | Run a linear function on an empty 'Buffer', producing a strict 'Text'.
--
-- Be careful to write @runBuffer (\b -> ...)@ instead of @runBuffer $ \b -> ...@,
-- because current implementation of linear types lacks special support for '($)'.
-- Another option is to enable @{-# LANGUAGE BlockArguments #-}@
-- and write @runBuffer \b -> ...@.
-- Alternatively, you can import
-- [@($)@](https://hackage.haskell.org/package/linear-base/docs/Prelude-Linear.html#v:-36-)
-- from [@linear-base@](https://hackage.haskell.org/package/linear-base).
--
-- 'runBuffer' is similar in spirit to mutable arrays API in
-- [@Data.Array.Mutable.Linear@](https://hackage.haskell.org/package/linear-base/docs/Data-Array-Mutable-Linear.html),
-- which provides functions like
-- [@fromList@](https://hackage.haskell.org/package/linear-base/docs/Data-Array-Mutable-Linear.html#v:fromList) ∷ [@a@] → (@Vector@ @a@ ⊸ [@Ur@](https://hackage.haskell.org/package/linear-base-0.3.0/docs/Prelude-Linear.html#t:Ur) b) ⊸ [@Ur@](https://hackage.haskell.org/package/linear-base-0.3.0/docs/Prelude-Linear.html#t:Ur) @b@.
-- Here the initial buffer is always empty and @b@ is 'Text'. Since 'Text' is
-- [@Movable@](https://hackage.haskell.org/package/linear-base/docs/Prelude-Linear.html#t:Movable),
-- 'Text' and [@Ur@](https://hackage.haskell.org/package/linear-base-0.3.0/docs/Prelude-Linear.html#t:Ur) 'Text' are equivalent.
runBuffer ∷ (Buffer ⊸ Buffer) ⊸ Text
runBuffer f = unBuffer (shrinkBuffer (f (Buffer mempty)))
-- | Same as 'runBuffer', but returning a UTF-8 encoded strict 'ByteString'.
runBufferBS ∷ (Buffer ⊸ Buffer) ⊸ ByteString
runBufferBS f = case shrinkBuffer (f (Buffer memptyPinned)) of
Buffer (Text (ByteArray arr) (I# from) len) → BS fp len
where
addr# = byteArrayContents# arr `plusAddr#` from
fp = ForeignPtr addr# (PlainPtr (unsafeCoerce# arr))
shrinkBuffer ∷ Buffer ⊸ Buffer
shrinkBuffer (Buffer (Text arr from len)) = Buffer $ runST $ do
arrM ← unsafeThaw arr
A.shrinkM arrM (from + len)
arr' ← A.unsafeFreeze arrM
pure $ Text arr' from len
memptyPinned ∷ Text
memptyPinned = runST $ do
marr ← A.newPinned 0
arr ← A.unsafeFreeze marr
pure $ Text arr 0 0
-- | Duplicate builder. Feel free to process results in parallel threads.
-- Similar to
-- [@Dupable@](https://hackage.haskell.org/package/linear-base/docs/Prelude-Linear.html#t:Dupable)
-- from [@linear-base@](https://hackage.haskell.org/package/linear-base).
--
-- It is a bit tricky to use because of
-- <https://ghc.gitlab.haskell.org/ghc/doc/users_guide/exts/linear_types.html#limitations current limitations>
-- of linear types with regards to @let@ and @where@. E. g., one cannot write
--
-- > let (# b1, b2 #) = dupBuffer b in ("foo" <| b1) >< (b2 |> "bar")
--
-- Instead write:
--
-- >>> :set -XOverloadedStrings -XLinearTypes -XUnboxedTuples
-- >>> import Data.Text.Builder.Linear.Buffer
-- >>> runBuffer (\b -> (\(# b1, b2 #) -> ("foo" <| b1) >< (b2 |> "bar")) (dupBuffer b))
-- "foobar"
--
-- Note the unboxed tuple: 'Buffer' is an unlifted datatype,
-- so it cannot be put into @(..., ...)@.
dupBuffer ∷ Buffer ⊸ (# Buffer, Buffer #)
dupBuffer (Buffer x) = (# Buffer x, Buffer (T.copy x) #)
-- | Consume buffer linearly,
-- similar to
-- [@Consumable@](https://hackage.haskell.org/package/linear-base/docs/Prelude-Linear.html#t:Consumable)
-- from [@linear-base@](https://hackage.haskell.org/package/linear-base).
consumeBuffer ∷ Buffer ⊸ ()
consumeBuffer Buffer {} = ()
-- | Erase buffer's content, replacing it with an empty 'Text'.
eraseBuffer ∷ Buffer ⊸ Buffer
eraseBuffer (Buffer (Text arr _ _)) =
Buffer (if isPinned arr then memptyPinned else mempty)
-- | Return buffer's size in __bytes__ (not in 'Char's).
-- This could be useful to implement a lazy builder atop of a strict one.
byteSizeOfBuffer ∷ Buffer ⊸ (# Buffer, Word #)
byteSizeOfBuffer (Buffer t@(Text _ _ len)) = (# Buffer t, fromIntegral len #)
-- | Return buffer's length in 'Char's (not in bytes).
-- This could be useful to implement @dropEndBuffer@ and @takeEndBuffer@, e. g.,
--
-- @
-- import Data.Unrestricted.Linear
--
-- dropEndBuffer :: Word -> Buffer %1 -> Buffer
-- dropEndBuffer n buf =
-- (\(# buf', len #) -> case move len of Ur len' -> takeBuffer (len' - n) buf')
-- (lengthOfBuffer buf)
-- @
lengthOfBuffer ∷ Buffer ⊸ (# Buffer, Word #)
lengthOfBuffer (Buffer t) = (# Buffer t, fromIntegral (T.length t) #)
-- | Slice 'Buffer' by dropping given number of 'Char's.
dropBuffer ∷ Word → Buffer ⊸ Buffer
dropBuffer nChar (Buffer t@(Text arr off len))
| nByte <= 0 = Buffer (Text arr (off + len) 0)
| otherwise = Buffer (Text arr (off + nByte) (len - nByte))
where
nByte = T.measureOff (fromIntegral nChar) t
-- | Slice 'Buffer' by taking given number of 'Char's.
takeBuffer ∷ Word → Buffer ⊸ Buffer
takeBuffer nChar (Buffer t@(Text arr off _))
| nByte <= 0 = Buffer t
| otherwise = Buffer (Text arr off nByte)
where
nByte = T.measureOff (fromIntegral nChar) t
-- | Low-level routine to append data of unknown size to a 'Buffer'.
appendBounded
∷ Int
-- ^ Upper bound for the number of bytes, written by an action
→ (∀ s. MArray s → Int → ST s Int)
-- ^ Action, which writes bytes __starting__ from the given offset
-- and returns an actual number of bytes written.
→ Buffer
⊸ Buffer
appendBounded maxSrcLen appender (Buffer (Text dst dstOff dstLen)) = Buffer $ runST $ do
let dstFullLen = sizeofByteArray dst
newFullLen = dstOff + 2 * (dstLen + maxSrcLen)
newM ←
if dstOff + dstLen + maxSrcLen <= dstFullLen
then unsafeThaw dst
else do
tmpM ← (if isPinned dst then A.newPinned else A.new) newFullLen
A.copyI dstLen tmpM dstOff dst dstOff
pure tmpM
srcLen ← appender newM (dstOff + dstLen)
new ← A.unsafeFreeze newM
pure $ Text new dstOff (dstLen + srcLen)
{-# INLINE appendBounded #-}
-- | Low-level routine to append data of known size to a 'Buffer'.
appendExact
∷ Int
-- ^ Exact number of bytes, written by an action
→ (∀ s. MArray s → Int → ST s ())
-- ^ Action, which writes bytes __starting__ from the given offset
→ Buffer
⊸ Buffer
appendExact srcLen appender =
appendBounded
srcLen
(\dst dstOff → appender dst dstOff >> pure srcLen)
{-# INLINE appendExact #-}
-- | Low-level routine to prepend data of unknown size to a 'Buffer'.
prependBounded
∷ Int
-- ^ Upper bound for the number of bytes, written by an action
→ (∀ s. MArray s → Int → ST s Int)
-- ^ Action, which writes bytes __finishing__ before the given offset
-- and returns an actual number of bytes written.
→ (∀ s. MArray s → Int → ST s Int)
-- ^ Action, which writes bytes __starting__ from the given offset
-- and returns an actual number of bytes written.
→ Buffer
⊸ Buffer
prependBounded maxSrcLen prepender appender (Buffer (Text dst dstOff dstLen))
| maxSrcLen <= dstOff = Buffer $ runST $ do
newM ← unsafeThaw dst
srcLen ← prepender newM dstOff
new ← A.unsafeFreeze newM
pure $ Text new (dstOff - srcLen) (srcLen + dstLen)
| otherwise = Buffer $ runST $ do
let dstFullLen = sizeofByteArray dst
newOff = dstLen + maxSrcLen
newFullLen = 2 * newOff + (dstFullLen - dstOff - dstLen)
newM ← (if isPinned dst then A.newPinned else A.new) newFullLen
srcLen ← appender newM newOff
A.copyI dstLen newM (newOff + srcLen) dst dstOff
new ← A.unsafeFreeze newM
pure $ Text new newOff (dstLen + srcLen)
{-# INLINE prependBounded #-}
-- | Low-level routine to append data of unknown size to a 'Buffer'.
prependExact
∷ Int
-- ^ Exact number of bytes, written by an action
→ (∀ s. MArray s → Int → ST s ())
-- ^ Action, which writes bytes __starting__ from the given offset
→ Buffer
⊸ Buffer
prependExact srcLen appender =
prependBounded
srcLen
(\dst dstOff → appender dst (dstOff - srcLen) >> pure srcLen)
(\dst dstOff → appender dst dstOff >> pure srcLen)
{-# INLINE prependExact #-}
unsafeThaw ∷ Array → ST s (MArray s)
unsafeThaw (ByteArray a) = ST $ \s# →
(# s#, MutableByteArray (unsafeCoerce# a) #)
sizeofByteArray ∷ Array → Int
sizeofByteArray (ByteArray a) = I# (sizeofByteArray# a)
isPinned ∷ Array → Bool
isPinned (ByteArray a) = isTrue# (isByteArrayPinned# a)
-- | Concatenate two 'Buffer's, potentially mutating both of them.
--
-- You likely need to use 'dupBuffer' to get hold on two builders at once:
--
-- >>> :set -XOverloadedStrings -XLinearTypes -XUnboxedTuples
-- >>> import Data.Text.Builder.Linear.Buffer
-- >>> runBuffer (\b -> (\(# b1, b2 #) -> ("foo" <| b1) >< (b2 |> "bar")) (dupBuffer b))
-- "foobar"
(><) ∷ Buffer ⊸ Buffer ⊸ Buffer
infix 6 ><
Buffer (Text left leftOff leftLen) >< Buffer (Text right rightOff rightLen) = Buffer $ runST $ do
let leftFullLen = sizeofByteArray left
rightFullLen = sizeofByteArray right
canCopyToLeft = leftOff + leftLen + rightLen <= leftFullLen
canCopyToRight = leftLen <= rightOff
shouldCopyToLeft = canCopyToLeft && (not canCopyToRight || leftLen >= rightLen)
if shouldCopyToLeft
then do
newM ← unsafeThaw left
A.copyI rightLen newM (leftOff + leftLen) right rightOff
new ← A.unsafeFreeze newM
pure $ Text new leftOff (leftLen + rightLen)
else
if canCopyToRight
then do
newM ← unsafeThaw right
A.copyI leftLen newM (rightOff - leftLen) left leftOff
new ← A.unsafeFreeze newM
pure $ Text new (rightOff - leftLen) (leftLen + rightLen)
else do
let fullLen = leftOff + leftLen + rightLen + (rightFullLen - rightOff - rightLen)
newM ← (if isPinned left || isPinned right then A.newPinned else A.new) fullLen
A.copyI leftLen newM leftOff left leftOff
A.copyI rightLen newM (leftOff + leftLen) right rightOff
new ← A.unsafeFreeze newM
pure $ Text new leftOff (leftLen + rightLen)