Skip to content

Commit 4e54346

Browse files
committed
merge: dev
2 parents edfa3ff + c7bb9b1 commit 4e54346

File tree

23 files changed

+673
-122
lines changed

23 files changed

+673
-122
lines changed

src/GraphBLAS-sharp.Backend/Common/Sum.fs

Lines changed: 237 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,12 @@ open GraphBLAS.FSharp.Backend.Quotes
55
open Microsoft.FSharp.Control
66
open Microsoft.FSharp.Quotations
77
open GraphBLAS.FSharp.Backend.Objects.ClContext
8+
open GraphBLAS.FSharp.Backend.Objects.ArraysExtensions
89

910
module Reduce =
11+
/// <summary>
12+
/// Generalized reduction pattern.
13+
/// </summary>
1014
let private runGeneral (clContext: ClContext) workGroupSize scan scanToCell =
1115

1216
fun (processor: MailboxProcessor<_>) (inputArray: ClArray<'a>) ->
@@ -45,8 +49,8 @@ module Reduce =
4549
let result =
4650
scanToCell processor fstVertices verticesLength
4751

48-
processor.Post(Msg.CreateFreeMsg(firstVerticesArray))
49-
processor.Post(Msg.CreateFreeMsg(secondVerticesArray))
52+
firstVerticesArray.Free processor
53+
secondVerticesArray.Free processor
5054

5155
result
5256

@@ -127,6 +131,13 @@ module Reduce =
127131

128132
resultCell
129133

134+
/// <summary>
135+
/// Summarize array elements.
136+
/// </summary>
137+
/// <param name="clContext">ClContext.</param>
138+
/// <param name="workGroupSize">Work group size.</param>
139+
/// <param name="op">Summation operation.</param>
140+
/// <param name="zero">Neutral element for summation.</param>
130141
let sum (clContext: ClContext) workGroupSize op zero =
131142

132143
let scan = scanSum clContext workGroupSize op zero
@@ -224,6 +235,12 @@ module Reduce =
224235

225236
resultCell
226237

238+
/// <summary>
239+
/// Reduce an array of values.
240+
/// </summary>
241+
/// <param name="clContext">ClContext.</param>
242+
/// <param name="workGroupSize">Work group size.</param>
243+
/// <param name="op">Reduction operation.</param>
227244
let reduce (clContext: ClContext) workGroupSize op =
228245

229246
let scan = scanReduce clContext workGroupSize op
@@ -235,3 +252,221 @@ module Reduce =
235252
runGeneral clContext workGroupSize scan scanToCell
236253

237254
fun (processor: MailboxProcessor<_>) (array: ClArray<'a>) -> run processor array
255+
256+
/// <summary>
257+
/// Reduction of an array of values by an array of keys.
258+
/// </summary>
259+
module ByKey =
260+
/// <summary>
261+
/// Reduce an array of values by key using a single work item.
262+
/// </summary>
263+
/// <param name="clContext">ClContext.</param>
264+
/// <param name="workGroupSize">Work group size.</param>
265+
/// <param name="reduceOp">Operation for reducing values.</param>
266+
/// <remarks>
267+
/// The length of the result must be calculated in advance.
268+
/// </remarks>
269+
let sequential (clContext: ClContext) workGroupSize (reduceOp: Expr<'a -> 'a -> 'a>) =
270+
271+
let kernel =
272+
<@ fun (ndRange: Range1D) length (keys: ClArray<int>) (values: ClArray<'a>) (reducedValues: ClArray<'a>) (reducedKeys: ClArray<int>) ->
273+
274+
let gid = ndRange.GlobalID0
275+
276+
if gid = 0 then
277+
let mutable currentKey = keys.[0]
278+
let mutable segmentResult = values.[0]
279+
let mutable segmentCount = 0
280+
281+
for i in 1 .. length - 1 do
282+
if currentKey = keys.[i] then
283+
segmentResult <- (%reduceOp) segmentResult values.[i]
284+
else
285+
reducedValues.[segmentCount] <- segmentResult
286+
reducedKeys.[segmentCount] <- currentKey
287+
288+
segmentCount <- segmentCount + 1
289+
currentKey <- keys.[i]
290+
segmentResult <- values.[i]
291+
292+
reducedKeys.[segmentCount] <- currentKey
293+
reducedValues.[segmentCount] <- segmentResult @>
294+
295+
let kernel = clContext.Compile kernel
296+
297+
fun (processor: MailboxProcessor<_>) allocationMode (resultLength: int) (keys: ClArray<int>) (values: ClArray<'a>) ->
298+
299+
let reducedValues =
300+
clContext.CreateClArrayWithSpecificAllocationMode(allocationMode, resultLength)
301+
302+
let reducedKeys =
303+
clContext.CreateClArrayWithSpecificAllocationMode(allocationMode, resultLength)
304+
305+
let ndRange =
306+
Range1D.CreateValid(resultLength, workGroupSize)
307+
308+
let kernel = kernel.GetKernel()
309+
310+
processor.Post(
311+
Msg.MsgSetArguments
312+
(fun () -> kernel.KernelFunc ndRange keys.Length keys values reducedValues reducedKeys)
313+
)
314+
315+
processor.Post(Msg.CreateRunMsg<_, _>(kernel))
316+
317+
reducedKeys, reducedValues
318+
319+
/// <summary>
320+
/// Reduces values by key. Each segment is reduced by one work item.
321+
/// </summary>
322+
/// <param name="clContext">ClContext.</param>
323+
/// <param name="workGroupSize">Work group size.</param>
324+
/// <param name="reduceOp">Operation for reducing values.</param>
325+
/// <remarks>
326+
/// The length of the result must be calculated in advance.
327+
/// </remarks>
328+
let segmentSequential (clContext: ClContext) workGroupSize (reduceOp: Expr<'a -> 'a -> 'a>) =
329+
330+
let kernel =
331+
<@ fun (ndRange: Range1D) uniqueKeyCount keysLength (offsets: ClArray<int>) (keys: ClArray<int>) (values: ClArray<'a>) (reducedValues: ClArray<'a>) (reducedKeys: ClArray<int>) ->
332+
333+
let gid = ndRange.GlobalID0
334+
335+
if gid < uniqueKeyCount then
336+
let startPosition = offsets.[gid]
337+
338+
let sourceKey = keys.[startPosition]
339+
let mutable sum = values.[startPosition]
340+
341+
let mutable currentPosition = startPosition + 1
342+
343+
while currentPosition < keysLength
344+
&& sourceKey = keys.[currentPosition] do
345+
346+
sum <- (%reduceOp) sum values.[currentPosition]
347+
currentPosition <- currentPosition + 1
348+
349+
reducedValues.[gid] <- sum
350+
reducedKeys.[gid] <- sourceKey @>
351+
352+
let kernel = clContext.Compile kernel
353+
354+
fun (processor: MailboxProcessor<_>) allocationMode (resultLength: int) (offsets: ClArray<int>) (keys: ClArray<int>) (values: ClArray<'a>) ->
355+
356+
let reducedValues =
357+
clContext.CreateClArrayWithSpecificAllocationMode(allocationMode, resultLength)
358+
359+
let reducedKeys =
360+
clContext.CreateClArrayWithSpecificAllocationMode(allocationMode, resultLength)
361+
362+
let ndRange =
363+
Range1D.CreateValid(resultLength, workGroupSize)
364+
365+
let kernel = kernel.GetKernel()
366+
367+
processor.Post(
368+
Msg.MsgSetArguments
369+
(fun () ->
370+
kernel.KernelFunc
371+
ndRange
372+
resultLength
373+
keys.Length
374+
offsets
375+
keys
376+
values
377+
reducedValues
378+
reducedKeys)
379+
)
380+
381+
processor.Post(Msg.CreateRunMsg<_, _>(kernel))
382+
383+
reducedKeys, reducedValues
384+
385+
/// <summary>
386+
/// Reduces values by key. One work group participates in the reduction.
387+
/// </summary>
388+
/// <param name="clContext">ClContext.</param>
389+
/// <param name="workGroupSize">Work group size.</param>
390+
/// <param name="reduceOp">Operation for reducing values.</param>
391+
/// <remarks>
392+
/// Reduces an array of values that does not exceed the size of the workgroup.
393+
/// The length of the result must be calculated in advance.
394+
/// </remarks>
395+
let oneWorkGroupSegments (clContext: ClContext) workGroupSize (reduceOp: Expr<'a -> 'a -> 'a>) =
396+
397+
let kernel =
398+
<@ fun (ndRange: Range1D) length (keys: ClArray<int>) (values: ClArray<'a>) (reducedValues: ClArray<'a>) (reducedKeys: ClArray<int>) ->
399+
400+
let lid = ndRange.GlobalID0
401+
402+
// load values to local memory (may be without it)
403+
let localValues = localArray<'a> workGroupSize
404+
405+
if lid < length then
406+
localValues.[lid] <- values.[lid]
407+
408+
// load keys to local memory (mb without it)
409+
let localKeys = localArray<int> workGroupSize
410+
411+
if lid < length then
412+
localKeys.[lid] <- keys.[lid]
413+
414+
// get unique keys bitmap
415+
let localBitmap = localArray<int> workGroupSize
416+
localBitmap.[lid] <- 0
417+
(%PreparePositions.getUniqueBitmapLocal<int>) localKeys workGroupSize lid localBitmap
418+
419+
// get positions from bitmap by prefix sum
420+
// ??? get bitmap by prefix sum in another kernel ???
421+
// ??? we can restrict prefix sum for 0 .. length ???
422+
(%SubSum.localIntPrefixSum) lid workGroupSize localBitmap
423+
424+
let uniqueKeysCount = localBitmap.[length - 1]
425+
426+
if lid < uniqueKeysCount then
427+
let itemKeyId = lid + 1
428+
429+
let startKeyIndex =
430+
(%Search.Bin.lowerPosition) length itemKeyId localBitmap
431+
432+
match startKeyIndex with
433+
| Some startPosition ->
434+
let sourceKeyPosition = localBitmap.[startPosition]
435+
let mutable currentSum = localValues.[startPosition]
436+
let mutable currentIndex = startPosition + 1
437+
438+
while currentIndex < length
439+
&& localBitmap.[currentIndex] = sourceKeyPosition do
440+
441+
currentSum <- (%reduceOp) currentSum localValues.[currentIndex]
442+
currentIndex <- currentIndex + 1
443+
444+
reducedKeys.[lid] <- localKeys.[startPosition]
445+
reducedValues.[lid] <- currentSum
446+
| None -> () @>
447+
448+
let kernel = clContext.Compile kernel
449+
450+
fun (processor: MailboxProcessor<_>) allocationMode (resultLength: int) (keys: ClArray<int>) (values: ClArray<'a>) ->
451+
if keys.Length > workGroupSize then
452+
failwith "The length of the value should not exceed the size of the workgroup"
453+
454+
let reducedValues =
455+
clContext.CreateClArrayWithSpecificAllocationMode(allocationMode, resultLength)
456+
457+
let reducedKeys =
458+
clContext.CreateClArrayWithSpecificAllocationMode(allocationMode, resultLength)
459+
460+
let ndRange =
461+
Range1D.CreateValid(resultLength, workGroupSize)
462+
463+
let kernel = kernel.GetKernel()
464+
465+
processor.Post(
466+
Msg.MsgSetArguments
467+
(fun () -> kernel.KernelFunc ndRange keys.Length keys values reducedValues reducedKeys)
468+
)
469+
470+
processor.Post(Msg.CreateRunMsg<_, _>(kernel))
471+
472+
reducedKeys, reducedValues

src/GraphBLAS-sharp.Backend/GraphBLAS-sharp.Backend.fsproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@
2626
<Compile Include="Quotes/PreparePositions.fs" />
2727
<Compile Include="Quotes/Predicates.fs" />
2828
<Compile Include="Quotes/Map.fs" />
29-
<Compile Include="Quotes/BinSearch.fs" />
29+
<Compile Include="Quotes/Search.fs" />
3030
<Compile Include="Common/Scatter.fs" />
3131
<Compile Include="Common/Utils.fs" />
32-
<Compile Include="Common/Sum.fs" />
3332
<Compile Include="Common/PrefixSum.fs" />
3433
<Compile Include="Common/ClArray.fs" />
34+
<Compile Include="Common/Sum.fs" />
3535
<Compile Include="Common/BitonicSort.fs" />
3636
<Compile Include="Predefined/PrefixSum.fs" />
3737
<!--Compile Include="Matrices.fs" /-->

src/GraphBLAS-sharp.Backend/Matrix/COOMatrix/Map.fs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
namespace GraphBLAS.FSharp.Backend.Matrix.COO
22

3+
open System
34
open Brahma.FSharp
45
open GraphBLAS.FSharp.Backend.Matrix
56
open GraphBLAS.FSharp.Backend.Quotes
@@ -26,7 +27,7 @@ module internal Map =
2627
(uint64 rowIndex <<< 32) ||| (uint64 columnIndex)
2728

2829
let value =
29-
(%BinSearch.byKey2D) valuesLength index rows columns values
30+
(%Search.Bin.byKey2D) valuesLength index rows columns values
3031

3132
match (%op) value with
3233
| Some resultValue ->

src/GraphBLAS-sharp.Backend/Matrix/COOMatrix/Map2.fs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@ open GraphBLAS.FSharp.Backend
88
open GraphBLAS.FSharp.Backend.Quotes
99
open GraphBLAS.FSharp.Backend.Objects.ClMatrix
1010
open GraphBLAS.FSharp.Backend.Objects.ClContext
11+
open GraphBLAS.FSharp.Backend.Quotes
1112

1213
module internal Map2 =
14+
1315
let preparePositions<'a, 'b, 'c> (clContext: ClContext) workGroupSize opAdd =
1416

1517
let preparePositions (op: Expr<'a option -> 'b option -> 'c option>) =
@@ -26,10 +28,10 @@ module internal Map2 =
2628
(uint64 rowIndex <<< 32) ||| (uint64 columnIndex)
2729

2830
let leftValue =
29-
(%BinSearch.byKey2D) leftValuesLength index leftRows leftColumns leftValues
31+
(%Search.Bin.byKey2D) leftValuesLength index leftRows leftColumns leftValues
3032

3133
let rightValue =
32-
(%BinSearch.byKey2D) rightValuesLength index rightRows rightColumn rightValues
34+
(%Search.Bin.byKey2D) rightValuesLength index rightRows rightColumn rightValues
3335

3436
match (%op) leftValue rightValue with
3537
| Some value ->

src/GraphBLAS-sharp.Backend/Matrix/CSRMatrix/Map.fs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ module internal Map =
2727
let lastIndex = rowPointers.[rowIndex + 1] - 1
2828

2929
let value =
30-
(%BinSearch.inRange) startIndex lastIndex columnIndex columns values
30+
(%Search.Bin.inRange) startIndex lastIndex columnIndex columns values
3131

3232
match (%op) value with
3333
| Some resultValue ->

src/GraphBLAS-sharp.Backend/Matrix/CSRMatrix/Map2.fs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@ module internal Map2 =
3030
let rightLastIndex = rightRowPointers.[rowIndex + 1] - 1
3131

3232
let leftValue =
33-
(%BinSearch.inRange) leftStartIndex leftLastIndex columnIndex leftColumns leftValues
33+
(%Search.Bin.inRange) leftStartIndex leftLastIndex columnIndex leftColumns leftValues
3434

3535
let rightValue =
36-
(%BinSearch.inRange) rightStartIndex rightLastIndex columnIndex rightColumn rightValues
36+
(%Search.Bin.inRange) rightStartIndex rightLastIndex columnIndex rightColumn rightValues
3737

3838
match (%op) leftValue rightValue with
3939
| Some value ->

src/GraphBLAS-sharp.Backend/Objects/ArraysExtentions.fs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,14 @@ module ArraysExtensions =
1313
let dst = Array.zeroCreate this.Length
1414
q.PostAndReply(fun ch -> Msg.CreateToHostMsg(this, dst, ch))
1515

16+
member this.Free(q: MailboxProcessor<_>) = q.Post <| Msg.CreateFreeMsg this
17+
18+
member this.ToHostAndFree(q: MailboxProcessor<_>) =
19+
let result = this.ToHost q
20+
this.Free q
21+
22+
result
23+
1624
member this.Size = this.Length
1725

1826
type 'a ``[]`` with

0 commit comments

Comments
 (0)