Skip to content
Merged

SpMV #49

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/GraphBLAS-sharp.Backend/Common/Utils.fs
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,22 @@ module internal Utils =
>> fun x -> x ||| (x >>> 8)
>> fun x -> x ||| (x >>> 16)
>> fun x -> x + 1

// Rounds x down to a multiple of `multiple` using integer division
// (the quotient is truncated, then scaled back up).
let floorToMultiple multiple x =
    let wholeMultiples = x / multiple
    wholeMultiples * multiple

// Rounds x up to the nearest multiple of `multiple` (expects multiple > 0).
// Values that already are multiples, including 0, are returned unchanged.
let ceilToMultiple multiple x =
    // Integer division truncates toward zero, so the former
    // `((x - 1) / multiple + 1) * multiple` returned `multiple` for x = 0 and
    // overshot for negative x. Working from the remainder avoids both cases.
    let remainder = x % multiple

    if remainder > 0 then
        x - remainder + multiple
    else
        x - remainder

// Queries DeviceInfo.LocalMemSize for the device behind clContext and returns it as an int.
// The error code filled in by Cl.GetDeviceInfo is not inspected.
let getLocalMemorySize (clContext: ClContext) =
    let errorCode = ref Unchecked.defaultof<ClErrorCode>
    let device = clContext.ClDevice.Device

    let localMemSizeInfo =
        Cl.GetDeviceInfo(device, OpenCL.Net.DeviceInfo.LocalMemSize, errorCode)

    localMemSizeInfo.CastTo<int>()

// Number of elements of the value type 'a that fit into localMemorySize
// (integer division by sizeof<'a>).
let getClArrayOfValueTypeSize<'a when 'a: struct> localMemorySize =
    let elementSize = sizeof<'a>
    localMemorySize / elementSize

// An option value is represented in C as a structure with an additional integer field,
// so one element takes sizeof<int> + sizeof<'a>, rounded up to a multiple of the larger of the two.
// Returns how many such elements fit into localMemorySize.
let getClArrayOfOptionTypeSize<'a> localMemorySize =
    let valueSize = sizeof<'a>
    let tagSize = sizeof<int>
    let alignment = max valueSize tagSize
    let optionSize = ceilToMultiple alignment (tagSize + valueSize)
    localMemorySize / optionSize
5 changes: 2 additions & 3 deletions src/GraphBLAS-sharp.Backend/GraphBLAS-sharp.Backend.fsproj
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
Expand Down Expand Up @@ -30,9 +30,8 @@
<Compile Include="Matrix/CSRMatrix/SpGEMM.fs" />
<Compile Include="Matrix/CSRMatrix/CSRMatrix.fs" />
<Compile Include="Matrix/CSRMatrix/CSRMatrix.fs" />
<Compile Include="Matrix/CSRMatrix/SpMV.fs" />
<Compile Include="Matrix/Matrix.fs" />
<Folder Include="Vector" />
<Compile Include="Vector/SpMV.fs" />
<!--Compile Include="Backend/CSRMatrix/GetTuples.fs" /-->
<!--Compile Include="Backend/CSRMatrix/SpMSpV.fs" /-->
<!--Compile Include="Backend/CSRMatrix/Transpose.fs" /-->
Expand Down
69 changes: 0 additions & 69 deletions src/GraphBLAS-sharp.Backend/Matrix/CSRMatrix/SpMV.fs

This file was deleted.

157 changes: 157 additions & 0 deletions src/GraphBLAS-sharp.Backend/Vector/SpMV.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
namespace GraphBLAS.FSharp.Backend

open Brahma.FSharp
open GraphBLAS.FSharp.Backend
open GraphBLAS.FSharp.Backend.ArraysExtensions
open GraphBLAS.FSharp.Backend.Common
open Microsoft.FSharp.Quotations

module Vector =
// Builds a CSR-matrix by vector multiplication routine for clContext.
// mul combines a stored matrix value with a vector element, add folds the products of one row.
// workGroupSize is used for the kernel launch ranges and to size the kernel's local arrays.
let spMV
(clContext: ClContext)
(add: Expr<'c option -> 'c option -> 'c option>)
(mul: Expr<'a option -> 'b option -> 'c option>)
workGroupSize
=
//Queried through Utils until LocalMemSize is added to ClDevice as a member
let localMemorySize = Utils.getLocalMemorySize clContext

// One row pointer per work item plus one extra entry for the end of the group's last row
let localPointersArraySize = workGroupSize + 1

// Local memory remaining for values once the int row-pointer array is accounted for
let localMemoryLeft =
localMemorySize
- localPointersArraySize * sizeof<int>

// Capacity of the local values array, measured in 'c option elements.
// The element type is passed explicitly: without it nothing constrains the helper's
// type parameter, so the size would not be derived from the type that is actually
// stored in the array (localArray<'c option> in the reduction kernel).
let localValuesArraySize =
    Utils.getClArrayOfOptionTypeSize<'c> localMemoryLeft

// Kernel: one work item per stored matrix element. Work item i writes
// mul (Some matrixValues.[i]) vectorValues.[matrixColumns.[i]] into intermediateArray.[i].
let multiplyValues =
    <@ fun (ndRange: Range1D) matrixLength (matrixColumns: ClArray<int>) (matrixValues: ClArray<'a>) (vectorValues: ClArray<'b option>) (intermediateArray: ClArray<'c option>) ->

        let i = ndRange.GlobalID0

        // The launch range may contain work items past the last element (presumably because
        // Range1D.CreateValid pads it to a multiple of the work-group size), so the matrix
        // arrays are read only inside the guard to avoid out-of-bounds reads.
        if i < matrixLength then
            let value = matrixValues.[i]
            let column = matrixColumns.[i]
            intermediateArray.[i] <- (%mul) (Some value) vectorValues.[column] @>

// Kernel: the work item with global id gid (gid < numberOfRows) folds the intermediate values of
// row gid with add and stores the result in outputVector.[gid]. Row bounds are taken from matrixPtr;
// the values are staged through local memory in chunks shared by the whole work group.
let reduceValuesByRows =
<@ fun (ndRange: Range1D) (numberOfRows: int) (intermediateArray: ClArray<'c option>) (matrixPtr: ClArray<int>) (outputVector: ClArray<'c option>) ->

let gid = ndRange.GlobalID0
let lid = ndRange.LocalID0

// gid = numberOfRows is admitted as well: such a work item runs the loads below,
// but is excluded from the final write by the gid < numberOfRows check at the end.
// NOTE(review): barrierLocal is called inside this conditional and inside the while loop;
// confirm that all work items of a group reach every barrier, as OpenCL requires.
if gid <= numberOfRows then
let threadsPerBlock =
min (numberOfRows - gid + lid) workGroupSize //Fewer rows than work items may be left for the last work group

// Each work item caches the start pointer of its own row in local memory
let localPtr = localArray<int> localPointersArraySize
localPtr.[lid] <- matrixPtr.[gid]

// Work item 0 additionally caches the pointer located threadsPerBlock entries further,
// which is read back below as workEnd
if lid = 0 then
localPtr.[threadsPerBlock] <- matrixPtr.[gid + threadsPerBlock]

barrierLocal ()

let localValues =
localArray<'c option> localValuesArraySize

// workEnd: index in intermediateArray where the work of this group stops
// blockLowerBound: index in intermediateArray where the current chunk starts
// workPerIteration: number of values covered by one pass of the while loop
let workEnd = localPtr.[threadsPerBlock]
let mutable blockLowerBound = localPtr.[0]
let numberOfBlocksFitting = localValuesArraySize / threadsPerBlock
let workPerIteration = threadsPerBlock * numberOfBlocksFitting

let mutable sum: 'c option = None

while blockLowerBound < workEnd do
let mutable index = blockLowerBound + lid

barrierLocal ()
//Loading values into local memory: on each pass of the for loop a work item copies one value
//and then advances its index by threadsPerBlock
// NOTE(review): the lines from "Copy link" up to the author's reply below are review-page text
// captured together with the diff, not kernel code.
for block in 0 .. numberOfBlocksFitting - 1 do
if index < workEnd then
localValues.[lid + block * threadsPerBlock] <- intermediateArray.[index]
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like if it's the last work group, indices are out of bounds sometimes

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the last work group threadsPerBlock is computed as numberOfRows - gid + lid, so workEnd will be equal to the last value of row pointers array and indices will not be out of bounds

index <- index + threadsPerBlock

barrierLocal ()
//Reduction
//Check whether any part of this work item's row was loaded into local memory on this iteration
if (localPtr.[lid + 1] > blockLowerBound
&& localPtr.[lid] < blockLowerBound + workPerIteration) then
// Row bounds relative to the start of the chunk, clamped to the chunk
let rowStart = max (localPtr.[lid] - blockLowerBound) 0

let rowEnd =
min (localPtr.[lid + 1] - blockLowerBound) workPerIteration

for j in rowStart .. rowEnd - 1 do
let newSum = (%add) sum localValues.[j] //For some reason sum <- (%add) ... causes Brahma exception
sum <- newSum

// Move on to the next chunk of intermediate values
blockLowerBound <- blockLowerBound + workPerIteration

// Only work items that own a row write a result
if gid < numberOfRows then
outputVector.[gid] <- sum @>

// Both kernels are compiled once here; the function returned below only enqueues work.
let multiplyProgram = clContext.Compile multiplyValues
let reduceProgram = clContext.Compile reduceValuesByRows

// Creates a ClArray of 'c option with the given length that is not accessible from the host.
let createDeviceArray length =
    clContext.CreateClArray<'c option>(
        length,
        deviceAccessMode = DeviceAccessMode.ReadWrite,
        hostAccessMode = HostAccessMode.NotAccessible,
        allocationMode = AllocationMode.Default
    )

// Posts the multiply kernel, then the per-row reduction, then the release of the intermediate
// array to the queue, and returns the output array.
fun (queue: MailboxProcessor<_>) (matrix: CSRMatrix<'a>) (vector: ClArray<'b option>) ->

    let valuesCount = matrix.Values.Length

    let multiplyRange =
        Range1D.CreateValid(valuesCount, workGroupSize)

    let reduceRange =
        Range1D.CreateValid(matrix.RowCount, workGroupSize)

    // Step 1: one product per stored matrix value
    let products = createDeviceArray valuesCount

    let multiplyKernel = multiplyProgram.GetKernel()

    let setMultiplyArguments () =
        multiplyKernel.KernelFunc multiplyRange valuesCount matrix.Columns matrix.Values vector products

    queue.Post(Msg.MsgSetArguments setMultiplyArguments)

    queue.Post(Msg.CreateRunMsg<_, _>(multiplyKernel))

    // Step 2: fold the products of every row into the result vector
    let result = createDeviceArray matrix.RowCount

    let reduceKernel = reduceProgram.GetKernel()

    let setReduceArguments () =
        reduceKernel.KernelFunc reduceRange matrix.RowCount products matrix.RowPointers result

    queue.Post(Msg.MsgSetArguments setReduceArguments)

    queue.Post(Msg.CreateRunMsg<_, _>(reduceKernel))

    // The products are no longer needed once the reduction has been enqueued
    queue.Post(Msg.CreateFreeMsg products)

    result
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ open Expecto.Logging.Message
open GraphBLAS.FSharp.Backend.Common
open Brahma.FSharp
open GraphBLAS.FSharp.Tests.Utils
open GraphBLAS.FSharp.Tests.Context

let logger = Log.create "BitonicSort.Tests"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ open Expecto
open Expecto.Logging
open Expecto.Logging.Message
open GraphBLAS.FSharp.Tests.Utils

open GraphBLAS.FSharp.Tests.Context
open GraphBLAS.FSharp.Backend
open GraphBLAS.FSharp

Expand Down
4 changes: 2 additions & 2 deletions tests/GraphBLAS-sharp.Tests/BackendCommonTests/CopyTests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ open GraphBLAS.FSharp.Tests

let logger = Log.create "Copy.Tests"

let context = Utils.defaultContext.ClContext
let context = Context.defaultContext.ClContext

let testCases =
let q = Utils.defaultContext.Queue
let q = Context.defaultContext.Queue
q.Error.Add(fun e -> failwithf "%A" e)

let getCopyFun copy =
Expand Down
Loading