# Example DistibutedArrays vs SharedArrays

With :
* @everywhere const dim1 = 100
* @everywhere const dim2 = 100
* @everywhere const dim3 = nworkers()*1=3
* sleep(0.)
## Serial (2nd) run
* 34.513930 seconds (150.72 k allocations: 5.504 MiB)

## DistributedArrays (2nd run):
* 11.860878 seconds (129.58 k allocations: 6.103 MiB)

## SharedArrays (2nd run):
* 12.662117 seconds (129.72 k allocations: 6.112 MiB)

------------------------------------------------------------------
With :
* @everywhere const dim1 = 10000
* @everywhere const dim2 = 10000
* @everywhere const dim3 = nworkers()*1=3
* no sleep()

## Serial (2nd) run
* 0.060183 seconds (4 allocations: 160 bytes)

## DistributedArrays (2nd run):
* 0.150617 seconds (49.27 k allocations: 2.550 MiB)

## SharedArrays (2nd run):
* 0.282719 seconds (45.36 k allocations: 2.359 MiB, 4.98% gc time)




In [None]:
addprocs(3)

In [None]:
nworkers()

In [None]:
@everywhere using DistributedArrays

In [None]:
@everywhere using Distributions

In [None]:
using Base.Test

In [None]:
@everywhere const dim1 = 100
@everywhere const dim2 = 100
@everywhere const dim3 = nworkers()*1

In [None]:
@time sleep(0.0)

## Completion time in serial (predicted)

In [None]:
0.000236*dim1*dim2*dim3

 0.000236 seconds (37 allocations: 800 bytes)

# Serial

In [None]:
t = zeros(dim1,dim2,dim3)

In [None]:
@everywhere function give_my_id_serial!(x::Array{Float64,3}, id::Int64)
    for indexDim2 = 1:dim2
        for indexDim1 = 1:dim1
            x[indexDim1, indexDim2, id - 1] = id^2
            #sleep (0.0) still takes:
            #0.000236 seconds (37 allocations: 800 bytes)
            sleep(0.0)
        end
    end
end

In [None]:
function wrapper(x::Array{Float64,3})
    give_my_id_serial!(x, 2)
    give_my_id_serial!(x, 3)
    give_my_id_serial!(x, 4)
end

In [38]:
@time wrapper(t)

 34.513930 seconds (150.72 k allocations: 5.504 MiB)


## Using DistributedArrays

In [16]:
@everywhere function give_my_id_parallel!(x::Array{Float64,3}, id::Int64)
    for indexDim2 = 1:dim2
        for indexDim1 = 1:dim1
            x[indexDim1, indexDim2, 1] = id^2
            #sleep (0.0) still takes:
            #0.000236 seconds (37 allocations: 800 bytes)
            sleep(0.0)
        end
    end
end

In [17]:
@sync begin 
    #a = dzeros(dim1,dim2,dim3);
    #Split along the 3rd dimension
    #-----------------------------
    a = dzeros((dim1,dim2,dim3), workers(), [1,1,nworkers()]);
end

100×100×3 DistributedArrays.DArray{Float64,3,Array{Float64,3}}:
[:, :, 1] =
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.

In [None]:
#=
@time @sync begin
 [@spawnat p eval(:(fill!(localpart(a), myid()))) for p in procs(a)]
end
=#

In [18]:
procs(a)

1×1×3 Array{Int64,3}:
[:, :, 1] =
 2

[:, :, 2] =
 3

[:, :, 3] =
 4

In [None]:
#@time map(fetch, (@spawnat p fill!(localpart(a), p)) for p in procs(a))

In [None]:
typeof(a)

In [22]:
@time asyncmap(fetch, (@spawnat p give_my_id_parallel!(localpart(a), p)) for p in procs(a))

1×1×3 Array{Void,3}:
[:, :, 1] =
 nothing

[:, :, 2] =
 nothing

[:, :, 3] =
 nothing

 11.422966 seconds (270.65 k allocations: 14.489 MiB)


In [None]:
#@time [@spawnat p println(localpart(a)) for p in procs(a)]

In [None]:
#@time map(fetch, (@spawnat p println(localpart(a))) for p=1:nworkers())

In [None]:
@time [@spawnat p println(size(localpart(a))) for p in procs(a)]

In [None]:
@time [@spawnat p println(typeof(localpart(a))) for p in procs(a)]

In [None]:
procs(a)

In [23]:
map(fetch, (@spawnat p size(localpart(a))) for p=procs(a) )

1×1×3 Array{Tuple{Int64,Int64,Int64},3}:
[:, :, 1] =
 (100, 100, 1)

[:, :, 2] =
 (100, 100, 1)

[:, :, 3] =
 (100, 100, 1)

In [24]:
a[:,:,1]

100×100 SubArray{Float64,2,DistributedArrays.DArray{Float64,3,Array{Float64,3}},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},Int64},false}:
 4.0  4.0  4.0  4.0  4.0  4.0  4.0  4.0  …  4.0  4.0  4.0  4.0  4.0  4.0  4.0
 4.0  4.0  4.0  4.0  4.0  4.0  4.0  4.0     4.0  4.0  4.0  4.0  4.0  4.0  4.0
 4.0  4.0  4.0  4.0  4.0  4.0  4.0  4.0     4.0  4.0  4.0  4.0  4.0  4.0  4.0
 4.0  4.0  4.0  4.0  4.0  4.0  4.0  4.0     4.0  4.0  4.0  4.0  4.0  4.0  4.0
 4.0  4.0  4.0  4.0  4.0  4.0  4.0  4.0     4.0  4.0  4.0  4.0  4.0  4.0  4.0
 4.0  4.0  4.0  4.0  4.0  4.0  4.0  4.0  …  4.0  4.0  4.0  4.0  4.0  4.0  4.0
 4.0  4.0  4.0  4.0  4.0  4.0  4.0  4.0     4.0  4.0  4.0  4.0  4.0  4.0  4.0
 4.0  4.0  4.0  4.0  4.0  4.0  4.0  4.0     4.0  4.0  4.0  4.0  4.0  4.0  4.0
 4.0  4.0  4.0  4.0  4.0  4.0  4.0  4.0     4.0  4.0  4.0  4.0  4.0  4.0  4.0
 4.0  4.0  4.0  4.0  4.0  4.0  4.0  4.0     4.0  4.0  4.0  4.0  4.0  4.0  4.0
 4.0  4.0  4.0  4.0  4.0  4.0  4.0  4.0  …  4.0  4.0  4.0  

In [25]:
a[:,:,2]

100×100 SubArray{Float64,2,DistributedArrays.DArray{Float64,3,Array{Float64,3}},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},Int64},false}:
 9.0  9.0  9.0  9.0  9.0  9.0  9.0  9.0  …  9.0  9.0  9.0  9.0  9.0  9.0  9.0
 9.0  9.0  9.0  9.0  9.0  9.0  9.0  9.0     9.0  9.0  9.0  9.0  9.0  9.0  9.0
 9.0  9.0  9.0  9.0  9.0  9.0  9.0  9.0     9.0  9.0  9.0  9.0  9.0  9.0  9.0
 9.0  9.0  9.0  9.0  9.0  9.0  9.0  9.0     9.0  9.0  9.0  9.0  9.0  9.0  9.0
 9.0  9.0  9.0  9.0  9.0  9.0  9.0  9.0     9.0  9.0  9.0  9.0  9.0  9.0  9.0
 9.0  9.0  9.0  9.0  9.0  9.0  9.0  9.0  …  9.0  9.0  9.0  9.0  9.0  9.0  9.0
 9.0  9.0  9.0  9.0  9.0  9.0  9.0  9.0     9.0  9.0  9.0  9.0  9.0  9.0  9.0
 9.0  9.0  9.0  9.0  9.0  9.0  9.0  9.0     9.0  9.0  9.0  9.0  9.0  9.0  9.0
 9.0  9.0  9.0  9.0  9.0  9.0  9.0  9.0     9.0  9.0  9.0  9.0  9.0  9.0  9.0
 9.0  9.0  9.0  9.0  9.0  9.0  9.0  9.0     9.0  9.0  9.0  9.0  9.0  9.0  9.0
 9.0  9.0  9.0  9.0  9.0  9.0  9.0  9.0  …  9.0  9.0  9.0  

In [26]:
a[:,:,3]

100×100 SubArray{Float64,2,DistributedArrays.DArray{Float64,3,Array{Float64,3}},Tuple{Base.Slice{Base.OneTo{Int64}},Base.Slice{Base.OneTo{Int64}},Int64},false}:
 16.0  16.0  16.0  16.0  16.0  16.0  …  16.0  16.0  16.0  16.0  16.0  16.0
 16.0  16.0  16.0  16.0  16.0  16.0     16.0  16.0  16.0  16.0  16.0  16.0
 16.0  16.0  16.0  16.0  16.0  16.0     16.0  16.0  16.0  16.0  16.0  16.0
 16.0  16.0  16.0  16.0  16.0  16.0     16.0  16.0  16.0  16.0  16.0  16.0
 16.0  16.0  16.0  16.0  16.0  16.0     16.0  16.0  16.0  16.0  16.0  16.0
 16.0  16.0  16.0  16.0  16.0  16.0  …  16.0  16.0  16.0  16.0  16.0  16.0
 16.0  16.0  16.0  16.0  16.0  16.0     16.0  16.0  16.0  16.0  16.0  16.0
 16.0  16.0  16.0  16.0  16.0  16.0     16.0  16.0  16.0  16.0  16.0  16.0
 16.0  16.0  16.0  16.0  16.0  16.0     16.0  16.0  16.0  16.0  16.0  16.0
 16.0  16.0  16.0  16.0  16.0  16.0     16.0  16.0  16.0  16.0  16.0  16.0
 16.0  16.0  16.0  16.0  16.0  16.0  …  16.0  16.0  16.0  16.0  16.0  16.0
 16.0  16.0  1

## Let's use SharedArrays instead

In [40]:
@time b = SharedArray(zeros(dim1,dim2,dim3));

  0.006387 seconds (469 allocations: 250.453 KiB)


In [41]:
@everywhere function give_my_id!(x::SharedArray{Float64,3}, indexDim3)
    for indexDim2 = 1:dim2
        for indexDim1 = 1:dim1
            x[indexDim1, indexDim2, indexDim3] = myid()^2
            #sleep (0.0) still takes:
            #0.000236 seconds (37 allocations: 800 bytes)
            sleep(0.0)
        end
    end
end

In [43]:
@time asyncmap(fetch, (@spawnat p give_my_id!(b, p)) for p=1:nworkers())

3-element Array{Void,1}:
 nothing
 nothing
 nothing

 12.662117 seconds (129.72 k allocations: 6.112 MiB)


In [35]:
b

100×100×3 SharedArray{Float64,3}:
[:, :, 1] =
 1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0  …  1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0     1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0     1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0     1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0     1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0  …  1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0     1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0     1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0     1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0     1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0  …  1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0     1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.

## Test

In [21]:
asyncmap

asyncmap (generic function with 4 methods)