### Julia Parallel Workshop
###### Viral Shah, Tanmay Mohapatra, Eka Palamadai, Amit Murthy

- The basics
    - @sync/@async
    - remotecall
    - @spawn
- Abstractions in parallel computing
- Distributed Arrays
- MPI
- MessageUtils
- Hadoop




### Basics

In [1]:
# Example: parallel I/O
using HTTPClient.HTTPC

In [2]:
# Baseline: issue the HTTP GETs one after another.
# Every request is timed on its own, and the outer @time covers the whole batch.
crawls = []
@time begin
    for url in ("http://www.google.com/",
                "http://www.julialang.org/",
                "http://www.juliacon.org/",
                "http://www.juliabox.org/")
        @time push!(crawls, HTTPC.get(url))
    end
end;


 365.920 milliseconds (319 k allocations: 13369 KB)
 355.011 milliseconds (2031 allocations: 139 KB)
 286.999 milliseconds (1602 allocations: 133 KB)
 876.839 milliseconds (1873 allocations: 99744 bytes)
   2.147 seconds      (770 k allocations: 32970 KB, 0.21% gc time)


In [3]:
# Let's make sure we actually downloaded the web page:
# list the fields available on the first response object.
fieldnames(crawls[1])

5-element Array{Symbol,1}:
 :body      
 :headers   
 :http_code 
 :total_time
 :bytes_recd

In [4]:
# Number of bytes received for each response, collected into an Int vector.
Int[resp.bytes_recd for resp in crawls]

4-element Array{Int64,1}:
 58680
 26486
 32171
  6071

In [5]:
# Launch every HTTP GET as its own task; the underlying I/O is asynchronous.
# Nothing waits for the tasks here, so @time only measures launching them and
# the length check right afterwards runs before any response has been stored.
crawls = []
@time begin
    for url in ["http://www.google.com/",
                "http://www.julialang.org/",
                "http://www.juliacon.org/",
                "http://www.juliabox.org/"]
        @async push!(crawls, HTTPC.get(url))
    end
end;
length(crawls)

  

0

 3.856 milliseconds (93 allocations: 8216 bytes)


In [6]:
# Checked again a moment later: the background tasks have stored their
# responses by now, so we have the data.
length(crawls)

4

In [7]:
# Same concurrent GETs, but wrapped in @sync so execution only continues once
# every @async task started inside the block has finished.
crawls = []
@time @sync begin
    for url in ("http://www.google.com/",
                "http://www.julialang.org/",
                "http://www.juliacon.org/",
                "http://www.juliabox.org/")
        @async push!(crawls, HTTPC.get(url))
    end
end;
println(length(crawls));

 723.057 milliseconds (17584 allocations: 938 KB)
4


In [None]:
# Example: @schedule compared with @async.
# Unlike @async, @schedule
#   - does not localize variables (no localize_vars), and
#   - does not add the task to an enclosing @sync block.
#
# foo: schedules a task that prints `lvar` via @schedule, then rebinds `lvar`
# and returns `nothing`.
# NOTE(review): because @schedule does not localize `lvar`, the task presumably
# observes the rebinding and prints "World" -- output is not recorded here; confirm by running.
function foo()
    lvar = "Hello"
    @schedule println(lvar)
    lvar = "World"
    nothing
end;

# bar: identical to foo except that the task is created with @async.
# NOTE(review): @async presumably captures the value `lvar` had when the task
# was created, so this is expected to print "Hello" -- confirm by running.
function bar()
    lvar = "Hello"
    @async println(lvar)
    lvar = "World"
    nothing
end;
    

In [None]:
foo()

In [None]:
bar()

In [None]:
# Start from a known state: remove the currently listed workers,
# then launch 4 new worker processes.
rmprocs(workers())
addprocs(4)

In [None]:
# Walk through the lifecycle of a remote reference.
# Use the first worker in the list as the target process.
wpid = workers()[1]
# Ask that worker to sleep 5 seconds and then build a 5x5 random matrix.
# NOTE(review): worker-id-first argument order is the older remotecall API;
# later Julia versions take the function first -- confirm the target version.
rr = remotecall(wpid, ()->(sleep(5.0); rand(5,5)))
# Checked immediately after the call; presumably still false because the
# remote closure sleeps first (output not recorded here).
isready(rr)

In [None]:
# Block until the remote computation has produced its value.
wait(rr);

In [None]:
# After the wait the value should be available.
isready(rr)

In [None]:
# Retrieve the value.
# NOTE(review): fetch presumably leaves the value stored in the reference -- see the next check.
fetch(rr)

In [None]:
# Check whether the value is still available after fetch.
isready(rr)

In [None]:
# Retrieve the value with take!.
# NOTE(review): take! presumably removes the value from the reference -- see the next check.
take!(rr)

In [None]:
# Check availability once more after take!.
isready(rr)

In [None]:
# Variant 1: remotecall_wait, with a closure that sleeps 3 seconds and builds
# a 5x5 random matrix on worker `wpid`.
# NOTE(review): presumably returns the reference only after the remote call
# has completed -- confirm against the API documentation.
rr = remotecall_wait(wpid, ()->(sleep(3.0); rand(5,5)))

In [None]:
# Retrieve the matrix behind the reference.
fetch(rr)

In [None]:
# Variant 2: remotecall_fetch, same closure.
# NOTE(review): presumably performs the call and returns the value itself
# rather than a reference -- confirm against the API documentation.
remotecall_fetch(wpid, ()->(sleep(3.0); rand(5,5)))


In [None]:
# @spawn: the target worker is chosen automatically,
# round-robin over all workers().
# The message interpolates myid(), so it reports the id of the process
# that evaluates the println.
@spawn println("Executing on worker $(myid())");


In [None]:
# @spawnat: same expression, but run on the explicitly chosen worker `wpid`.
@spawnat wpid println("Executing on worker $(myid())");

In [None]:
# Assign a global in the current (master) process.
myvar = pi
# Evaluate println(myvar) on every process.
# NOTE(review): `myvar` was only assigned in this process, so the workers
# presumably cannot resolve it -- output not recorded here; confirm by running.
@everywhere println(myvar)

In [None]:
# Same println, sent to a worker with @spawn.
# NOTE(review): presumably succeeds because @spawn ships the referenced global
# along with the expression -- confirm by running.
@spawn println(myvar);

In [None]:
# Smaller compute, large iterations
# Parallel reduction: each of the 10^8 iterations yields a random Bool and the
# (+) reducer adds them up, i.e. the result counts the `true` draws.
# NOTE(review): @parallel is the older macro name; later Julia versions
# rename it -- confirm against the target version.
@parallel (+) for i in 1:10^8
    rand(Bool)
end

In [None]:
# Larger compute, smaller iterations
@everywhere begin
    # mycompute(x): simulate a unit of work that takes `x` seconds, then
    # return the id of the process that executed it.
    #
    # `x` is the duration in seconds (non-negative). Previously the argument
    # was ignored and a fresh random duration was drawn on every call, so the
    # per-item workloads handed over by the caller had no effect.
    function mycompute(x)
        sleep(x)
        return myid()
    end
end

# Apply mycompute to 20 random inputs drawn from [0, 3) with pmap and collect
# one result per input.
results = pmap(mycompute, [3*rand() for x in 1:20]);
# Display the results as a single row rather than a 20-line column.
transpose(results)