In [13]:
using Distributed
using Base.Threads
@everywhere begin
    using BenchmarkTools
    using ParallelTemperingMonteCarlo
end
#addprocs(nthreads())

8-element Vector{Int64}:
 18
 19
 20
 21
 22
 23
 24
 25

To use a function of several variables with `pmap`, which expects a function of a single argument, we need a curry function that fixes the remaining arguments.

In [6]:
# Fix the second argument of the two-argument function `f` to `y` and hand back
# a one-argument closure, which is the shape `pmap` expects.
@everywhere function curry(f, y)
    return x -> f(x, y)
end

# Plain two-argument addition, used to demonstrate `curry`.
@everywhere function add_xy(x, y)
    return x + y
end


In [12]:
# Map the curried adder (second argument fixed to 10) over 1:5 with pmap.
pmap(curry(add_xy,10), 1:5)


5-element Vector{Int64}:
 11
 12
 13
 14
 15

NB spawning processes is expensive and _not recommended at all_ for simple loops and functions.

# Sync macros

`@sync` requires all tasks started inside it to complete before moving on; `@async` moves right along without waiting.

In [20]:
# @sync does not return until every task started inside it (here the @async
# task) has finished.
@sync begin
    # these two statements run in the current task, so the block pauses for 2 s here
    sleep(2)
    println("slept for two")

    # @async schedules this block as a separate task and returns immediately
    @async begin 
        sleep(5)
        println("nice and rested")
    end
#the async wrapper skips straight to done
    println("done")
end

slept for two
done


nice and rested


We'll simulate a complex process with a 2 second sleep

In [22]:
"""
    simtest(x; delay=2)

Stand-in for an expensive computation: sleep for `delay` seconds and then
return `x` unchanged.

# Keywords
- `delay`: sleep time in seconds. Defaults to 2, so `simtest(x)` behaves
  exactly as before; pass a smaller value to shorten the demonstration.
"""
function simtest(x; delay::Real = 2)
    sleep(delay)
    return x
end

simtest (generic function with 1 method)

In [30]:
# Sequential baseline: every call to simtest blocks before the next one starts,
# so the ten calls are timed back to back.
@time begin
    veccy = []
    for i = 1:10
        y = simtest(i)
        push!(veccy, y)
    end
    println(veccy)
end

# Asynchronous version: all ten tasks are started before any of them finishes,
# so their sleeps overlap; @sync waits for the whole group.
@time begin
    # The accumulator must be created once, *before* the loop. Assigning it
    # inside the loop body makes a fresh array per iteration, so the tasks do
    # not share one result vector and the println below would show the vector
    # left over from the sequential block instead.
    veccy = []
    @sync for i = 1:10
        @async begin
            y = simtest(i)
            push!(veccy, y)
        end
    end
    println(veccy)
end

Any[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
 20.016610 seconds (1.16 k allocations: 35.969 KiB)


Any[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
  2.024840 seconds (2.64 k allocations: 133.564 KiB, 1.09% compilation time)


NB: Asynchronous tasks are not parallel; as demonstrated above, they still boost performance when the operations spend most of their time waiting (here, sleeping). Use @spawn, not @async, for parallel operations.

Let's test a data unsafe operation

In [42]:
"""
    printtest(x::Bool = true)

Print "begin", then one line for each `i` in 1:10 (the number repeated five
times), then "end". Returns `nothing`.

# Arguments
- `x`: when `true` (the default) each line is printed by its own job launched
  with `@spawnat :any`, and `@sync` waits for all of them before "end" is
  printed. When `false` the lines are printed by a plain sequential loop.
"""
function printtest(x::Bool = true)
    println("begin")
    if x
        # `@spawnat :any` is the documented replacement for Distributed's
        # deprecated `@spawn`. Spelling it out also avoids the name clash with
        # `Base.Threads.@spawn`, which exports a macro of the same name.
        @sync for i = 1:10
            @spawnat :any println("$i $i $i $i $i ")
        end
    else
        for i = 1:10
            println("$i $i $i $i $i ")
        end
    end
    println("end")
    return nothing
end

printtest (generic function with 2 methods)

In [44]:
# time the spawned variant (default argument x = true)
@time printtest()
println("done")
# time the plain sequential loop
@time printtest(false)
println("done")
# time the same prints issued from a @threads loop, for comparison
@time begin 
    println("begin")
    @threads for i=1:10
        println("$i $i $i $i $i ")
    end
    println("end")
end

begin
      From worker 3:	6 6 6 6 6 
      From worker 4:	7 7 7 7 7 
      From worker 2:	5 5 5 5 5 
      From worker 5:	8 8 8 8 8 
      From worker 7:	10 10 10 10 10 
      From worker 6:	9 9 9 9 9 


      From worker 22:	1 1 1 1 1 
      From worker 24:	3 3 3 3 3 
      From worker 23:	2 2 2 2 2 
      From worker 25:	4 4 4 4 4 


end
  3.457296 seconds (2.62 k allocations: 123.656 KiB)
done
begin
1 1 1 1 1 
2 2 2 2 2 
3 3 3 3 3 
4 4 4 4 4 
5 5 5 5 5 
6 6 6 6 6 
7 7 7 7 7 
8 8 8 8 8 
9 9 9 9 9 
10 10 10 10 10 
end
  0.000178 seconds (517 allocations: 17.969 KiB)
done
begin
7 7 7 7 7 
1 1 1 1 1 
2 2 2 2 2 
10 10 10 10 10 
3 3 3 3 3 
4 4 4 4 4 
5 5 5 5 5 
8 8 8 8 8 
6 6 6 6 6 
9 9 9 9 9 
end
  0.023980 seconds (18.15 k allocations: 1.024 MiB, 96.25% compilation time)


# NB 
Categorically do not use @spawn where data ordering is relevant. Additionally, it is considered bad practice to parallelise any operation faster than 100 $\mu s$ for threads or 100 ms for spawning, as the speed-up does not compensate for the time taken to spawn the operation.

Below we test specific functions for this purpose.

## Onwards, time-testing some functions

In [59]:
# number of atoms; checked against the length of the starting positions and
# passed to MCParams, MoveStrategy and NVT below
n_atoms = 13

# temperature grid
# ti and tf are the two arguments handed to TempGrid; n_traj is its type
# parameter and is also used as the number of trajectories below
# NOTE(review): presumably ti/tf are the lowest/highest temperature in Kelvin - confirm
ti = 5.
tf = 16.
n_traj = 32

temp = TempGrid{n_traj}(ti,tf) 

# MC simulation details
mc_cycles = 300000 #default 20% equilibration cycles on top
mc_sample = 1  #sample every mc_sample MC cycles

#move_atom=AtomMove(n_atoms) #move strategy (here only atom moves, n_atoms per MC cycle)
displ_atom = 0.1 # Angstrom
n_adjust = 100

# one maximum atom displacement per trajectory, computed from displ_atom and
# that trajectory's entry in temp.t_grid
max_displ_atom = [0.1*sqrt(displ_atom*temp.t_grid[i]) for i in 1:n_traj]

mc_params = MCParams(mc_cycles, n_traj, n_atoms, mc_sample = mc_sample, n_adjust = n_adjust)

#moves - allowed at present: atom, volume and rotation moves (volume,rotation not yet implemented)
move_strat = MoveStrategy(atom_moves = n_atoms)  

#ensemble
ensemble = NVT(n_atoms)

#ELJpotential for neon
#c1=[-10.5097942564988, 0., 989.725135614556, 0., -101383.865938807, 0., 3918846.12841668, 0., -56234083.4334278, 0., 288738837.441765]
#elj_ne1 = ELJPotential{11}(c1)

# the six coefficients below are the non-zero entries of the commented-out c1 above
c=[-10.5097942564988, 989.725135614556, -101383.865938807, 3918846.12841668, -56234083.4334278, 288738837.441765]
pot = ELJPotentialEven{6}(c)

#starting configurations
#icosahedral ground state of Ne13 (from Cambridge cluster database) in Angstrom
# one [x, y, z] triple per atom
pos_ne13 = [[2.825384495892464, 0.928562467914040, 0.505520149314310],
[2.023342172678102,	-2.136126268595355, 0.666071287554958],
[2.033761811732818,	-0.643989413759464, -2.133000349161121],
[0.979777205108572,	2.312002562803556, -1.671909307631893],
[0.962914279874254,	-0.102326586625353, 2.857083360096907],
[0.317957619634043,	2.646768968413408, 1.412132053672896],
[-2.825388342924982, -0.928563755928189, -0.505520471387560],
[-0.317955944853142, -2.646769840660271, -1.412131825293682],
[-0.979776174195320, -2.312003751825495, 1.671909138648006],
[-0.962916072888105, 0.102326392265998,	-2.857083272537599],
[-2.023340541398004, 2.136128558801072,	-0.666071089291685],
[-2.033762834001679, 0.643989905095452, 2.132999911364582],
[0.000002325340981,	0.000000762100600, 0.000000414930733]]

#convert to Bohr
AtoBohr = 1.8897259886
pos_ne13 = pos_ne13 * AtoBohr

# abort early if the position list does not contain exactly n_atoms entries
length(pos_ne13) == n_atoms || error("number of atoms and positions not the same - check starting config")

#boundary conditions 
bc_ne13 = SphericalBC(radius=5.32*AtoBohr)   #5.32 Angstrom

#starting configuration
start_config = Config(pos_ne13, bc_ne13)

#histogram information
n_bin = 100
#en_min = -0.006    #might want to update after equilibration run if generated on the fly
#en_max = -0.001    #otherwise will be determined after run as min/max of sampled energies (ham vector)

#construct array of MCState (for each temperature)
# every state is built from the same start_config and pot; only the temperature,
# beta and the first max_displ entry differ between trajectories
mc_states = [MCState(temp.t_grid[i], temp.beta_grid[i], start_config, pot; max_displ=[max_displ_atom[i],0.01,1.]) for i in 1:n_traj]

#results = Output(n_bin, max_displ_vec)
# en_min is taken from the en_tot field of the first state
results = Output{Float64}(n_bin; en_min = mc_states[1].en_tot)


Output{Float64}(100, 0.0, 0.0, Float64[], Float64[], Float64[], Vector{Float64}[], Float64[], Float64[], Float64[], Float64[], Float64[])

Above we just define the 13-atom system; below we show that threading substantially reduces (by roughly 40% in the run shown) the time taken to complete one mc_step per trajectory.

In [60]:
# Serial baseline: one mc_step! per trajectory.
# The globals are interpolated with `$` so that BenchmarkTools passes them into
# the benchmark as local values; without interpolation the measured time also
# includes the cost of accessing untyped global variables.
@btime begin
    for i in 1:$(mc_params.n_traj)
        mc_step!($(mc_states)[i], $pot, $ensemble, 1, 0, 0)
    end
end

# Same loop with the iterations spread over the available threads by @threads.
@btime begin
    @threads for i in 1:$(mc_params.n_traj)
        mc_step!($(mc_states)[i], $pot, $ensemble, 1, 0, 0)
    end
end


  13.672 μs (98 allocations: 13.09 KiB)


  8.018 μs (107 allocations: 15.73 KiB)
