In [1]:
using Distributed
# Script for investigating amplitude dependence of potential
@everywhere using Distributions
@everywhere using Test
@everywhere using StatsBase
@everywhere using BenchmarkTools
@everywhere using LinearAlgebra
@everywhere using LaTeXStrings
using Primes
using MCMCDiagnostics
using SharedArrays
using Plots
pyplot()

# Hack for fixing remote channels in 1.0.3 commit 099e826241
@everywhere struct Hack end
# Construct a `Hack` on every worker process and fetch it back to the caller.
# This round trip is the workaround referred to in the comment above.
function fixRC()
    foreach(workers()) do pid
        @fetchfrom pid Hack()
    end
end

fixRC (generic function with 1 method)

In [2]:
@everywhere src_path = "../Source/"
@everywhere include(src_path*"types.jl")
@everywhere include(src_path*"functions_msc.jl")
@everywhere include(src_path*"functions_neighbors.jl")
@everywhere include(src_path*"functions_types.jl")
@everywhere include(src_path*"functions_energy.jl")
@everywhere include(src_path*"functions_mc.jl")
@everywhere include(src_path*"functions_thermalization.jl")
@everywhere include(src_path*"functions_observables.jl")
@everywhere include(src_path*"functions_symmetries.jl")
include(src_path*"functions_plots_and_files.jl")

makeDirRec (generic function with 1 method)

In [3]:
# One replica of the system: a lattice state bundled with the bookkeeping the
# parallel-tempering code in this notebook needs. Declared on every process so that
# replicas can be sent to and stored on workers.
@everywhere mutable struct Replica
    ψ::State        # The lattice state; its `consts.β` is the replica's current inverse temperature
    state::Int64    # Label used as histogram index: 1 = moving up, 2 = moving down, 3 = initial default
    En::Float64     # Cached energy, initialised from E(ψ) and updated by the sweep functions
    sim::Controls   # Simulation controls passed to mcSweepEn! together with ψ
end

In [4]:
A = [1,2,3,4]
p = sortperm(A)
println(p)

[1, 2, 3, 4]


In [5]:
# Convenience constructor: wrap `ψ` and `sim` in a Replica whose label is 3 and
# whose cached energy is computed once from the state via E(ψ).
Replica(ψ::State, sim::Controls) = Replica(ψ, 3, E(ψ), sim)

PTRun

# Parallelizing Monte-Carlo Sweeps across multiple replicas

The challenge here is to avoid copying the replicas to new processes every time a round of sweeps has to be performed.

In [6]:
# Make test state: one State/Controls pair per temperature, wrapped both in a PTRun
# (`pt`) and in a plain list of replicas (`rep_list`) for the distributed-list tests.

g = 1.0    # Gauge coupling
ν = 0.3    # Anisotropy
# Other parameters
L = 24     # System length
L₃ = 24    # Third length argument given to SystConstants
N = L^2*L₃ # Used below to normalise energies (R.En/N); presumably the number of lattice sites
# Temperatures are deliberately not sorted; PTRun sorts its replicas by increasing temperature.
T_list = [1.64, 1.63, 1.6, 1.61, 1.62, 1.65]
κ₅ = 1.0
f = 0.0/L
# One set of system constants per temperature; the last argument is β = 1/T.
syst_list = [SystConstants(L, L₃, 1/g^2, ν, κ₅, f, 1/temp) for temp in T_list]
ψ_list = [State(1, syst; u⁺=1.0, u⁻=0.0) for syst in syst_list]
sim_list = [Controls() for i = 1:length(T_list)]
pt = PTRun(ψ_list, sim_list, 1; verbose=true)
# NOTE: rep_list keeps the original (unsorted) order of T_list and wraps the same
# State objects that pt.rep_list refers to.
rep_list = [Replica(ψ_list[i], sim_list[i]) for i = 1:length(ψ_list)]
println(pt.N_temp)
println("Temperatures: $([1/R.ψ.consts.β for R in pt.rep_list])")

6
Temperatures: [1.6, 1.61, 1.62, 1.63, 1.64, 1.65]


In [7]:
# Run `n` Monte-Carlo sweeps on the replica `R`. The value returned by each
# `mcSweepEn!` call is added to a running energy that starts at `R.En`; the final
# value is written back to `R.En`. Returns the mutated replica.
@everywhere function nMCS!(R::Replica, n::Int64)
    running_energy = R.En
    for _ in 1:n
        running_energy += mcSweepEn!(R.ψ, R.sim)
    end
    R.En = running_energy
    return R
end

# Fast implementation of distributed lists
We create a new type that takes a normal 1-d Array of any type and distributes it evenly across the available worker processes. The elements can then be processed in parallel without copying the element data to each process for every parallel execution.

In [8]:
range(1; length=10)

1:10

In [9]:
# Split the index range 1:n into `nw` contiguous chunks that are as equal in size
# as possible.
#
# Arguments:
#   n  - number of elements to split (must be non-negative).
#   nw - number of chunks (must be positive). Defaults to `nworkers()`, which is 1
#        when no extra worker processes have been added, so the default never
#        produces a division by zero.
#
# Returns a vector of `nw` index ranges. The first `n % nw` ranges hold one element
# more than the rest; when n < nw the trailing ranges are empty.
function getChunckRanges(n::Integer, nw::Integer=nworkers())
    n >= 0 || throw(ArgumentError("number of elements must be non-negative, got $n"))
    nw >= 1 || throw(ArgumentError("number of chunks must be positive, got $nw"))

    chunck_min, pluss_num = divrem(n, nw)
    chunck_ranges = Array{UnitRange{Int64}, 1}(undef, nw)
    start = 1
    for i = 1:nw
        # The first `pluss_num` chunks absorb the remainder, one element each.
        len = chunck_min + (i <= pluss_num ? 1 : 0)
        chunck_ranges[i] = start:(start + len - 1)
        start += len
    end
    return chunck_ranges
end

getChunckRanges (generic function with 2 methods)

In [10]:
length(workers())

2

In [11]:
# Handle to one chunk of a distributed list. The chunk itself (an Array{T,1}) is
# stored inside a remote channel so that it can be worked on without being sent
# back to the process that holds this handle.
@everywhere struct DLChunck{T}
    p::Int64                                    # Id of the process the chunk is associated with
    chan::RemoteChannel{Channel{Array{T,1}}}    # Remote channel holding the chunk's elements
end

In [12]:
# Split `r_list` into contiguous chunks, one per process id in `pids`, and store
# each chunk in a capacity-1 remote channel created on that process.
# Returns the list of chunk handles in the same order as `pids`.
function distribute(r_list::Array{T,1}; pids = workers()) where {T}
    n_chunks = length(pids)
    ranges = getChunckRanges(length(r_list), n_chunks)
    handles = Array{DLChunck, 1}(undef, n_chunks)

    for (slot, pid) in enumerate(pids)
        remote = RemoteChannel(() -> Channel{Array{T,1}}(1), pid)
        handles[slot] = DLChunck(pid, remote)
        put!(remote, r_list[ranges[slot]])
    end
    return handles
end

distribute (generic function with 1 method)

In [13]:
# A list whose elements are split into chunks, each stored in a remote channel.
struct DList
    chunck_list::Array{DLChunck, 1}     # Handles to the chunks
    elem_pr_chunck::Array{Int64, 1}     # Number of elements stored in each chunk
    chuncks::Int64                      # Number of chunks
end
import Base.length
# Total number of elements stored in `dlist`, summed over all of its chunks.
# Uses only the per-chunk counts recorded in the DList, so nothing is fetched.
function length(dlist::DList)
    return sum(dlist.elem_pr_chunck)
end
# Build a DList from a local vector of replicas: distribute the replicas over the
# workers and record how many elements each resulting chunk holds.
function DList(r_list::Array{Replica, 1})
    parts = distribute(r_list)
    sizes = Int64[length(fetch(part.chan)) for part in parts]
    return DList(parts, sizes, length(parts))
end
# Apply `f` to every element of the chunk stored in `chan` and return the results.
# The chunk is only fetched, so it remains available in the channel.
@everywhere function dmap(f::Function, chan::RemoteChannel{Channel{Array{T, 1}}}) where {T}
    return map(f, fetch(chan))
end
# Apply `f` to every element of the distributed list. Each chunk is mapped on the
# process recorded in its handle; the per-chunk results are concatenated in chunk
# order and returned as one local array.
function dmap(f::Function, dlist::DList)
    pending = Array{Future, 1}(undef, dlist.chuncks)
    for (k, chunck) in enumerate(dlist.chunck_list)
        pending[k] = @spawnat chunck.p dmap(f, chunck.chan)
    end
    results = map(fetch, pending)
    return vcat(results...)
end
        

dmap (generic function with 2 methods)

In [14]:
# Apply the mutating function `f!` to every element of the chunk stored in `channel`.
# The chunk is taken out of the channel while it is being modified and is always put
# back afterwards, even if `f!` throws. Without that, a failing `f!` would leave the
# channel empty and every later fetch/take! on it would block. After a failure the
# chunk may be partially mutated. Returns nothing.
@everywhere function dmutate(f!::Function, channel::RemoteChannel{Channel{Array{T, 1}}}) where {T}
    local_list = take!(channel)
    try
        foreach(f!, local_list)
    finally
        put!(channel, local_list)
    end
    return nothing
end
# Apply the mutating function `f!` to every element of the distributed list.
# One task per chunk is started on the process recorded in the chunk handle, and
# the call returns nothing once all of them have been waited for.
function dmutate(f!::Function, list::DList)
    pending = Array{Future, 1}(undef, list.chuncks)
    for (k, chunck) in enumerate(list.chunck_list)
        pending[k] = @spawnat chunck.p dmutate(f!, chunck.chan)
    end
    foreach(wait, pending)
    return nothing
end

dmutate (generic function with 2 methods)

In [15]:
# Apply the mutating function `f!` to every element of the chunk stored in `chan`
# and return the list of values that `f!` returned.
# The chunk is taken out of the channel while it is being modified and is always put
# back afterwards, even if `f!` throws, so a failure cannot leave the channel empty
# and block later fetch/take! calls.
@everywhere function impureMutate(f!::Function, chan::RemoteChannel{Channel{Array{T, 1}}}) where {T}
    local_list = take!(chan)
    try
        return [f!(el) for el in local_list]
    finally
        put!(chan, local_list)
    end
end
# Apply the mutating function `f!` to every element of the distributed list and
# collect the values it returns. Each chunk is processed on the process recorded in
# its handle; the per-chunk results are concatenated in chunk order.
function impureMutate(f!::Function, dlist::DList)
    chuncks = dlist.chuncks
    futures = Array{Future, 1}(undef, chuncks)

    for (i, ck) = enumerate(dlist.chunck_list)
        futures[i] = @spawnat ck.p impureMutate(f!, ck.chan)
    end
    return vcat([fetch(futures[i]) for i = 1:chuncks]...)
end

impureMutate (generic function with 2 methods)

In [34]:
println("Energies: $([R.En/N for R in rep_list])")

Energies: [-3.0, -3.0, -3.0, -3.0, -3.0, -3.0]


In [20]:
fixRC()
ck_list = distribute(rep_list)
length(fetch(ck_list[1].chan))

3

In [21]:
# Run `n` Monte-Carlo sweeps (via nMCS!) on every replica of the chunk stored in
# `remote_r_list`. The chunk is taken out of the channel while it is updated and is
# always put back afterwards, even if a sweep throws, so a failure cannot leave the
# channel empty and block later fetch/take! calls. Returns nothing.
@everywhere function nMCS(remote_r_list::RemoteChannel{Channel{Array{Replica,1}}}, n::T) where T <: Int
    r_list = take!(remote_r_list)
    try
        for R in r_list
            nMCS!(R, n)
        end
    finally
        put!(remote_r_list, r_list)
    end
    return nothing
end
# Run `n` Monte-Carlo sweeps on every replica of every chunk in `dl_list`.
# One task per chunk is started on the process recorded in the chunk handle, and
# the call returns nothing once all of them have been waited for.
function nMCS(dl_list::Array{DLChunck, 1}, n::I) where I <: Int
    pending = Array{Future, 1}(undef, length(dl_list))
    for (k, chunck) in enumerate(dl_list)
        pending[k] = @spawnat chunck.p nMCS(chunck.chan, n)
    end
    foreach(wait, pending)
    return nothing
end

nMCS (generic function with 3 methods)

In [23]:
nMCS(ck_list, 100)

In [31]:
# Fetch the contents of every chunk and concatenate them, in chunk order, into a
# single local array. The chunks stay in their channels.
function localize(chunck_list::Array{DLChunck, 1})
    pieces = [fetch(chunck.chan) for chunck in chunck_list]
    return vcat(pieces...)
end
# Collect all elements of a distributed list into one local array.
localize(dlist::DList) = localize(dlist.chunck_list)

localize (generic function with 3 methods)

In [32]:
new_rep_list = localize(ck_list)
println("Energies: $([R.En/N for R in new_rep_list])")

Energies: [-1.85492, -1.89984, -1.92137, -1.91085, -1.90519, -1.84576]


In [33]:
@benchmark nMCS(ck_list, 1)

BenchmarkTools.Trial: 
  memory estimate:  9.25 KiB
  allocs estimate:  217
  --------------
  minimum time:     130.842 ms (0.00% GC)
  median time:      164.371 ms (0.00% GC)
  mean time:        159.443 ms (0.00% GC)
  maximum time:     176.189 ms (0.00% GC)
  --------------
  samples:          32
  evals/sample:     1

YEAH! It worked!

In [36]:
dlist = DList(rep_list);

DList(DLChunck[DLChunck{Replica}(2, RemoteChannel{Channel{Array{Replica,1}}}(2, 1, 439)), DLChunck{Replica}(3, RemoteChannel{Channel{Array{Replica,1}}}(3, 1, 442))], [3, 3], 2)

In [37]:
dmutate(R -> nMCS!(R, 100), dlist)

In [38]:
new_rep_list = localize(dlist)
println("Energies: $([R.En/N for R in new_rep_list])")

Energies: [-1.84713, -1.8754, -1.93635, -1.94505, -1.91681, -1.80607]


In [39]:
@benchmark dmutate(R -> nMCS!(R, 1), dlist)

BenchmarkTools.Trial: 
  memory estimate:  11.22 KiB
  allocs estimate:  249
  --------------
  minimum time:     116.413 ms (0.00% GC)
  median time:      160.119 ms (0.00% GC)
  mean time:        149.264 ms (0.00% GC)
  maximum time:     176.955 ms (0.00% GC)
  --------------
  samples:          34
  evals/sample:     1

In [40]:
# Energy of a replica: the cached value stored in the replica, not a recomputation.
@everywhere E(R::Replica) = R.En

In [43]:
@benchmark dmap(E, dlist)

BenchmarkTools.Trial: 
  memory estimate:  11.66 KiB
  allocs estimate:  275
  --------------
  minimum time:     399.994 μs (0.00% GC)
  median time:      527.650 μs (0.00% GC)
  mean time:        531.174 μs (1.51% GC)
  maximum time:     26.140 ms (66.27% GC)
  --------------
  samples:          9315
  evals/sample:     1

# Old slow naive methods of parallelization

In [17]:
@benchmark mcSweepEn!(pt.rep_list[1].ψ, pt.rep_list[1].sim)

BenchmarkTools.Trial: 
  memory estimate:  2.74 MiB
  allocs estimate:  69120
  --------------
  minimum time:     30.019 ms (0.00% GC)
  median time:      30.357 ms (0.00% GC)
  mean time:        30.881 ms (1.38% GC)
  maximum time:     38.660 ms (13.57% GC)
  --------------
  samples:          162
  evals/sample:     1

In [17]:
@benchmark pmap(R -> nMCS!(R, pt.N_mc), pt.rep_list)

BenchmarkTools.Trial: 
  memory estimate:  178.14 MiB
  allocs estimate:  8248645
  --------------
  minimum time:     1.333 s (15.45% GC)
  median time:      1.397 s (18.90% GC)
  mean time:        1.391 s (18.91% GC)
  maximum time:     1.436 s (22.13% GC)
  --------------
  samples:          4
  evals/sample:     1

In [10]:
# --------------------------------------------------------------------------------------------------
# Perform nMCS! on a list of replicas (as much as possible in parallel).
# The list is processed in batches: in each batch up to nprocs()-1 replicas are handed
# to other processes with @spawn while one more replica is updated on the calling
# process. The updated replicas returned by the workers replace the corresponding
# entries of rep_list. Returns nothing.
function nMCS!(rep_list::Array{Replica, 1}, n::Int64)
    nw = nprocs()-1
    n_state = length(rep_list)
    
    i = 0 # Index in rep_list of states already updated.
    while i < n_state
        
        worker_jobs = min(nw, n_state-1-i) # Number of needed jobs given to workers
        # Start the max number of workers if that wouldn't be too much.
        work_futures = [Future() for w = 1:worker_jobs]
        
        # Launch the remote jobs first so they run while this process does its own share.
        for w = 1:worker_jobs
            work_futures[w] = @spawn nMCS!(rep_list[i+w], n)
        end
        # The replica right after the spawned ones is updated on the calling process.
        index = i+worker_jobs+1
        rep_list[index] =  nMCS!(rep_list[index], n)
        
        # Collect the replicas returned by the spawned jobs and store them back in the list.
        for w = 1:worker_jobs
            rep_list[i+w] = fetch(work_futures[w])
        end
        
        i += worker_jobs+1
    end
    
    # After this, all states should have been updated
    return
end

nMCS! (generic function with 3 methods)

# Continuing developing parallel tempering type

In [None]:
# State of a parallel-tempering run: the replicas together with swap and
# histogram statistics and the step counters.
@everywhere mutable struct PTRun
    rep_list::Array{Replica,1}  # Replicas; the constructor orders them by increasing temperature
    accepts::Array{Int64,1}     # Number of accepted PT-swaps between i and i+1
    histograms::Array{Array{Int64, 1},1} # Per temperature: counts indexed by replica state (1, 2 or 3)
    N_mc::Int64                 # Number of MC sweeps to do between each PT step.
    N_pt::Int64                 # Number of PT steps done
    N_temp::Int64               # Number of temperatures.
end
# Set up a parallel-tempering run from matching lists of states and controls.
# The pairs are ordered by decreasing β (i.e. increasing temperature) before being
# wrapped in replicas; with `verbose=true` a warning is printed when that required a
# reordering. The first replica is labelled 1 and the last 2; the rest keep the
# default label. Acceptance counters, histograms and the PT step count start at zero.
# Throws an error when the two lists differ in length.
function PTRun(ψ_list::Array{State,1}, sim_list::Array{Controls,1}, N_mc::Int64; verbose=false)
    N = length(ψ_list)
    if N != length(sim_list)
        error("ERROR: List of states and control constants are not same length")
    end

    # Permutation that sorts the states by increasing temperature
    perm = sortperm([ψ.consts.β for ψ in ψ_list]; rev=true)
    if verbose && perm != 1:N
        println("WARNING: Inserted state list did not have states in increasing temperature")
    end

    rep_list = [Replica(ψ_list[k], sim_list[k]) for k in perm]
    rep_list[1].state = 1    # The lowest temperature replica is moving up
    rep_list[N].state = 2    # The highest temperature replica is moving down.

    accepts = zeros(Int64, N-1)
    histograms = [zeros(Int64, 3) for _ = 1:N]
    return PTRun(rep_list, accepts, histograms, N_mc, 0, N)
end

In [6]:
# For every temperature index, add one to the histogram bin selected by the label
# of the replica currently sitting at that temperature.
function incrementHistograms!(pt::PTRun)
    for k in 1:pt.N_temp
        label = pt.rep_list[k].state
        pt.histograms[k][label] += 1
    end
end

incrementHistograms! (generic function with 1 method)

In [7]:
# Exchange the replicas at positions `i` and `j`.
# The constants of the two states are exchanged first, and then the replicas trade
# places in the list. The net effect is that the constants (and hence the
# temperature) stay attached to a list position while the replicas move. Returns nothing.
function ptSwap!(rep_list::Array{Replica,1}, i::Int64, j::Int64)
    Rⱼ = rep_list[j]
    Rᵢ = rep_list[i]

    # Exchange the temperatures carried by the two states
    Rᵢ.ψ.consts, Rⱼ.ψ.consts = Rⱼ.ψ.consts, Rᵢ.ψ.consts

    # Exchange the positions of the replicas in the list
    rep_list[i], rep_list[j] = Rⱼ, Rᵢ
    return nothing
end

ptSwap! (generic function with 1 method)

In [8]:
# Attempt a parallel-tempering swap between the replicas at positions `i` and `j`.
# The swap is accepted with probability min{1, e^{Δβ⋅ΔE}}, where Δβ = βⱼ - βᵢ and
# ΔE = Eⱼ - Eᵢ. Returns true if the swap was performed and false otherwise.
function attemptSwap(rep_list::Array{Replica,1}, i::Int64, j::Int64)
    Δβ = rep_list[j].ψ.consts.β - rep_list[i].ψ.consts.β
    ΔE = rep_list[j].En - rep_list[i].En

    # Draw r ∈ (0, 1] so that log(r) is finite and never positive. Comparing
    # log(r) with Δβ⋅ΔE is the Metropolis test r <= e^{Δβ⋅ΔE} written in logarithms,
    # and it accepts automatically whenever Δβ⋅ΔE is positive.
    r = 1-rand()
    accepted = log(r) <= Δβ*ΔE
    if accepted
        ptSwap!(rep_list, i, j)
    end
    return accepted
end

attemptSwap (generic function with 1 method)

In [25]:
println([E(R.ψ) for R in pt.rep_list])
nMCS!(pt.rep_list, 1000)
println([E(R.ψ) for R in pt.rep_list])

[-7171.9, -386.738, 6338.62]
[-3030.58, 4407.93, 11968.4]


In [43]:
println([R.En for R in pt.rep_list])

[-41472.0, -41472.0, -41472.0]


In [11]:
# Perform one parallel-tempering step:
#   1. run pt.N_mc Monte-Carlo sweeps on every replica,
#   2. record the replica labels in the histograms,
#   3. attempt a swap between each pair of neighbouring temperatures, in order of
#      increasing index, counting the accepted ones (acceptance rule: see attemptSwap),
#   4. increase the PT step counter and re-label the replicas at the two ends.
# Returns nothing.
function PTStep!(pt::PTRun)
    nMCS!(pt.rep_list, pt.N_mc)

    # Histograms are updated before the sequence of swap moves
    incrementHistograms!(pt)

    for lower in 1:pt.N_temp-1
        swapped = attemptSwap(pt.rep_list, lower, lower+1)
        # Update the acceptance count of this pair if the swap went through
        swapped && (pt.accepts[lower] += 1)
    end

    pt.N_pt += 1
    # Whatever replicas ended up at the extremes get the end-point labels
    pt.rep_list[1].state = 1
    pt.rep_list[pt.N_temp].state = 2

    return nothing
end

PTStep! (generic function with 1 method)

In [15]:
[R.En/N for R in pt.rep_list]

5-element Array{Float64,1}:
 -3.0
 -3.0
 -3.0
 -3.0
 -3.0

In [16]:
pt.N_mc

100

In [19]:
for i = 1:70
    PTStep!(pt)
end

In [12]:
@benchmark nMCS!(pt.rep_list, pt.N_mc)

BenchmarkTools.Trial: 
  memory estimate:  328.40 MiB
  allocs estimate:  15042258
  --------------
  minimum time:     3.897 s (11.97% GC)
  median time:      3.907 s (11.52% GC)
  mean time:        3.907 s (11.52% GC)
  maximum time:     3.917 s (11.07% GC)
  --------------
  samples:          2
  evals/sample:     1

In [13]:
@benchmark PTStep!(pt)

BenchmarkTools.Trial: 
  memory estimate:  328.40 MiB
  allocs estimate:  15042276
  --------------
  minimum time:     3.794 s (8.67% GC)
  median time:      3.948 s (11.25% GC)
  mean time:        3.948 s (11.25% GC)
  maximum time:     4.103 s (13.62% GC)
  --------------
  samples:          2
  evals/sample:     1

In [20]:
pt.histograms

6-element Array{Array{Int64,1},1}:
 [236, 0, 0] 
 [211, 21, 4]
 [177, 56, 3]
 [53, 181, 2]
 [11, 224, 1]
 [0, 236, 0] 

In [38]:
pt.N_pt

7

In [21]:
# Acceptance ratio for each pair of neighbouring temperatures: the number of
# accepted swaps divided by the number of PT steps done so far.
function getARList(pt::PTRun)
    return pt.accepts ./ pt.N_pt
end

getARList (generic function with 1 method)

In [22]:
# For each temperature, the fraction of label-1 counts among the label-1 and
# label-2 counts in its histogram (label-3 counts are ignored).
function getHistograms(pt::PTRun)
    return map(pt.histograms) do counts
        counts[1] / (counts[1] + counts[2])
    end
end

getHistograms (generic function with 1 method)

In [23]:
getHistograms(pt)

6-element Array{Float64,1}:
 1.0                
 0.9094827586206896 
 0.759656652360515  
 0.2264957264957265 
 0.04680851063829787
 0.0                

In [47]:
pt.histograms

4-element Array{Array{Int64,1},1}:
 [7, 0, 0]
 [4, 0, 3]
 [0, 3, 4]
 [0, 7, 0]

In [41]:
pt.accepts

3-element Array{Int64,1}:
 3
 0
 1

In [24]:
getARList(pt).*100

5-element Array{Float64,1}:
 64.40677966101694 
 62.28813559322034 
 43.22033898305085 
 31.779661016949152
 66.10169491525424 

In [25]:
[R.En/N for R in pt.rep_list]

6-element Array{Float64,1}:
 -0.9344024445502813
 -0.9166260126388124
 -0.8872395688279187
 -0.9130433607132877
 -0.8218083248427677
 -0.801200944919485 

In [37]:
pt.histograms[3]

3-element Array{Int64,1}:
 0
 2
 0