# Julia–C++ Verification

In [33]:
# Compile and run a 3x3x3 lattice
# String macro: bash"..." pipes the literal text to a `bash` process whose output is
# forwarded to `stdout`.
# The macro returns an expression (instead of running the command in the macro body) so
# the script executes when the expanded code is evaluated, not at macro-expansion time.
macro bash_str(s)
    return quote
        open(`bash`, "w", stdout) do io
            print(io, $s)
        end
    end
end


bash"""
# Abort on the first failing command so a failed compile never runs a stale binary
set -e

SRC=code
BLD="$SRC/build"

# Make sure the output directory exists before compiling into it
mkdir -p "$BLD"

g++ -std=c++14 -o "$BLD/vortextest" "$SRC/vortextest.cpp"
"$BLD/vortextest"
"""


--------------- PARTICLE POSITION -------------------------------
#	X1	X2	X3	sigma
1	0.00000	0.00000	0.00000	0.75000	
2	0.50000	0.00000	0.00000	0.75000	
3	1.00000	0.00000	0.00000	0.75000	
4	0.00000	0.50000	0.00000	0.75000	
5	0.50000	0.50000	0.00000	0.75000	
6	1.00000	0.50000	0.00000	0.75000	
7	0.00000	1.00000	0.00000	0.75000	
8	0.50000	1.00000	0.00000	0.75000	
9	1.00000	1.00000	0.00000	0.75000	
10	0.00000	0.00000	0.50000	0.75000	
11	0.50000	0.00000	0.50000	0.75000	
12	1.00000	0.00000	0.50000	0.75000	
13	0.00000	0.50000	0.50000	0.75000	
14	0.50000	0.50000	0.50000	0.75000	
15	1.00000	0.50000	0.50000	0.75000	
16	0.00000	1.00000	0.50000	0.75000	
17	0.50000	1.00000	0.50000	0.75000	
18	1.00000	1.00000	0.50000	0.75000	
19	0.00000	0.00000	1.00000	0.75000	
20	0.50000	0.00000	1.00000	0.75000	
21	1.00000	0.00000	1.00000	0.75000	
22	0.00000	0.50000	1.00000	0.75000	
23	0.50000	0.50000	1.00000	0.75000	
24	1.00000	0.50000	1.00000	0.75000	
25	0.00000	1.00000	1.00000	0.75000	
26	0.50000	1.00000	1.0000

In [1]:
# Compile and load Cxx wrapper
# String macro: bash"..." pipes the literal text to a `bash` process whose output is
# forwarded to `stdout`.
# The macro returns an expression (instead of running the command in the macro body) so
# the script executes when the expanded code is evaluated, not at macro-expansion time.
macro bash_str(s)
    return quote
        open(`bash`, "w", stdout) do io
            print(io, $s)
        end
    end
end

bash"""
# Abort on the first failing command so a failed compile never links a stale object
set -e

SRC=/home/edoalvar/Dropbox/FLOWResearch/LabNotebook/Posts/juliacpp/code
BLD="$SRC/build"

# Make sure the output directory exists before compiling into it
mkdir -p "$BLD"

# Generates the compiled object vortextest_jlcxx.cpp.o:
c++ -DJULIA_ENABLE_THREADING -Dhello_EXPORTS \
-I/home/edoalvar/.julia/packages/CxxWrap/KcmSi/deps/usr/include \
-I/home/edoalvar/Programs/julia-1.0.3/include/julia/ \
-std=c++14 -fPIC -o "$BLD/vortextest_jlcxx.cpp.o" -c "$SRC/vortextest_jlcxx.cpp"

# Generates the shared library vortextest_jlcxx.so:
c++  -fPIC  -std=c++14  \
-shared -Wl,-soname,vortextest_jlcxx.so \
-o "$BLD/vortextest_jlcxx.so" \
"$BLD/vortextest_jlcxx.cpp.o" \
-Wl,-rpath,: \
/home/edoalvar/.julia/packages/CxxWrap/KcmSi/deps/usr/lib/libcxxwrap_julia.so.0.5.1 \
/home/edoalvar/Programs/julia-1.0.3/lib/libjulia.so

"""

# Load the module and generate the functions
# Julia-side wrapper module around the compiled `vortextest_jlcxx` library.
# NOTE(review): the library path below is relative, so it presumably resolves against
# the current working directory — confirm the notebook runs from the folder holding `code/`.
module CxxVortexTest
  using CxxWrap
  # Generate the Julia functions for the library found at this path
  @wrapmodule("code/build/vortextest_jlcxx")

  # Module initializer: runs CxxWrap's `@initcxx` when the module is loaded
  function __init__()
    @initcxx
  end
end

Main.CxxVortexTest

In [2]:
ntests = 100                # Tests to run
n = 3                       # Particles per edge
lambda = 1.5                # Core overlap
verbose = true

# `lambda` is narrowed to Float32 before being handed to the C++ wrapper.
# The value returned matches the "min time" (ms) shown in the recorded output.
CxxVortexTest.benchmarkP2P_wrap(ntests, n, Float32(lambda), verbose)

Samples:	100
min time:	0.201006 ms
ave time:	0.23217 ms
max time:	0.398896 ms


0.2010059952735901

In [9]:
ntests = 1000                # Tests to run
n = 5                       # Particles per edge
lambda = 1.5                # Core overlap
verbose = true

# `lambda` is narrowed to Float32 before being handed to the C++ wrapper.
# The value returned matches the "min time" (ms) shown in the recorded output.
CxxVortexTest.benchmarkP2P_wrap(ntests, n, Float32(lambda), verbose)

Samples:	1000
min time:	4.43922 ms
ave time:	4.6219 ms
max time:	9.00731 ms


4.439218044281006

In [32]:
using LinearAlgebra

# Prefactor 1/(4π) shared by the P2P kernels defined in this cell
const const4 = 1/(4*pi)

"""
This is a particle type with properties that are specified
as concrete types
"""
struct Particle{T}

  # User inputs
  X::Array{T, 1}                      # Position
  Gamma::Array{T, 1}                  # Vectorial circulation
  sigma::T                            # Smoothing radius

  # Properties
  U::Array{T, 1}                      # Velocity at particle
  J::Array{T, 2}                      # Jacobian at particle J[i,j]=dUi/dxj

end

"""
    Particle{T}(X, Gamma, sigma)

Build a particle with element type `T` from its position `X`, vectorial circulation
`Gamma` and smoothing radius `sigma`. The velocity `U` (length 3) and Jacobian `J`
(3×3) start at zero.

The arguments are converted to `T` by the typed constructor, so integer or
differently-typed inputs are accepted. Arrays that already have element type `T`
are stored as given (no copy).
"""
function Particle{T}(X, Gamma, sigma) where {T}
  # Forward to the *typed* constructor so the requested T is honoured
  return Particle{T}(X, Gamma, sigma, zeros(T, 3), zeros(T, 3, 3))
end

"""
    zero(::Type{<:Particle{T}})

Return a `Particle{T}` whose position, circulation, smoothing radius, velocity and
Jacobian are all zero.
"""
Base.zero(::Type{<:Particle{T}}) where {T} = Particle{T}(zeros(T, 3), zeros(T, 3),
                                                         zero(T),
                                                         zeros(T, 3), zeros(T, 3, 3))

"Adds particles in a regular box lattice"
function generate_particles(PType, n, lambda; l=1, Gamma=ones(3))
    # Arguments:
    #   PType   particle constructor, called as PType(X, Gamma, sigma)
    #   n       particles per edge (n^3 particles are generated)
    #   lambda  core overlap: sigma = lambda * (lattice spacing)
    #   l       edge length of the box (keyword)
    #   Gamma   circulation given to every particle (keyword)
    # Returns a vector of n^3 particles ordered with the first coordinate varying
    # fastest, then the second, then the third.

    # The lattice spacing is l/(n-1), so at least two particles per edge are required
    n >= 2 || throw(ArgumentError("n must be at least 2 (got $n): the lattice spacing is l/(n-1)"))

    sigma = l/(n-1)*lambda
    xs = range(0, stop=l, length=n)

    # Each particle receives its own copy of Gamma so that mutating one particle's
    # circulation cannot silently change all the others.
    lattice = [PType([xs[i], xs[j], xs[k]], collect(Gamma), sigma)
               for i in 1:n, j in 1:n, k in 1:n]

    # Column-major flattening keeps the first index (X1) as the fastest-varying one
    return vec(lattice)
end

"""
    g_wnklmns(r)

Regularizing function `r^3 (r^2 + 2.5) / (r^2 + 1)^2.5` evaluated at `r`
(the P2P kernels call it with the distance scaled by the smoothing radius).
"""
function g_wnklmns(r)
    r2 = r^2
    return r^3 * (r2 + 2.5) / (r2 + 1)^2.5
end
"""
    dgdr_wnklmns(r)

Derivative companion of `g_wnklmns`: `7.5 r^2 / (r^2 + 1)^3.5` evaluated at `r`.
"""
function dgdr_wnklmns(r)
    r2 = r^2
    return 7.5 * r2 / (r2 + 1)^3.5
end

"""
    P2P_general(particles, g, dgdr)

Reference particle-to-particle interaction written with array operations.

For every pair of particles at distinct positions, the contribution of the source
particle is added to the target particle's velocity `U` and Jacobian `J`. `g` and
`dgdr` are the regularizing function and its derivative; both are called with the
distance divided by the source particle's `sigma`.

`U` and `J` are accumulated in place and are not reset, so repeated calls keep
adding to the stored values.
"""
function P2P_general(particles::Array{Particle{T}}, 
                     g::Function, dgdr::Function) where T

    for target in particles
        for source in particles

            dX = target.X - source.X
            r = norm(dX)

            # Coincident positions (self-interaction included) contribute nothing
            r == 0 && continue

            # Regularizing function and its derivative at the scaled distance
            rho = r/source.sigma
            gsgm = g(rho)
            dgsgmdr = dgdr(rho)

            # K × Γp
            crss = cross(-const4 * dX / r^3, source.Gamma)

            # U = ∑g_σ(x-xp) * K(x-xp) × Γp
            target.U .+= gsgm * crss

            # ∂u∂xj(x) = ∑[ ∂gσ∂xj(x−xp) * K(x−xp)×Γp + gσ(x−xp) * ∂K∂xj(x−xp)×Γp ]
            delta_coeff = gsgm * const4/r^3
            for j in 1:3
                grad_coeff = dX[j]/(source.sigma*r)*dgsgmdr
                decay_coeff = gsgm * 3*dX[j]/r^2
                e_j = [i==j for i in 1:3]       # j-th unit vector (Kronecker delta)

                target.J[:, j] .+= ( grad_coeff * crss -
                                     decay_coeff * crss -
                                     delta_coeff * cross(e_j, source.Gamma) )
            end

        end
    end
end

"""
    g_dgdr_wnklmns(r)

Return the tuple `(g, dgdr)`: the regularizing function
`r^3 (r^2 + 2.5) / (r^2 + 1)^2.5` and `7.5 r^2 / (r^2 + 1)^3.5`, computed together
so the fractional power is evaluated only once.
"""
function g_dgdr_wnklmns(r)
  r2 = r^2
  base = r2 + 1
  pow25 = base^2.5          # shared by both results

  g = r^3 * (r2 + 2.5) / pow25
  dgdr = 7.5 * r2 / (pow25*base)

  return g, dgdr
end

"""
    P2P_FINAL(particles, g_dgdr)

Allocation-free particle-to-particle interaction with all vector algebra unrolled
into scalar operations.

For every pair of particles at distinct positions, the contribution of the source
particle `Pj` is added to the velocity `U` and Jacobian `J` of the target particle
`Pi`. `g_dgdr` is called with the distance divided by `Pj.sigma` and must return the
regularizing function and its derivative as a tuple.

`U` and `J` are accumulated in place and are not reset, so repeated calls keep
adding to the stored values.
"""
function P2P_FINAL(particles::Array{Particle{T}}, g_dgdr::Function) where T

  for Pi in particles
    for Pj in particles

      dX1 = Pi.X[1] - Pj.X[1]
      dX2 = Pi.X[2] - Pj.X[2]
      dX3 = Pi.X[3] - Pj.X[3]
      r = sqrt(dX1*dX1 + dX2*dX2 + dX3*dX3)

      # Coincident positions (self-interaction included) contribute nothing
      if r != 0

          # Regularizing function and deriv
          gsgm, dgsgmdr = g_dgdr(r/Pj.sigma)

          # K × Γp
          kfac = -const4 / r^3
          crss1 = kfac * ( dX2*Pj.Gamma[3] - dX3*Pj.Gamma[2] )
          crss2 = kfac * ( dX3*Pj.Gamma[1] - dX1*Pj.Gamma[3] )
          crss3 = kfac * ( dX1*Pj.Gamma[2] - dX2*Pj.Gamma[1] )

          # U = ∑g_σ(x-xp) * K(x-xp) × Γp
          Pi.U[1] += gsgm * crss1
          Pi.U[2] += gsgm * crss2
          Pi.U[3] += gsgm * crss3

          # ∂u∂xj(x) = ∑[ ∂gσ∂xj(x−xp) * K(x−xp)×Γp + gσ(x−xp) * ∂K∂xj(x−xp)×Γp ]
          # The two terms multiplying (K×Γp)*dXj are a DIFFERENCE, as in P2P_general:
          #   dgdr/(σ r) - 3 g / r^2
          aux = dgsgmdr/(Pj.sigma*r) - 3*gsgm/r^2
          # j=1
          Pi.J[1, 1] += aux * crss1 * dX1
          Pi.J[2, 1] += aux * crss2 * dX1
          Pi.J[3, 1] += aux * crss3 * dX1
          # j=2
          Pi.J[1, 2] += aux * crss1 * dX2
          Pi.J[2, 2] += aux * crss2 * dX2
          Pi.J[3, 2] += aux * crss3 * dX2
          # j=3
          Pi.J[1, 3] += aux * crss1 * dX3
          Pi.J[2, 3] += aux * crss2 * dX3
          Pi.J[3, 3] += aux * crss3 * dX3

          # Adds the Kronecker delta term: -g/(4π r^3) * (e_j × Γp)
          aux = - const4 * gsgm / r^3
          # j=1
          Pi.J[2, 1] -= aux * Pj.Gamma[3]
          Pi.J[3, 1] += aux * Pj.Gamma[2]
          # j=2
          Pi.J[1, 2] += aux * Pj.Gamma[3]
          Pi.J[3, 2] -= aux * Pj.Gamma[1]
          # j=3
          Pi.J[1, 3] -= aux * Pj.Gamma[2]
          Pi.J[2, 3] += aux * Pj.Gamma[1]
      end

    end
  end
end

P2P_FINAL (generic function with 1 method)

In [27]:
# Problem setup (same lattice as the C++ run)
ntests = 100                # Tests to run
n = 3                       # Particles per edge
lambda = 1.5                # Core overlap
verbose = true

# Build the lattice and evaluate the reference kernel once
particles = generate_particles(Particle{Float64}, n, lambda)
P2P_general(particles, g_wnklmns, dgdr_wnklmns)

# Table 1: one row per particle with its position and smoothing radius
println("--------------- PARTICLE POSITION -------------------------------")
println("#\tX1\tX2\tX3\tsigma")
foreach(enumerate(particles)) do (i, P)
    println("$i\t$(P.X[1])\t$(P.X[2])\t$(P.X[3])\t$(P.sigma)")
end


# Table 2: one row per particle with U followed by the entries of J in storage order
println("\n--------------- U and J -----------------------------------------")
println("#\tU1\tU2\tU3\tJ1\tJ2\tJ3\tJ4\tJ5\tJ6\tJ7\tJ8\tJ9")
foreach(enumerate(particles)) do (i, P)
    print("$i")
    foreach(u -> print("\t$(round(u, digits=2))"), P.U)
    foreach(j -> print("\t$(round(j, digits=2))"), P.J)
    println("")
end

--------------- PARTICLE POSITION -------------------------------
#	X1	X2	X3	sigma
1	0.0	0.0	0.0	0.75
2	0.5	0.0	0.0	0.75
3	1.0	0.0	0.0	0.75
4	0.0	0.5	0.0	0.75
5	0.5	0.5	0.0	0.75
6	1.0	0.5	0.0	0.75
7	0.0	1.0	0.0	0.75
8	0.5	1.0	0.0	0.75
9	1.0	1.0	0.0	0.75
10	0.0	0.0	0.5	0.75
11	0.5	0.0	0.5	0.75
12	1.0	0.0	0.5	0.75
13	0.0	0.5	0.5	0.75
14	0.5	0.5	0.5	0.75
15	1.0	0.5	0.5	0.75
16	0.0	1.0	0.5	0.75
17	0.5	1.0	0.5	0.75
18	1.0	1.0	0.5	0.75
19	0.0	0.0	1.0	0.75
20	0.5	0.0	1.0	0.75
21	1.0	0.0	1.0	0.75
22	0.0	0.5	1.0	0.75
23	0.5	0.5	1.0	0.75
24	1.0	0.5	1.0	0.75
25	0.0	1.0	1.0	0.75
26	0.5	1.0	1.0	0.75
27	1.0	1.0	1.0	0.75

--------------- U and J -----------------------------------------
#	U1	U2	U3	J1	J2	J3	J4	J5	J6	J7	J8	J9
1	0.0	-0.0	0.0	-0.0	1.15	-1.15	-1.15	0.0	1.15	1.15	-1.15	-0.0
2	0.0	0.81	-0.81	0.0	1.02	-1.02	-1.44	0.56	0.87	1.44	-0.87	-0.56
3	0.0	1.37	-1.37	-0.0	0.22	-0.22	-1.15	0.93	0.22	1.15	-0.22	-0.93
4	-0.81	-0.0	0.81	-0.56	1.44	-0.87	-1.02	-0.0	1.02	0.87	-1.44	0.56
5	-0.95	0.95	0.0	0.0	

In [36]:
# Queries the Array container itself, not its elements (result recorded below: false)
isbits(particles)

false

In [30]:
using BenchmarkTools

# Interpolate the non-const global with `$` so the measurement covers the kernel itself
# rather than the lookup/dynamic dispatch on an untyped global (BenchmarkTools guidance).
# Every sample keeps accumulating into the particles' U and J; only the timing is meaningful.
@benchmark P2P_general($particles, g_wnklmns, dgdr_wnklmns)

BenchmarkTools.Trial: 
  memory estimate:  2.65 MiB
  allocs estimate:  32319
  --------------
  minimum time:     743.877 μs (0.00% GC)
  median time:      847.390 μs (0.00% GC)
  mean time:        1.093 ms (15.60% GC)
  maximum time:     34.148 ms (96.83% GC)
  --------------
  samples:          4560
  evals/sample:     1

In [35]:
using BenchmarkTools

# Interpolate the non-const global with `$` so the measurement covers the kernel itself
# rather than the lookup/dynamic dispatch on an untyped global (BenchmarkTools guidance).
# Every sample keeps accumulating into the particles' U and J; only the timing is meaningful.
@benchmark P2P_FINAL($particles, g_dgdr_wnklmns)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     57.611 μs (0.00% GC)
  median time:      58.672 μs (0.00% GC)
  mean time:        61.466 μs (0.00% GC)
  maximum time:     171.751 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1

# Speeding up

In [123]:

# Array-free particle: position and circulation are stored as individual scalar
# fields of type T instead of arrays.
struct ParticleNoArr{T}

  # User inputs
  X1::T                               # Position, component 1
  X2::T                               # Position, component 2
  X3::T                               # Position, component 3
  Gamma1::T                           # Vectorial circulation, component 1
  Gamma2::T                           # Vectorial circulation, component 2
  Gamma3::T                           # Vectorial circulation, component 3
  sigma::T                            # Smoothing radius

end

"""
    zero(::Type{<:ParticleNoArr{T}})

Return a `ParticleNoArr{T}` with every field equal to `zero(T)`.
"""
Base.zero(::Type{<:ParticleNoArr{T}}) where {T} = ParticleNoArr{T}(ntuple(_ -> zero(T), 7)...)

"""
    ParticleNoArr{T}(X, Gamma, sigma)

Build a `ParticleNoArr{T}` from a 3-component position `X`, a 3-component
circulation `Gamma` and the smoothing radius `sigma`. All values are converted to `T`.

Throws `ArgumentError` when `X` or `Gamma` does not have exactly 3 components.
"""
function ParticleNoArr{T}(X, Gamma, sigma) where {T}
  # Guarding the lengths also prevents this 3-argument method from calling itself
  (length(X) == 3 && length(Gamma) == 3) ||
      throw(ArgumentError("X and Gamma must each have exactly 3 components"))

  # Forward to the *typed* constructor so the requested T is honoured
  return ParticleNoArr{T}(X..., Gamma..., sigma)
end


"""
    P2P_preout(particles, g_dgdr, U, J)

Particle-to-particle interaction for array-free particles, writing into
caller-provided output arrays.

For every pair of particles at distinct positions, the contribution of the source
particle `Pj` is added to column `i` of `U` (indexed `U[1:3, i]`) and to slice `i` of
`J` (indexed `J[1:3, 1:3, i]`), where `i` is the index of the target particle.
`g_dgdr` is called with the distance divided by `Pj.sigma` and must return the
regularizing function and its derivative as a tuple.

`U` and `J` are accumulated in place and are not reset. Indexing runs under
`@inbounds`, so the caller must size `U` and `J` to cover every particle index.
"""
function P2P_preout(particles::Array{ParticleNoArr{T}}, g_dgdr::Function, 
                                    U::Array{T, 2}, J::Array{T, 3}) where T

  for (i, Pi) in enumerate(particles)
    @simd for Pj in particles
            
      @fastmath @inbounds begin
      
          dX1 = Pi.X1 - Pj.X1
          dX2 = Pi.X2 - Pj.X2
          dX3 = Pi.X3 - Pj.X3
          r = sqrt(dX1*dX1 + dX2*dX2 + dX3*dX3)

          # Coincident positions (self-interaction included) contribute nothing
          if r != 0 

              # Regularizing function and deriv
              gsgm, dgsgmdr = g_dgdr(r/Pj.sigma)

              # K × Γp
              kfac = -const4 / r^3
              crss1 = kfac * ( dX2*Pj.Gamma3 - dX3*Pj.Gamma2 )
              crss2 = kfac * ( dX3*Pj.Gamma1 - dX1*Pj.Gamma3 )
              crss3 = kfac * ( dX1*Pj.Gamma2 - dX2*Pj.Gamma1 )

              # U = ∑g_σ(x-xp) * K(x-xp) × Γp
              U[1, i] += gsgm * crss1
              U[2, i] += gsgm * crss2
              U[3, i] += gsgm * crss3

              # ∂u∂xj(x) = ∑[ ∂gσ∂xj(x−xp) * K(x−xp)×Γp + gσ(x−xp) * ∂K∂xj(x−xp)×Γp ]
              # The two terms multiplying (K×Γp)*dXj are a DIFFERENCE:
              #   dgdr/(σ r) - 3 g / r^2
              aux = dgsgmdr/(Pj.sigma*r) - 3*gsgm/r^2
              # j=1
              J[1, 1, i] += aux * crss1 * dX1
              J[2, 1, i] += aux * crss2 * dX1
              J[3, 1, i] += aux * crss3 * dX1
              # j=2
              J[1, 2, i] += aux * crss1 * dX2
              J[2, 2, i] += aux * crss2 * dX2
              J[3, 2, i] += aux * crss3 * dX2
              # j=3
              J[1, 3, i] += aux * crss1 * dX3
              J[2, 3, i] += aux * crss2 * dX3
              J[3, 3, i] += aux * crss3 * dX3

              # Adds the Kronecker delta term: -g/(4π r^3) * (e_j × Γp)
              aux = - const4 * gsgm / r^3
              # j=1
              J[2, 1, i] -= aux * Pj.Gamma3
              J[3, 1, i] += aux * Pj.Gamma2
              # j=2
              J[1, 2, i] += aux * Pj.Gamma3
              J[3, 2, i] -= aux * Pj.Gamma1
              # j=3
              J[1, 3, i] -= aux * Pj.Gamma2
              J[2, 3, i] += aux * Pj.Gamma1
          end
      end
    end
  end
end


P2P_preout (generic function with 3 methods)

In [52]:
# Rebuild the lattice with the array-free particle type
particles = generate_particles(ParticleNoArr{Float64}, n, lambda)

# Queries the Array container itself (result recorded below: false); the element
# check is done in the next cell
isbits(particles)

false

In [53]:
# Queries a single element rather than the container (result recorded below: true)
isbits(particles[1])

true

In [124]:
# Pre-allocated outputs: one 3-vector and one 3×3 matrix per particle
U = zeros(Float64, 3, length(particles))
J = zeros(Float64, 3, 3, length(particles))

# Interpolate the non-const globals with `$` so the measurement covers the kernel itself
# rather than the lookup/dynamic dispatch on untyped globals (BenchmarkTools guidance).
# Every sample keeps accumulating into U and J; only the timing is meaningful.
@benchmark P2P_preout($particles, g_dgdr_wnklmns, $U, $J)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     52.186 μs (0.00% GC)
  median time:      56.988 μs (0.00% GC)
  mean time:        63.448 μs (0.00% GC)
  maximum time:     186.268 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1