# Using MSM.jl on a cluster

If you are new to MSM.jl, see [this notebook](https://github.com/JulienPascal/MSM.jl/tree/main/notebooks) first. Below is an example of how to use the package to estimate an economic model on a cluster using [ClusterManagers](https://github.com/JuliaParallel/ClusterManagers.jl) and [Slurm](https://slurm.schedmd.com/documentation.html).

## File 1: find_MSM.jl

In [1]:
using ClusterManagers
using Distributed
# Run-mode switches
#------------------
OnCluster = false   # true: request the workers through Slurm; false: start them on this machine
addWorkers = true   # false: add no worker at all, so everything runs serially
println("OnCluster = $(OnCluster)")

# Workers available before any are added
#---------------------------------------
currentWorkers = nworkers()
println("Initial number of workers = $(currentWorkers)")

# Add the extra workers, through the Slurm manager or as local processes
#-----------------------------------------------------------------------
maxNumberWorkers = 3
if addWorkers
    addprocs(OnCluster ? SlurmManager(maxNumberWorkers) : maxNumberWorkers)
end


# Sanity checks
#-------------
# Ask every worker for its host name and process id. This confirms that each worker
# answers remote calls and shows on which machine it is running.
hosts = String[]   # host name reported by each worker, in the order of workers()
pids = Int[]       # process id reported by each worker, in the same order
for i in workers()
    # Run the tuple expression on worker i and wait for its result
    host, pid = fetch(@spawnat i (gethostname(), getpid()))
    println("Hello I am worker $(i), my host is $(host)")
    push!(hosts, host)
    push!(pids, pid)
end

# Worker count after the optional addprocs call
currentWorkers = nworkers()
println("Number of workers = $(currentWorkers)")


@everywhere using MSM
@everywhere using DataStructures
@everywhere using OrderedCollections
@everywhere using Distributions
@everywhere using Random
@everywhere using DataStructures
@everywhere using Statistics
@everywhere using LinearAlgebra

# Generate the "observed" data set from a linear model with known parameters:
#   y_t = alpha0 + beta0[1]*x_{t,1} + beta0[2]*x_{t,2} + U_t
# The printed values depend on the order of the random draws below.
Random.seed!(1234)  #seed the generator of this process so that the run can be replicated
T = 100000          #number of periods
P = 2               #number of explanatory variables (columns of x)
beta0 = rand(P)     #choose true coefficients by drawing from a uniform distribution on [0,1)
alpha0 = rand(1)[]  #intercept, also a uniform draw; [] extracts the scalar from the 1-element array
theta0 = 0.0        #coefficient to create serial correlation in the error terms
println("True intercept = $(alpha0)")
println("True coefficient beta0 = $(beta0)")
println("Serial correlation coefficient theta0 = $(theta0)")

# Generation of error terms
# U is a vector with one entry per time period.
# Each entry is a fresh Normal() draw plus theta0 times the previous entry,
# so with theta0 = 0.0 the lagged term contributes nothing.
U = zeros(T)
d = Normal()
U[1] = rand(d, 1)[] #first error term (no lagged term available)
# loop over time periods
for t = 2:T
    U[t] = rand(d, 1)[] + theta0*U[t-1]
end

# Let's simulate the explanatory variables x_t (one column per variable)
x = zeros(T, P)

# Each column is filled with T draws from a uniform distribution on [0, 5].
# Note that `d` is re-bound here: it no longer refers to the normal distribution.
d = Uniform(0, 5)
for p = 1:P  
    x[:,p] = rand(d, T)
end

# Let's calculate the resulting dependent variable y_t
y = zeros(T)

for t=1:T
    y[t] = alpha0 + x[t,1]*beta0[1] + x[t,2]*beta0[2] + U[t]
end


# Estimation options and problem object, created on the master and on every worker
@everywhere optionsSMM = MSMOptions(
    maxFuncEvals = 1000,
    globalOptimizer = :dxnes,
    localOptimizer = :NelderMead,
)
@everywhere myProblem = MSMProblem(options = optionsSMM);

# Priors
# The empty dictionary is created on every process, but it is filled and attached to
# the problem on the master only. All three parameters receive the same vector.
# NOTE(review): the entries are presumably [starting value, lower bound, upper bound];
# confirm against the MSM.jl documentation.
@everywhere dictPriors = OrderedDict{String,Array{Float64,1}}()
for parameterName in ("alpha", "beta1", "beta2")
    dictPriors[parameterName] = [0.5, 0.001, 1.0]
end
set_priors!(myProblem, dictPriors)

# Empirical moments
# As for the priors, only the master's copy of the dictionary is filled. Each entry
# stores the same sample moment twice.
# NOTE(review): the second element is presumably the weight of the moment in the
# objective function; confirm against the MSM.jl documentation.
@everywhere dictEmpiricalMoments = OrderedDict{String,Array{Float64,1}}()
let x1y = x[:, 1] .* y, x2y = x[:, 2] .* y
    dictEmpiricalMoments["mean"] = fill(mean(y), 2)               #informative on the intercept
    dictEmpiricalMoments["mean_x1y"] = fill(mean(x1y), 2)         #informative on betas
    dictEmpiricalMoments["mean_x2y"] = fill(mean(x2y), 2)         #informative on betas
    dictEmpiricalMoments["mean_x1y^2"] = fill(mean(x1y .^ 2), 2)  #informative on betas
    dictEmpiricalMoments["mean_x2y^2"] = fill(mean(x2y .^ 2), 2)  #informative on betas
end
set_empirical_moments!(myProblem, dictEmpiricalMoments)


# Simulate the linear model
#   y_t = alpha + beta1*simX[t,1] + beta2*simX[t,2] + U_t
# for a candidate parameter vector and return the simulated moments.
#
# Arguments
# - x: candidate parameters; x[1] corresponds to the intercept, x[2] corresponds to
#   beta1, x[3] corresponds to beta2.
#
# Keyword arguments
# - uniform_draws: uniform draws turned into Normal() shocks through the quantile
#   function; must hold at least nbDraws values.
# - simX: simulated explanatory variables, one row per period; columns 1 and 2 are used.
# - nbDraws: number of simulated periods (default: length(uniform_draws)).
# - burnInPerc: percentage of the simulated sample, between 0 and 99, treated as
#   burn-in and left out of the moments (default: 10).
#
# Returns an OrderedDict{String,Float64} with the keys "mean", "mean_x1y", "mean_x2y",
# "mean_x1y^2" and "mean_x2y^2", inserted in that order.
#
# Throws an ArgumentError when burnInPerc is not between 0 and 99.
@everywhere function functionLinearModel(x; uniform_draws::Array{Float64,1}, simX::Array{Float64,2}, nbDraws::Int64 = length(uniform_draws), burnInPerc::Int64 = 10)
    0 <= burnInPerc < 100 || throw(ArgumentError("burnInPerc must be between 0 and 99, got $(burnInPerc)"))

    T = nbDraws

    alpha = x[1]
    beta = x[2:end]
    theta = 0.0     #coefficient to create serial correlation in the error terms

    # Creation of error terms (one entry per time period)
    # The quantile function (inverse cdf) maps the uniform draws to draws from Normal(),
    # so the shocks are a deterministic function of uniform_draws.
    d = Normal()
    gaussian_draws = quantile.(d, uniform_draws)
    U = zeros(T)
    U[1] = gaussian_draws[1] #first error term

    # loop over time periods
    for t = 2:T
        U[t] = gaussian_draws[t] + theta*U[t-1]
    end

    # Let's calculate the resulting y_t
    y = zeros(T)
    for t = 1:T
        y[t] = alpha + simX[t,1]*beta[1] + simX[t,2]*beta[2] + U[t]
    end

    # Get rid of the burn-in phase:
    #------------------------------
    # burnInPerc is a percentage, so the first retained period is nbDraws*burnInPerc/100.
    # max(1, ...) keeps the index valid when the sample is short or the burn-in is 0%.
    startT = max(1, div(nbDraws * burnInPerc, 100))
    kept = startT:nbDraws

    # Retained sample and cross-products, computed once and reused below
    yKept = y[kept]
    x1y = simX[kept, 1] .* yKept
    x2y = simX[kept, 2] .* yKept

    # Moments:
    #---------
    output = OrderedDict{String,Float64}()
    output["mean"] = mean(yKept)
    output["mean_x1y"] = mean(x1y)
    output["mean_x2y"] = mean(x2y)
    output["mean_x1y^2"] = mean(x1y .^ 2)
    output["mean_x2y^2"] = mean(x2y .^ 2)

    return output
end

# Let's freeze the randomness during the minimization: the draws below are generated
# once, before the optimization, and the same arrays are passed to every evaluation
# of functionLinearModel.
# NOTE(review): each `@everywhere` line is evaluated separately on every process, and
# the only Random.seed! call in this file runs on the master, so the workers presumably
# hold a different `uniform_draws`. `simX` is filled by the loop below on the master
# only; the copies created on the workers stay at zero. Confirm which copy the workers
# use during msm_optimize!.
@everywhere d_Uni = Uniform(0,1)
@everywhere nbDraws = 100000 #number of draws in the simulated data
@everywhere uniform_draws = rand(d_Uni, nbDraws)
@everywhere simX = zeros(length(uniform_draws), 2)
# Simulated explanatory variables: each column holds draws from a uniform on [0, 5]
d = Uniform(0, 5)
for p = 1:2
  simX[:,p] = rand(d, length(uniform_draws))
end

# Register the function that maps a parameter vector to simulated moments, then build
# the objective function from it
set_simulate_empirical_moments!(myProblem, x -> functionLinearModel(x, uniform_draws = uniform_draws, simX = simX))
construct_objective_function!(myProblem)

# Run the optimization
msm_optimize!(myProblem, verbose = false)

# Best parameter vector stored in the optimizer results
minimizer = best_candidate(myProblem.bbResults)

# Compare the estimates with the true values used to generate the data
# (order of minimizer: alpha, beta1, beta2)
println("Estimated value for alpha = $(minimizer[1])")
println("True value for alpha = $(alpha0[1]) \n")

println("Estimated value for beta1 = $(minimizer[2])")
println("True value for beta1 = $(beta0[1]) \n")

println("Estimated value for beta2 = $(minimizer[3])")
println("True value for beta2 = $(beta0[2]) \n")

OnCluster = false
Initial number of workers = 1
Hello I am worker 2, my host is julien-myPC
Hello I am worker 3, my host is julien-myPC
Hello I am worker 4, my host is julien-myPC
Number of workers = 3
True intercept = 0.5662374165061859
True coefficient beta0 = [0.5908446386657102, 0.7667970365022592]
Serial correlation coefficient theta0 = 0.0
      From worker 2:	hello
      From worker 3:	hello
      From worker 4:	hello


┌ Info: 3 worker(s) detected
└ @ MSM /home/julien/.julia/dev/MSM/src/generic.jl:302
┌ Info: Starting optimization in parallel
└ @ MSM /home/julien/.julia/dev/MSM/src/generic.jl:302


Starting optimization with optimizer BlackBoxOptim.DXNESOpt{Float64,BlackBoxOptim.RandomBound{BlackBoxOptim.ContinuousRectSearchSpace}}
0.00 secs, 0 evals, 0 steps
σ=1.0 η[x]=1.0 η[σ]=0.0 η[B]=0.0 |tr(ln_B)|=0.0 |path|=NaN speed=NaN
1.31 secs, 8 evals, 1 steps, fitness=0.028691361
σ=1.1276264450966944 η[x]=1.0 η[σ]=1.5714285714285714 η[B]=0.06349206349206349 |tr(ln_B)|=0.0 |path|=NaN speed=NaN
1.82 secs, 232 evals, 29 steps, fitness=0.000550881
σ=24.321478777559744 η[x]=1.0 η[σ]=1.5714285714285714 η[B]=0.06349206349206349 |tr(ln_B)|=0.0 |path|=NaN speed=NaN
2.33 secs, 480 evals, 60 steps, fitness=0.000049780
σ=133.4414677995546 η[x]=1.0 η[σ]=1.5714285714285714 η[B]=0.06349206349206349 |tr(ln_B)|=5.551115123125783e-17 |path|=NaN speed=NaN
2.83 secs, 728 evals, 91 steps, fitness=0.000049780
σ=75.63796192926111 η[x]=1.0 η[σ]=1.5714285714285714 η[B]=0.06349206349206349 |tr(ln_B)|=0.0 |path|=NaN speed=NaN
3.34 secs, 984 evals, 123 steps, fitness=0.000049780
σ=11.221026133531524 η[x]=1.0 η[σ

## File 2: submit_job.sh

```bash
#!/bin/bash -
# Slurm batch script for find_MSM.jl.
# It requests one node with three tasks, which matches maxNumberWorkers = 3 in
# find_MSM.jl. That script only requests its workers through Slurm when
# OnCluster = true.
#SBATCH -J MSM
#SBATCH -N 1
#SBATCH --ntasks-per-node=3
#SBATCH --qos=qos-batch
#SBATCH -o slurmMSM.out
#SBATCH -e slurmMSM.err

# Log the job context at the top of slurmMSM.out
echo "== Starting run at $(date)"
echo "== Job ID: ${SLURM_JOBID}"
echo "== Node list: ${SLURM_NODELIST}"
echo "== Submit dir. : ${SLURM_SUBMIT_DIR}"
# Estimate on cluster
# The path is quoted so that a working directory containing spaces still works
julia "$PWD/find_MSM.jl"
```

To start the estimation, run from the terminal:

```bash
sbatch submit_job.sh
```

## Appendix

In [2]:
# Print the Julia version and platform details of the current session
versioninfo()

Julia Version 1.5.1
Commit 697e782ab8 (2020-08-25 20:08 UTC)
Platform Info:
  OS: Linux (x86_64-pc-linux-gnu)
  CPU: Intel(R) Core(TM) i7-8850H CPU @ 2.60GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-9.0.1 (ORCJIT, skylake)
