In [1]:
# REAL NOTEBOOK

# This notebook runs a simulation for the Solar Dynamo model with the sABC algorithm.
# The results of the simulation are all stored in files in the directory Simulations/Real i.
# The reason behind this is to have an easier access to the results of already run simulations,
# without running them again. This notebook only processes real data!!

# NB: In this notebook, there's no function to visualize the results: the visualization is all
# contained in the "visualization_real.ipynb" notebook.

# RULES:

# There are two ways to use this notebook:
# 1) change all the parameters and then run all -> correct way
# 2) change things randomly and not in order and then run -> wrong way
# Please be careful, some functions change the directory in which everything is being saved; the
# order of the calling of the functions is organized to start from a generic dir, create the dir
# Simulations/Simulation i, go to that directory and then, when everythingis finished, go back to
# the initial dir!!! If you don't do that, it will stay in the subdir and at the next run it will
# create a subdir of a subdir -> if you need to stop midway through because u forgot something, 
# remember to come back to the initial directory (and eliminate the directory that has not correct
# files inside).

# GG EZ - kallo27

In [2]:
# NEEDED PACKAGES -> no visualization!!

using StochasticDelayDiffEq
using SpecialFunctions
using Distributions
using SimulatedAnnealingABC
using Distances
using DataFrames
using FFTW
using CSV
using XLSX
using ThreadPinning

In [3]:
# FUNCTIONS NEDEED FOR THE MODEL

# Box-shaped function for the magnetic field range 
function f(B, B_max = 10, B_min = 1)
  return 1 / 4 * (1 .+ erf.(B .^ 2 .- B_min ^ 2)) .* (1 .- erf.(B .^ 2 .- B_max ^ 2))
end

# Model function for the DDE
function MagneticField(du, u, h, p, t)
  N, T, tau, sigma, Bmax = p

  B, dB = u

  du[1] = dB
  du[2] = - ((2 / tau) * dB + (B / tau^2) + (N / tau^2) * h(p, t - T)[1] * f(h(p, t - T)[1], Bmax))
end

# Noise function for the DDE
function noise!(du, u, h, p, t)
  N, T, tau, sigma, Bmax = p
  du[1] = 0
  du[2] = (sigma * Bmax)/(tau^(3/2))
end

# Distance function in the sABC algorithm
function f_dist(θ::Vector{Float64}; type::Int64 = 1, indeces::Union{Vector{Int64}, StepRange{Int64, Int64}} = 1:6:120, fourier_data::Vector{Float64})
  prob = SDDEProblem(MagneticField, noise!, B0, h, tspan, θ)
  sol = solve(prob, EM(), dt = dt)
  
  simulated_data = sol[1,:]
  simulated_data = simulated_data .^ 2
  fourier_transform = abs.(fft(simulated_data))
  fourier_stats = fourier_transform[indeces]

  rho = [euclidean(fourier_stats[i], fourier_data[i]) for i in 1:length(fourier_stats)]
  return rho
end

# function for the summary statistics
function reduced_fourier_spectrum(u::Vector{Float64}, indeces::Union{Vector{Int64}, StepRange{Int64, Int64}} = 1:6:120)
  fourier_transform = abs.(fft(u))
  return fourier_transform[indeces]
end

reduced_fourier_spectrum (generic function with 2 methods)

In [4]:
# FUNCTIONS NEEDED FOR SAVING THE RESULTS OF A SIMULATION

# function to create a new directory for each simulations, in order to store the needed files
function create_directory()
  base_path = pwd()
  base_path = joinpath(base_path, "Simulations")
  i = 1
  dir_name = "Real $i"
  dir_path = joinpath(base_path, dir_name)
  
  while isdir(dir_path)
    i += 1
    dir_name = "Real $i"
    dir_path = joinpath(base_path, dir_name)
  end
  
  mkpath(dir_path)
  println("Directory created at: $dir_path")
  cd(dir_path)
end

# function to save the prior as a string
function get_prior_string(prior)
  parts = []
  for d in prior.dists
    if isa(d, Uniform)
      push!(parts, "Uniform($(minimum(d)), $(maximum(d)))")
    else
      error("Unsupported distribution type: $(typeof(d))")
    end
  end
  
  return "product_distribution(" * join(parts, ", ") * ")"
end

# function to save the sabc parameters
function save_sabc_params(prior, n_particles::Int, n_simulation::Int, v::Float64, type::Int, indeces::Union{Vector{Int}, StepRange{Int64, Int64}})
  curr_path = pwd()
  filename = "sabc_params.csv"
  path = joinpath(curr_path, filename)
    
  sabc_params = DataFrame(
    Parameter = ["prior", "n_particles", "n_simulation", "v", "type", "indeces"],
    Value = [get_prior_string(prior), n_particles, n_simulation, v, type, string(indeces)]
  )
    
 CSV.write(filename, sabc_params) 
 println("Parameters saved to: $path")
end

# Function to save the result object of a sABC algorithm
function save_result(result::SimulatedAnnealingABC.SABCresult{Vector{Float64}, Float64})
  curr_path = pwd()
  filenames = ["eps_hist.csv", "u_hist.csv", "rho_hist.csv"]
  variables = [result.state.ϵ_history, result.state.u_history, result.state.ρ_history]

  for (filename, variable) in zip(filenames, variables)
    labels = string.(1:size(variable, 1))
    path = joinpath(curr_path, filename)
    CSV.write(path, DataFrame(variable, labels))
    println("$filename data saved to: $path")
  end

  filename = "pop.csv"
  path = joinpath(curr_path, filename)

  param_samples = hcat(result.population...)

  posterior_params = DataFrame(
    N_value = param_samples[1, :],
    T_value = param_samples[2, :],
    tau_value = param_samples[3, :],
    sigma_value = param_samples[4, :],
    Bmax_value = param_samples[5, :]
  )

  CSV.write(path, posterior_params)
  println("Posterior parameters saved to: $path")

  filename = "rho.csv"
  path = joinpath(curr_path, filename)

  rho = result.ρ

  rho_values = DataFrame(rho, [:ss1, :ss2, :ss3, :ss4, :ss5, :ss6])

  CSV.write(path, rho_values)
  println("Rho values saved to: $path")
end

save_result (generic function with 1 method)

In [5]:
# THREADS SETTINGS AND INFO

ThreadPinning.pinthreads(:cores)
ThreadPinning.threadinfo()


System: 8 cores (no SMT), 8 sockets, 1 NUMA domains

[0m[1m| [22m[33m[1m0[22m[39m[0m[1m | [22m[33m[1m1[22m[39m[0m[1m | [22m[33m[1m2[22m[39m[0m[1m | [22m[33m[1m3[22m[39m[0m[1m | [22m[33m[1m4[22m[39m[0m[1m | [22m[33m[1m5[22m[39m[0m[1m | [22m[33m[1m6[22m[39m[0m[1m | [22m[33m[1m7[22m[39m[0m[1m | [22m

[33m[1m#[22m[39m = Julia thread, [0m[1m|[22m = Socket seperator

Julia threads: [32m8[39m
├ Occupied CPU-threads: [32m8[39m
└ Mapping (Thread => CPUID): 1 => 0, 2 => 1, 3 => 2, 4 => 3, 5 => 4, ...


In [6]:
# EXTRACTING OPEN MAGNETIC FLUX AND SUNSPOT NUMBER RECORDS FROM XLSX FILE

# Define DataFrame object
data = DataFrame(
  year = Int[],
  open_magn_flux = Float64[],
  open_magn_flux_err = Float64[],
  ssa_open_magn_flux = Float64[],
  sunspots_num = Float64[],
  sunspots_err = Float64[],
  ssa_sunspots = Float64[]
)

# Open file and for each row write data into the DataFrame
XLSX.openxlsx("SN Usoskin Brehm.xlsx") do file
  sheet = file["Data"] 

  for row in XLSX.eachrow(sheet)
    if isa(row[2], Number)
      push!(data, (
        year = row[2],
        open_magn_flux = row[3],
        open_magn_flux_err = row[4],
        ssa_open_magn_flux = row[5],
        sunspots_num = row[7],
        sunspots_err = row[8],
        ssa_sunspots = row[9]
      ))
    end
  end
end

In [7]:
# DIRECTORY MANAGING

# Current directory
initial_directory = pwd()

# New directory
create_directory()

# NB: After "create_directory", we move to the new directory.
# DON'T RUN THIS AGAIN, wait for the simulation to finish!!!! If you made errors,
# eliminate the Simulations/Simulation i directory and then rerun everything

Directory created at: /home/ubuntu/LCP_B/Project/Simulations/Real 6


In [8]:
# SIMULATION PARAMETERS MANAGING

# Parameters that can be tuned for new simulations
prior = product_distribution(Uniform(1, 15), Uniform(0.1, 10.0), Uniform(0.1, 6.0), Uniform(0.01, 0.3), Uniform(1, 15))
n_particles = 1000
n_simulation = 10000000
v = 1.0
type = 1
indeces = 1:6:120

# Writing on file "sabc_params.csv" of the values set in this cell.
save_sabc_params(prior, n_particles, n_simulation, v, type, indeces)

Parameters saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 6/sabc_params.csv


In [9]:
# SIMULATION

# Initial conditions
B0 = [3.0, 0.0]
h0 = [0.0, 0.0]
noise0 = [1.0]
h(p, t) = h0
tmin = data.year[1]; tmax = data.year[length(data.year)]
tspan = (tmin, tmax)
dt = 0.01

# Creation of the summary statistics from the simulated data
u = data.open_magn_flux
sim_ss = reduced_fourier_spectrum(u, indeces)

# Actual usage of the sABC algorithm
result = sabc(f_dist, prior;
              n_particles = n_particles, 
              n_simulation = n_simulation,
              v = v,
              type = type,
              indeces = indeces,
              fourier_data = sim_ss)

# Display of the summary of the results
display(result)

# Saving the results to the files: "eps_hist.csv", "u_hist.csv", "rho_hist.csv", "pop.csv", "rho.csv".
save_result(result)

┌ Info: Preparing to run SABC algorithm: 'single-epsilon'
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:188
┌ Info: Using threads: 8 
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:199
┌ Info: Set BLAS threads = 1 
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:202
┌ Info: Set 'pinthreads(:cores)' for optimal multi-threading performance
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:204
┌ Info: Initializing population...
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:223
┌ Info: Initial resampling (δ = 0.1) - ESS = 996.6799495718245 
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:277
┌ Info: Population

Approximate posterior sample with 1000 particles:
  - simulations used: 10000000
  - average transformed distance: 5.89e-5
  - ϵ: [2.289e-6]
  - population resampling: 50
  - acceptance rate: 0.01012
The sample can be accessed with the field `population`.
The history of ϵ can be accessed with the field `state.ϵ_history`.
 -------------------------------------- 


eps_hist.csv data saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 6/eps_hist.csv
u_hist.csv data saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 6/u_hist.csv
rho_hist.csv data saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 6/rho_hist.csv
Posterior parameters saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 6/pop.csv


DimensionMismatch: DimensionMismatch: Number of columns (20) and number of column names (6) are not equal

In [10]:
# DIRECTORY MANAGING

# WE go back to the initial directory
cd(initial_directory)
pwd()

"/home/ubuntu/LCP_B/Project"

In [11]:
rho = result.ρ

1000×20 Matrix{Float64}:
 0.000138224  8.56362e-5   4.98896e-5   …  0.000130728  0.000174517
 0.000196764  7.12636e-5   0.000112976     0.000191121  9.4507e-5
 0.000102513  5.2543e-5    0.00010439      3.82803e-5   0.000127028
 6.20311e-5   1.89932e-5   7.55418e-5      4.4552e-5    1.99674e-5
 0.000167288  0.000107985  2.27484e-5      0.000166035  0.000100146
 0.000110559  8.71775e-5   8.97986e-5   …  0.000135632  0.000108164
 8.61475e-5   6.44129e-5   3.61357e-5      3.53727e-5   6.47587e-5
 0.000150418  8.65447e-5   7.06772e-5      6.99252e-5   0.000144455
 0.000115058  3.60692e-5   9.71581e-5      0.000210565  0.000109562
 0.000195152  0.000126595  0.000117529     0.000153191  8.91414e-5
 ⋮                                      ⋱               
 0.000180296  8.28932e-5   0.00010136      0.00016121   9.45225e-5
 0.000135712  6.07672e-5   0.000121013     0.000152512  0.000142819
 0.000182936  0.000112721  8.69661e-5      0.000226996  0.000173321
 0.00021648   8.7791e-5    3.693e-5     

In [19]:
df = DataFrame(rho, :auto)

df_squared = DataFrame()

# Iterate over each column and compute the square of each element
for col in names(df)
    df_squared[!, col] = df[!, col] .^ 2
end

# Now `df_squared` contains the square of each entry in the original DataFrame
println(df_squared)

[1m1000×20 DataFrame[0m
[1m  Row [0m│[1m x1          [0m[1m x2          [0m[1m x3          [0m[1m x4          [0m[1m x5          [0m[1m x6          [0m[1m x7          [0m[1m x8          [0m[1m x9          [0m[1m x10         [0m[1m x11         [0m[1m x12         [0m[1m x13         [0m[1m x14         [0m[1m x15         [0m[1m x16         [0m[1m x17         [0m[1m x18         [0m[1m x19         [0m[1m x20         [0m
      │[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m
──────┼───────────────────────────────────────────────────────────────────────────────────

In [20]:
row_sums = Vector{Float64}(undef, size(df, 1))

# Compute the sum of each row and store it in `row_sums`
for (i, row) in enumerate(eachrow(df_squared))
    row_sums[i] = sum(row)
end

# Now `row_sums` contains the sum of each row
k = 5  # Number of minimum values you want to find
new_indices = partialsortperm(row_sums, 1:k)  # Indices of the 5 smallest values
min_values = row_sums[new_indices]  # The 5 smallest values

println("Minimum values: ", min_values)
println("Indices of minimum values: ", new_indices)

Minimum values: [6.683326693693968e-8, 8.043167350854602e-8, 8.072301495217838e-8, 8.502498891253053e-8, 8.817627779865123e-8]
Indices of minimum values: [371, 928, 780, 507, 673]


In [21]:
param_samples = hcat(result.population...)

posterior_params = DataFrame(
  N_value = param_samples[1, :],
  T_value = param_samples[2, :],
  tau_value = param_samples[3, :],
  sigma_value = param_samples[4, :],
  Bmax_value = param_samples[5, :]
)

Row,N_value,T_value,tau_value,sigma_value,Bmax_value
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64
1,4.01968,5.93109,0.104944,0.29768,1.18204
2,3.50153,3.42958,0.107154,0.258477,1.32437
3,5.29527,9.90864,0.108195,0.288816,1.13968
4,1.78839,3.24972,0.1006,0.250305,1.33336
5,7.05227,9.56057,0.107487,0.161051,1.18454
6,2.30321,9.17842,0.114093,0.242574,1.21567
7,1.3575,2.01926,0.101348,0.193562,1.90527
8,1.01106,1.14623,0.105593,0.143939,1.87901
9,1.22879,7.37943,0.108514,0.164438,1.66578
10,1.06777,2.31291,0.102289,0.0853423,2.47874


In [22]:
new_indices

5-element view(::Vector{Int64}, 1:5) with eltype Int64:
 371
 928
 780
 507
 673

In [23]:
df[new_indices, :]

Row,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16,x17,x18,x19,x20
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,0.000102385,2.32221e-05,5.45696e-05,5.30192e-05,8.31511e-05,8.06746e-05,7.22517e-05,2.08227e-05,6.62634e-05,1.66145e-05,2.64803e-05,2.40722e-05,3.58357e-05,8.0264e-05,5.28987e-05,2.66747e-05,8.63456e-05,5.8653e-05,5.16919e-05,1.39449e-05
2,5.97097e-05,0.000115394,0.000118656,5.64981e-05,5.96476e-05,2.55881e-05,4.72818e-05,8.16432e-06,0.000117039,4.05073e-05,2.13883e-05,6.15379e-05,4.79333e-06,3.7801e-05,7.83457e-05,2.06212e-05,3.39299e-05,3.95587e-05,9.66714e-05,1.02163e-05
3,2.68588e-05,4.67992e-06,5.39767e-05,5.47699e-05,9.97096e-06,3.0715e-05,6.50712e-05,2.66589e-05,1.02743e-05,2.61991e-05,0.000110666,4.71684e-05,8.65812e-05,2.06074e-05,0.000173434,0.000106376,4.12737e-06,4.65804e-05,1.81938e-05,3.13209e-05
4,7.29938e-05,8.01997e-06,4.8266e-05,7.96526e-07,0.000106439,6.75365e-05,3.91772e-05,4.85724e-05,4.09429e-06,1.19741e-05,2.3636e-05,3.00905e-05,3.23473e-05,7.14483e-06,0.000126827,7.48505e-05,5.68669e-05,0.000115587,8.37324e-05,9.74841e-05
5,1.51e-05,2.05517e-05,0.000135893,3.07774e-05,6.53228e-06,6.29556e-06,4.13902e-05,2.50588e-05,3.19998e-05,2.07379e-05,5.00526e-05,2.41816e-05,6.87986e-05,8.04013e-06,0.00019479,4.36034e-05,6.06517e-05,1.48614e-05,9.60543e-05,5.81021e-05


In [24]:
best_particles = posterior_params[new_indices, :]

Row,N_value,T_value,tau_value,sigma_value,Bmax_value
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64
1,1.98195,5.57914,0.105587,0.256969,1.527
2,5.05373,5.06664,0.111867,0.287711,1.10294
3,1.2185,2.1392,0.114869,0.147175,2.50937
4,2.67847,8.12188,0.107341,0.275345,1.57156
5,3.51912,6.78436,0.102584,0.223802,1.66795
