In [1]:
# REAL NOTEBOOK

# This notebook runs a simulation for the Solar Dynamo model with the sABC algorithm.
# The results of the simulation are all stored in files in the directory Simulations/Real i.
# The reason behind this is to have an easier access to the results of already run simulations,
# without running them again. This notebook only processes real data!!

# NB: In this notebook, there's no function to visualize the results: the visualization is all
# contained in the "visualization_real.ipynb" notebook.

# RULES:

# There are two ways to use this notebook:
# 1) change all the parameters and then run all -> correct way
# 2) change things randomly and not in order and then run -> wrong way
# Please be careful: some functions change the directory in which everything is saved. The
# cells are ordered to start from a generic directory, create the directory
# Simulations/Real i, move into that directory and then, when everything is finished, go back
# to the initial directory!!! If you don't do that, the session stays in the subdirectory and
# the next run creates a subdirectory of a subdirectory -> if you need to stop midway because
# you forgot something, remember to go back to the initial directory (and delete the directory
# that contains incomplete files).

# GG EZ - kallo27

In [2]:
# NEEDED PACKAGES -> no visualization!!

using StochasticDelayDiffEq
using SpecialFunctions
using Distributions
using SimulatedAnnealingABC
using Distances
using DataFrames
using FFTW
using CSV
using XLSX
using ThreadPinning

In [3]:
# FUNCTIONS NEEDED FOR THE MODEL

"""
    f(B, B_max = 10, B_min = 1)

Box-shaped window on the magnetic field strength `B`: the product of two `erf` steps in
`B^2`, one rising at `B_min^2` and one falling at `B_max^2`, scaled by 1/4.
The operations on `B` are broadcast, so `B` may be a scalar or an array.
"""
function f(B, B_max = 10, B_min = 1)
  B_sq = B .^ 2
  rising = 1 .+ erf.(B_sq .- B_min ^ 2)   # lower edge of the box
  falling = 1 .- erf.(B_sq .- B_max ^ 2)  # upper edge of the box
  return 1 / 4 * rising .* falling
end

"""
    MagneticField(du, u, h, p, t)

In-place drift (deterministic part) of the delay model.

# Arguments
- `du`: output buffer; `du[1]` and `du[2]` are overwritten.
- `u`: current state, unpacked as `(B, dB)`.
- `h`: history function, called as `h(p, s)`; only its first component is used.
- `p`: parameters, unpacked as `(N, T, tau, sigma, Bmax)`; `sigma` is not used here
  (it is read by `noise!`).
- `t`: current time.
"""
function MagneticField(du, u, h, p, t)
  N, T, tau, _, Bmax = p
  q = T / tau  # delay applied to the history lookup

  B, dB = u

  # Evaluate the delayed field once and reuse it, instead of querying the history twice
  B_delayed = h(p, t - q)[1]

  du[1] = dB
  du[2] = - ((2 / tau) * dB + (B / tau^2) + N * B_delayed * f(B_delayed, Bmax))
end

"""
    noise!(du, u, h, p, t)

In-place noise coefficient of the delay model. Only the first component carries noise,
with constant amplitude `sigma * Bmax` (4th and 5th entries of `p`); every other entry of
`du` is set to zero.

`u`, `h` and `t` are accepted for the solver's calling convention but are not read.
"""
function noise!(du, u, h, p, t)
  _, _, _, sigma, Bmax = p
  # Zero the whole buffer first so the components without noise do not depend on
  # whatever values the caller's buffer happened to contain.
  fill!(du, zero(eltype(du)))
  du[1] = (sigma * Bmax)
end

"""
    f_dist(θ; type = 1, indeces = 1:6:120, fourier_data)

Distance function handed to the sABC algorithm. Simulates the model with parameters `θ`,
takes the magnitude of the FFT of the first state component, keeps the entries selected by
`indeces`, and returns one distance per selected entry against `fourier_data`.

Reads the globals `B0`, `h`, `tspan` and `dt`, which must be defined before the call.

# Keywords
- `type`: accepted but not read inside this function.
- `indeces`: positions of the FFT magnitudes used as summary statistics.
- `fourier_data`: reference summary statistics; must provide at least as many entries as
  `indeces` selects.

NOTE(review): the spectrum here is computed on every saved solver step, while
`fourier_data` is computed by the caller on its own series — confirm that both use a
comparable sampling and FFT normalisation.
"""
function f_dist(θ::Vector{Float64}; type::Int64 = 1, indeces::Union{Vector{Int64}, StepRange{Int64, Int64}} = 1:6:120, fourier_data::Vector{Float64})
  trajectory = solve(SDDEProblem(MagneticField, noise!, B0, h, tspan, θ), EM(), dt = dt)

  # Magnitude spectrum of the first state component, reduced to the chosen entries
  spectrum = abs.(fft(trajectory[1,:]))
  fourier_stats = spectrum[indeces]

  # One distance per summary statistic
  return [euclidean(fourier_stats[k], fourier_data[k]) for k in 1:length(fourier_stats)]
end

"""
    reduced_fourier_spectrum(u, indeces = 1:6:120)

Summary statistics of the series `u`: the magnitudes of its FFT at the positions listed
in `indeces`.
"""
function reduced_fourier_spectrum(u::Vector{Float64}, indeces::Union{Vector{Int64}, StepRange{Int64, Int64}} = 1:6:120)
  return abs.(fft(u))[indeces]
end

reduced_fourier_spectrum (generic function with 2 methods)

In [4]:
# FUNCTIONS NEEDED FOR SAVING THE RESULTS OF A SIMULATION

"""
    create_directory()

Create the first non-existing directory `Simulations/Real i` (i = 1, 2, ...) under the
current working directory, print its path, and `cd` into it.

The working directory is changed as a side effect; the caller is responsible for going
back afterwards.
"""
function create_directory()
  root = joinpath(pwd(), "Simulations")

  # Find the first index whose directory does not exist yet
  idx = 1
  while isdir(joinpath(root, "Real $idx"))
    idx += 1
  end
  dir_path = joinpath(root, "Real $idx")

  mkpath(dir_path)
  println("Directory created at: $dir_path")
  cd(dir_path)
end

"""
    get_prior_string(prior)

Return a textual form of `prior`, e.g. `"product_distribution(Uniform(a, b), ...)"`,
built from the components in `prior.dists`.

Only `Uniform` components are supported; any other component raises an error naming
its type.
"""
function get_prior_string(prior)
  pieces = map(prior.dists) do dist
    dist isa Uniform || error("Unsupported distribution type: $(typeof(dist))")
    "Uniform($(minimum(dist)), $(maximum(dist)))"
  end

  return string("product_distribution(", join(pieces, ", "), ")")
end

"""
    save_sabc_params(prior, n_particles, n_simulation, v, type, indeces)

Write the sABC settings to `sabc_params.csv` in the current working directory as a
two-column table (`Parameter`, `Value`), then print the location of the file.
The prior is stored as the string produced by `get_prior_string`, and `indeces` as
`string(indeces)`.
"""
function save_sabc_params(prior, n_particles::Int, n_simulation::Int, v::Float64, type::Int, indeces::Union{Vector{Int}, StepRange{Int64, Int64}})
  filename = "sabc_params.csv"
  path = joinpath(pwd(), filename)  # only used for the message; the file is written relative to pwd()

  labels = ["prior", "n_particles", "n_simulation", "v", "type", "indeces"]
  entries = [get_prior_string(prior), n_particles, n_simulation, v, type, string(indeces)]
  sabc_params = DataFrame(Parameter = labels, Value = entries)

  CSV.write(filename, sabc_params)
  println("Parameters saved to: $path")
end

"""
    save_result(result)

Save the content of an sABC result to CSV files in the current working directory:

- `eps_hist.csv`, `u_hist.csv`, `rho_hist.csv`: `result.state.ϵ_history`,
  `result.state.u_history` and `result.state.ρ_history`, with columns labelled
  "1", "2", ... (one label per element along the first dimension of each history).
- `pop.csv`: the particles in `result.population`, one row per particle, with columns
  `N_value`, `T_value`, `tau_value`, `sigma_value`, `Bmax_value` (the first five entries
  of each particle).
- `rho.csv`: `result.ρ`, with one column `ss1`, `ss2`, ... per column of the matrix.

The path of every written file is printed.
"""
function save_result(result::SimulatedAnnealingABC.SABCresult{Vector{Float64}, Float64})
  curr_path = pwd()

  # --- histories -------------------------------------------------------------
  histories = (
    "eps_hist.csv" => result.state.ϵ_history,
    "u_hist.csv" => result.state.u_history,
    "rho_hist.csv" => result.state.ρ_history,
  )

  for (filename, variable) in histories
    labels = string.(1:size(variable, 1))
    path = joinpath(curr_path, filename)
    CSV.write(path, DataFrame(variable, labels))
    println("$filename data saved to: $path")
  end

  # --- posterior sample ------------------------------------------------------
  path = joinpath(curr_path, "pop.csv")

  # One column per particle; `reduce(hcat, ...)` avoids splatting the whole population
  param_samples = reduce(hcat, result.population)

  posterior_params = DataFrame(
    N_value = param_samples[1, :],
    T_value = param_samples[2, :],
    tau_value = param_samples[3, :],
    sigma_value = param_samples[4, :],
    Bmax_value = param_samples[5, :]
  )

  CSV.write(path, posterior_params)
  println("Posterior parameters saved to: $path")

  # --- final distances -------------------------------------------------------
  path = joinpath(curr_path, "rho.csv")

  rho = result.ρ

  # Column names follow the actual number of summary statistics instead of assuming six,
  # so runs with a different `indeces` selection can be saved as well.
  stat_names = Symbol.("ss", 1:size(rho, 2))
  rho_values = DataFrame(rho, stat_names)

  CSV.write(path, rho_values)
  println("Rho values saved to: $path")
end

save_result (generic function with 1 method)

In [5]:
# THREADS SETTINGS AND INFO

# Pin the Julia threads with the `:cores` strategy, then print the resulting
# thread-to-CPU layout (shown in the cell output below).
ThreadPinning.pinthreads(:cores)
ThreadPinning.threadinfo()


System: 8 cores (no SMT), 8 sockets, 1 NUMA domains

[0m[1m| [22m[33m[1m0[22m[39m[0m[1m | [22m[33m[1m1[22m[39m[0m[1m | [22m[33m[1m2[22m[39m[0m[1m | [22m[33m[1m3[22m[39m[0m[1m | [22m[33m[1m4[22m[39m[0m[1m | [22m[33m[1m5[22m[39m[0m[1m | [22m[33m[1m6[22m[39m[0m[1m | [22m[33m[1m7[22m[39m[0m[1m | [22m

[33m[1m#[22m[39m = Julia thread, [0m[1m|[22m = Socket seperator

Julia threads: [32m8[39m
├ Occupied CPU-threads: [32m8[39m
└ Mapping (Thread => CPUID): 1 => 0, 2 => 1, 3 => 2, 4 => 3, 5 => 4, ...


In [6]:
# EXTRACTING OPEN MAGNETIC FLUX AND SUNSPOT NUMBER RECORDS FROM XLSX FILE

# Empty table that will receive one row per record of the spreadsheet
data = DataFrame(
  year = Int[],
  open_magn_flux = Float64[],
  open_magn_flux_err = Float64[],
  ssa_open_magn_flux = Float64[],
  sunspots_num = Float64[],
  sunspots_err = Float64[],
  ssa_sunspots = Float64[]
)

# Read the "Data" sheet row by row. Rows whose second cell is not a number are skipped;
# spreadsheet columns 2-5 and 7-9 are copied, columns 1 and 6 are not used.
XLSX.openxlsx("SN Usoskin Brehm.xlsx") do workbook
  for record in XLSX.eachrow(workbook["Data"])
    isa(record[2], Number) || continue

    push!(data, (
      year = record[2],
      open_magn_flux = record[3],
      open_magn_flux_err = record[4],
      ssa_open_magn_flux = record[5],
      sunspots_num = record[7],
      sunspots_err = record[8],
      ssa_sunspots = record[9]
    ))
  end
end

In [7]:
# DIRECTORY MANAGING

# Remember the directory the notebook is started from, so that a later cell can `cd` back to it
initial_directory = pwd()

# Create the next free "Simulations/Real i" directory and move into it
create_directory()

# NB: After "create_directory", we are inside the new directory.
# DON'T RUN THIS AGAIN, wait for the simulation to finish!!!! If you made errors,
# go back to the initial directory, delete the Simulations/Real i directory that was just
# created and then rerun everything

Directory created at: /home/ubuntu/LCP_B/Project/Simulations/Real 4


In [8]:
# SIMULATION PARAMETERS MANAGING

# Tunable settings for a new simulation.
# The prior has one Uniform component per model parameter, in the order in which the model
# unpacks them: N, T, tau, sigma, Bmax.
prior = product_distribution(
  Uniform(1, 15),
  Uniform(0.1, 10.0),
  Uniform(0.1, 6.0),
  Uniform(0.01, 0.3),
  Uniform(1, 15),
)
n_particles = 1000
n_simulation = 10_000_000
v = 1.0
type = 1
indeces = Int64[1, 2, 37, 50, 78, 85]  # FFT entries used as summary statistics

# Store the values chosen above in "sabc_params.csv"
save_sabc_params(prior, n_particles, n_simulation, v, type, indeces)

Parameters saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 4/sabc_params.csv


In [9]:
# SIMULATION

# Initial state, constant history and integration settings (read as globals by `f_dist`)
B0 = [3.0, 0.0]
h0 = [0.0, 0.0]
noise0 = [1.0]
h(p, t) = h0
tmin, tmax = data.year[begin], data.year[end]  # the simulation spans the years of the record
tspan = (tmin, tmax)
dt = 0.01

# Summary statistics of the observed open magnetic flux record (the target of the inference)
u = data.open_magn_flux
sim_ss = reduced_fourier_spectrum(u, indeces)

# Run the sABC algorithm
result = sabc(
  f_dist, prior;
  n_particles = n_particles,
  n_simulation = n_simulation,
  v = v,
  type = type,
  indeces = indeces,
  fourier_data = sim_ss,
)

# Show the summary of the run
display(result)

# Write "eps_hist.csv", "u_hist.csv", "rho_hist.csv", "pop.csv" and "rho.csv"
save_result(result)

┌ Info: Preparing to run SABC algorithm: 'single-epsilon'
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:188
┌ Info: Using threads: 8 
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:199
┌ Info: Set BLAS threads = 1 
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:202
┌ Info: Set 'pinthreads(:cores)' for optimal multi-threading performance
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:204
┌ Info: Initializing population...
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:223
┌ Info: Initial resampling (δ = 0.1) - ESS = 996.6799495718252 
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:277
┌ Info: Population

Approximate posterior sample with 1000 particles:
  - simulations used: 10000000
  - average transformed distance: 0.0004364
  - ϵ: [3.297e-5]
  - population resampling: 8
  - acceptance rate: 0.001728
The sample can be accessed with the field `population`.
The history of ϵ can be accessed with the field `state.ϵ_history`.
 -------------------------------------- 


eps_hist.csv data saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 4/eps_hist.csv
u_hist.csv data saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 4/u_hist.csv
rho_hist.csv data saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 4/rho_hist.csv
Posterior parameters saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 4/pop.csv
Rho values saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 4/rho.csv


In [10]:
# DIRECTORY MANAGING

# Go back to the directory stored in `initial_directory` before the simulation directory
# was created, then show the current directory to confirm the move
cd(initial_directory)
pwd()

"/home/ubuntu/LCP_B/Project"

In [11]:
# Distances stored in the sABC result (displayed below as a 1000×6 matrix)
rho = result.ρ

1000×6 Matrix{Float64}:
 0.0162792   0.00174622   0.00336853   0.000329594  5.28893e-5   0.00271282
 0.0236717   0.0124744    0.00808388   0.00864552   0.0106054    0.0103204
 0.00523064  0.00632868   0.00349511   0.00467725   0.0143491    0.0115984
 0.0102636   0.00370187   0.00596121   0.0048132    0.00899902   0.0122394
 0.01803     0.00226034   0.00876126   0.00604997   0.0101458    0.0037832
 0.008345    0.00904363   0.0092495    0.0127779    0.00454618   0.012354
 0.00740197  0.00144561   0.00315735   0.00868657   0.00773756   0.00497746
 0.0112838   0.00486077   0.00619416   0.0120433    0.00541906   0.00625919
 0.00597408  0.00812247   0.00387919   0.00399493   0.00124204   0.00158533
 0.0136629   0.00132741   0.00577723   0.00351383   0.00408802   0.00515002
 ⋮                                                               ⋮
 0.00104555  0.00353968   0.012059     0.00959644   0.0148309    0.0172227
 0.0158363   0.000746034  0.00160547   0.00545723   0.0120248    0.00153367
 0.0

In [12]:
# Label the six columns of the distance matrix
df = DataFrame(rho, [:ss1, :ss2, :ss3, :ss4, :ss5, :ss6])

# Same table with every entry squared (column names are kept)
df_squared = mapcols(col -> col .^ 2, df)

println(df_squared)

[1m1000×6 DataFrame[0m
[1m  Row [0m│[1m ss1         [0m[1m ss2         [0m[1m ss3         [0m[1m ss4         [0m[1m ss5         [0m[1m ss6         [0m
      │[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m
──────┼──────────────────────────────────────────────────────────────────────────────
    1 │ 0.000265011  3.04927e-6   1.1347e-5    1.08632e-7   2.79728e-9   7.3594e-6
    2 │ 0.000560348  0.000155612  6.53491e-5   7.47451e-5   0.000112474  0.000106511
    3 │ 2.73596e-5   4.00522e-5   1.22158e-5   2.18766e-5   0.000205896  0.000134523
    4 │ 0.000105341  1.37038e-5   3.5536e-5    2.31669e-5   8.09824e-5   0.000149802
    5 │ 0.00032508   5.10915e-6   7.67597e-5   3.66022e-5   0.000102938  1.43126e-5
    6 │ 6.9639e-5    8.17872e-5   8.55533e-5   0.000163274  2.06677e-5   0.000152622
    7 │ 5.47892e-5   2.08978e-6   9.96884e-6   7.54565e-5   5.98698e-5   2.47751e-5
    8 │ 0.000127

In [29]:
# Sum of the squared distances of every row of `df_squared`
row_sums = Float64[sum(row) for row in eachrow(df_squared)]

# Pick the rows with the smallest summed squared distance
k = 15  # Number of minimum values you want to find
new_indices = partialsortperm(row_sums, 1:k)  # Indices of the k smallest values
min_values = row_sums[new_indices]  # The k smallest values

println("Minimum values: ", min_values)
println("Indices of minimum values: ", new_indices)

Minimum values: [4.805099846718786e-5, 5.17209452080962e-5, 7.166959596753352e-5, 8.869751205503015e-5, 9.919271313155853e-5, 0.00010189290926905582, 0.00011571888141312115, 0.00011766305091826326, 0.00012273304514271837, 0.0001273983997863969, 0.00012807852030744223, 0.0001293174741534103, 0.00012995895056205277, 0.00013125171823971288, 0.00013246966976017524]
Indices of minimum values: [772, 787, 978, 340, 551, 859, 42, 256, 646, 822, 351, 176, 510, 949, 913]


In [30]:
# Matrix with one column per particle of the final population
param_samples = reduce(hcat, result.population)

# Split the first five rows into one named column per model parameter
N_col, T_col, tau_col, sigma_col, Bmax_col = (param_samples[i, :] for i in 1:5)

posterior_params = DataFrame(
  N_value = N_col,
  T_value = T_col,
  tau_value = tau_col,
  sigma_value = sigma_col,
  Bmax_value = Bmax_col
)

Row,N_value,T_value,tau_value,sigma_value,Bmax_value
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64
1,1.36897,0.309298,1.94516,0.0132121,8.87411
2,12.2415,0.148307,0.254494,0.255458,6.33526
3,7.43064,6.64081,0.312212,0.0974577,12.545
4,4.98167,0.405776,1.60041,0.14623,3.80425
5,6.00146,0.413574,0.852082,0.111758,5.6758
6,9.70061,0.300386,0.241156,0.1894,4.16296
7,2.54447,2.25744,0.306353,0.0732894,14.0655
8,2.4657,3.36048,0.682221,0.0490212,7.46855
9,4.96963,4.31629,0.176912,0.192051,7.12995
10,5.20023,0.910901,0.839202,0.187807,1.2918


In [31]:
# Show the row indices selected by `partialsortperm`
new_indices

15-element view(::Vector{Int64}, 1:15) with eltype Int64:
 772
 787
 978
 340
 551
 859
  42
 256
 646
 822
 351
 176
 510
 949
 913

In [32]:
# Rows of the distance table `df` at the selected indices
df[new_indices, :]

Row,ss1,ss2,ss3,ss4,ss5,ss6
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64
1,0.0021935,0.0031167,0.00247383,0.00272911,0.00134562,0.00425995
2,0.00384866,0.000146953,0.00298671,0.00473632,0.00197177,0.00128301
3,0.000656345,0.00699919,0.00140165,0.00176955,0.00330787,0.00249245
4,0.00842542,0.000581968,0.00333261,8.35891e-05,0.00223582,0.00112201
5,0.00254517,0.00848417,0.00185271,0.00169864,0.00367872,0.000939536
6,0.00256295,0.00252178,0.00667752,0.0035725,0.00483213,0.0028746
7,0.00539415,0.000993582,0.00185588,0.00494362,0.00453381,0.00609883
8,0.00669509,0.00247548,0.00204591,0.000805597,0.00650283,0.00442598
9,0.00633198,0.00522448,0.00287568,0.00678291,0.000765111,0.000693625
10,0.00665476,0.0012622,0.00180264,0.00732081,0.000184112,0.00496404


In [33]:
# Rows of `posterior_params` (parameter values) at the selected indices
best_particles = posterior_params[new_indices, :]

Row,N_value,T_value,tau_value,sigma_value,Bmax_value
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64
1,2.97473,3.32641,0.414924,0.249085,2.46746
2,1.54787,0.430114,0.434765,0.0497595,10.2135
3,1.11931,2.61234,0.390092,0.138509,2.56179
4,13.4137,3.59711,0.149483,0.155223,10.4131
5,2.06505,3.85977,0.45139,0.1535,5.71798
6,6.57112,6.57931,0.241702,0.249964,3.18693
7,7.08402,1.74282,0.177105,0.197443,5.56716
8,3.24083,4.95947,0.176478,0.0830449,11.8728
9,2.37575,2.54263,0.501928,0.0811489,8.37312
10,2.6567,6.90638,0.231386,0.116549,5.85756
