In [1]:
# REAL NOTEBOOK

# This notebook runs a simulation for the Solar Dynamo model with the sABC algorithm.
# The results of the simulation are all stored in files in the directory Simulations/Real i.
# The reason behind this is to have an easier access to the results of already run simulations,
# without running them again. This notebook only processes real data!!

# NB: In this notebook, there's no function to visualize the results: the visualization is all
# contained in the "visualization_real.ipynb" notebook.

# RULES:

# There are two ways to use this notebook:
# 1) change all the parameters and then run all -> correct way
# 2) change things randomly and not in order and then run -> wrong way
# Please be careful, some functions change the directory in which everything is being saved; the
# order of the calling of the functions is organized to start from a generic dir, create the dir
# Simulations/Simulation i, go to that directory and then, when everythingis finished, go back to
# the initial dir!!! If you don't do that, it will stay in the subdir and at the next run it will
# create a subdir of a subdir -> if you need to stop midway through because u forgot something, 
# remember to come back to the initial directory (and eliminate the directory that has not correct
# files inside).

# GG EZ - kallo27

In [2]:
# NEEDED PACKAGES -> no visualization!!

using StochasticDelayDiffEq
using SpecialFunctions
using Distributions
using SimulatedAnnealingABC
using Distances
using DataFrames
using FFTW
using CSV
using XLSX
using ThreadPinning

In [3]:
# FUNCTIONS NEDEED FOR THE MODEL

# Box-shaped function for the magnetic field range 
function f(B, B_max = 10, B_min = 1)
  return 1 / 4 * (1 .+ erf.(B .^ 2 .- B_min ^ 2)) .* (1 .- erf.(B .^ 2 .- B_max ^ 2))
end

# Model function for the DDE
function MagneticField(du, u, h, p, t)
  N, T, tau, sigma, Bmax = p

  B, dB = u

  du[1] = dB
  du[2] = - ((2 / tau) * dB + (B / tau^2) + (N / tau^2) * h(p, t - T)[1] * f(h(p, t - T)[1], Bmax))
end

# Noise function for the DDE
function noise!(du, u, h, p, t)
  N, T, tau, sigma, Bmax = p
  du[1] = 0
  du[2] = (sigma * Bmax)/(tau^(3/2))
end

# Distance function in the sABC algorithm
function f_dist(θ::Vector{Float64}; type::Int64 = 1, indeces::Union{Vector{Int64}, StepRange{Int64, Int64}} = 1:6:120, fourier_data::Vector{Float64})
  prob = SDDEProblem(MagneticField, noise!, B0, h, tspan, θ)
  sol = solve(prob, EM(), dt = dt)
  
  simulated_data = sol[1,:]
  simulated_data = simulated_data .^ 2
  fourier_transform = abs.(fft(simulated_data))
  fourier_stats = fourier_transform[indeces]

  rho = [euclidean(fourier_stats[i], fourier_data[i]) for i in 1:length(fourier_stats)]
  return rho
end

# function for the summary statistics
function reduced_fourier_spectrum(u::Vector{Float64}, indeces::Union{Vector{Int64}, StepRange{Int64, Int64}} = 1:6:120)
  fourier_transform = abs.(fft(u))
  return fourier_transform[indeces]
end

reduced_fourier_spectrum (generic function with 2 methods)

In [4]:
# FUNCTIONS NEEDED FOR SAVING THE RESULTS OF A SIMULATION

# function to create a new directory for each simulations, in order to store the needed files
function create_directory()
  base_path = pwd()
  base_path = joinpath(base_path, "Simulations")
  i = 1
  dir_name = "Real $i"
  dir_path = joinpath(base_path, dir_name)
  
  while isdir(dir_path)
    i += 1
    dir_name = "Real $i"
    dir_path = joinpath(base_path, dir_name)
  end
  
  mkpath(dir_path)
  println("Directory created at: $dir_path")
  cd(dir_path)
end

# function to save the prior as a string
function get_prior_string(prior)
  parts = []
  for d in prior.dists
    if isa(d, Uniform)
      push!(parts, "Uniform($(minimum(d)), $(maximum(d)))")
    else
      error("Unsupported distribution type: $(typeof(d))")
    end
  end
  
  return "product_distribution(" * join(parts, ", ") * ")"
end

# function to save the sabc parameters
function save_sabc_params(prior, n_particles::Int, n_simulation::Int, v::Float64, type::Int, indeces::Union{Vector{Int}, StepRange{Int64, Int64}})
  curr_path = pwd()
  filename = "sabc_params.csv"
  path = joinpath(curr_path, filename)
    
  sabc_params = DataFrame(
    Parameter = ["prior", "n_particles", "n_simulation", "v", "type", "indeces"],
    Value = [get_prior_string(prior), n_particles, n_simulation, v, type, string(indeces)]
  )
    
 CSV.write(filename, sabc_params) 
 println("Parameters saved to: $path")
end

# Function to save the result object of a sABC algorithm
function save_result(result::SimulatedAnnealingABC.SABCresult{Vector{Float64}, Float64})
  curr_path = pwd()
  filenames = ["eps_hist.csv", "u_hist.csv", "rho_hist.csv"]
  variables = [result.state.ϵ_history, result.state.u_history, result.state.ρ_history]

  for (filename, variable) in zip(filenames, variables)
    labels = string.(1:size(variable, 1))
    path = joinpath(curr_path, filename)
    CSV.write(path, DataFrame(variable, labels))
    println("$filename data saved to: $path")
  end

  filename = "pop.csv"
  path = joinpath(curr_path, filename)

  param_samples = hcat(result.population...)

  posterior_params = DataFrame(
    N_value = param_samples[1, :],
    T_value = param_samples[2, :],
    tau_value = param_samples[3, :],
    sigma_value = param_samples[4, :],
    Bmax_value = param_samples[5, :]
  )

  CSV.write(path, posterior_params)
  println("Posterior parameters saved to: $path")

  filename = "rho.csv"
  path = joinpath(curr_path, filename)

  rho = result.ρ

  rho_values = DataFrame(rho, [:ss1, :ss2, :ss3, :ss4, :ss5, :ss6])

  CSV.write(path, rho_values)
  println("Rho values saved to: $path")
end

save_result (generic function with 1 method)

In [5]:
# THREADS SETTINGS AND INFO

ThreadPinning.pinthreads(:cores)
ThreadPinning.threadinfo()


System: 8 cores (no SMT), 8 sockets, 1 NUMA domains

[0m[1m| [22m[33m[1m0[22m[39m[0m[1m | [22m[33m[1m1[22m[39m[0m[1m | [22m[33m[1m2[22m[39m[0m[1m | [22m[33m[1m3[22m[39m[0m[1m | [22m[33m[1m4[22m[39m[0m[1m | [22m[33m[1m5[22m[39m[0m[1m | [22m[33m[1m6[22m[39m[0m[1m | [22m[33m[1m7[22m[39m[0m[1m | [22m

[33m[1m#[22m[39m = Julia thread, [0m[1m|[22m = Socket seperator

Julia threads: [32m8[39m
├ Occupied CPU-threads: [32m8[39m
└ Mapping (Thread => CPUID): 1 => 0, 2 => 1, 3 => 2, 4 => 3, 5 => 4, ...


In [6]:
# EXTRACTING OPEN MAGNETIC FLUX AND SUNSPOT NUMBER RECORDS FROM XLSX FILE

# Define DataFrame object
data = DataFrame(
  year = Int[],
  open_magn_flux = Float64[],
  open_magn_flux_err = Float64[],
  ssa_open_magn_flux = Float64[],
  sunspots_num = Float64[],
  sunspots_err = Float64[],
  ssa_sunspots = Float64[]
)

# Open file and for each row write data into the DataFrame
XLSX.openxlsx("SN Usoskin Brehm.xlsx") do file
  sheet = file["Data"] 

  for row in XLSX.eachrow(sheet)
    if isa(row[2], Number)
      push!(data, (
        year = row[2],
        open_magn_flux = row[3],
        open_magn_flux_err = row[4],
        ssa_open_magn_flux = row[5],
        sunspots_num = row[7],
        sunspots_err = row[8],
        ssa_sunspots = row[9]
      ))
    end
  end
end

In [7]:
# DIRECTORY MANAGING

# Current directory
initial_directory = pwd()

# New directory
create_directory()

# NB: After "create_directory", we move to the new directory.
# DON'T RUN THIS AGAIN, wait for the simulation to finish!!!! If you made errors,
# eliminate the Simulations/Simulation i directory and then rerun everything

Directory created at: /home/ubuntu/LCP_B/Project/Simulations/Real 7


In [8]:
# SIMULATION PARAMETERS MANAGING

# Parameters that can be tuned for new simulations
prior = product_distribution(Uniform(1, 15), Uniform(0.1, 10.0), Uniform(0.1, 6.0), Uniform(0.01, 0.3), Uniform(1, 15))
n_particles = 1000
n_simulation = 10000000
v = 1.0
type = 1
indeces = [1, 2, 85]

# Writing on file "sabc_params.csv" of the values set in this cell.
save_sabc_params(prior, n_particles, n_simulation, v, type, indeces)

Parameters saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 7/sabc_params.csv


In [9]:
# SIMULATION

# Initial conditions
B0 = [1.0, 0.0]
h0 = [0.0, 0.0]
noise0 = [1.0]
h(p, t) = h0
tmin = data.year[1]; tmax = data.year[length(data.year)]
tspan = (tmin, tmax)
dt = 0.01

# Creation of the summary statistics from the simulated data
u = data.open_magn_flux
sim_ss = reduced_fourier_spectrum(u, indeces)

# Actual usage of the sABC algorithm
result = sabc(f_dist, prior;
              n_particles = n_particles, 
              n_simulation = n_simulation,
              v = v,
              type = type,
              indeces = indeces,
              fourier_data = sim_ss)

# Display of the summary of the results
display(result)

# Saving the results to the files: "eps_hist.csv", "u_hist.csv", "rho_hist.csv", "pop.csv", "rho.csv".
save_result(result)

┌ Info: Preparing to run SABC algorithm: 'single-epsilon'
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:188
┌ Info: Using threads: 8 
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:199
┌ Info: Set BLAS threads = 1 
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:202
┌ Info: Set 'pinthreads(:cores)' for optimal multi-threading performance
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:204
┌ Info: Initializing population...
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:223
┌ Info: Initial resampling (δ = 0.1) - ESS = 996.6799495718246 
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:277
┌ Info: Population

Approximate posterior sample with 1000 particles:
  - simulations used: 10000000
  - average transformed distance: 0.0001789
  - ϵ: [1.006e-5]
  - population resampling: 10
  - acceptance rate: 0.002019
The sample can be accessed with the field `population`.
The history of ϵ can be accessed with the field `state.ϵ_history`.
 -------------------------------------- 


eps_hist.csv data saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 7/eps_hist.csv
u_hist.csv data saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 7/u_hist.csv
rho_hist.csv data saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 7/rho_hist.csv
Posterior parameters saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 7/pop.csv


DimensionMismatch: DimensionMismatch: Number of columns (3) and number of column names (6) are not equal

In [10]:
# DIRECTORY MANAGING

# WE go back to the initial directory
cd(initial_directory)
pwd()

"/home/ubuntu/LCP_B/Project"

In [11]:
rho = result.ρ

1000×3 Matrix{Float64}:
 5.15605e-5  9.06781e-5  4.44132e-5
 9.25022e-6  5.89493e-6  1.86468e-5
 7.64241e-6  2.13834e-5  1.49479e-5
 6.99701e-6  6.53997e-6  1.90112e-5
 5.40451e-7  9.84845e-6  4.7725e-6
 1.38788e-5  3.14031e-6  1.30468e-5
 1.8469e-5   1.6594e-5   2.16735e-9
 3.1805e-6   3.71048e-5  2.63695e-6
 3.92092e-5  9.44767e-6  1.84309e-5
 3.76248e-5  2.35828e-6  1.97432e-6
 ⋮                       
 4.44245e-5  1.13922e-5  4.2422e-5
 9.3635e-7   6.78932e-6  2.24867e-5
 2.57444e-5  1.99354e-5  1.39772e-5
 9.14417e-6  4.49711e-6  1.53673e-6
 2.86788e-5  3.41107e-5  5.79642e-6
 2.42685e-6  2.33928e-5  4.20191e-5
 1.0561e-5   1.56802e-5  3.68952e-7
 3.97658e-6  5.68706e-6  2.99707e-5
 1.73891e-5  5.23601e-5  9.18233e-6

In [12]:
df = DataFrame(rho, :auto)

df_squared = DataFrame()

# Iterate over each column and compute the square of each element
for col in names(df)
    df_squared[!, col] = df[!, col] .^ 2
end

# Now `df_squared` contains the square of each entry in the original DataFrame
println(df_squared)

[1m1000×3 DataFrame[0m
[1m  Row [0m│[1m x1          [0m[1m x2          [0m[1m x3          [0m
      │[90m Float64     [0m[90m Float64     [0m[90m Float64     [0m
──────┼───────────────────────────────────────
    1 │ 2.65849e-9   8.22252e-9   1.97253e-9
    2 │ 8.55666e-11  3.47502e-11  3.47702e-10
    3 │ 5.84064e-11  4.57251e-10  2.2344e-10
    4 │ 4.89582e-11  4.27712e-11  3.61426e-10
    5 │ 2.92087e-13  9.6992e-11   2.27768e-11
    6 │ 1.92622e-10  9.86158e-12  1.7022e-10
    7 │ 3.41103e-10  2.75361e-10  4.69738e-18
    8 │ 1.01156e-11  1.37677e-9   6.95349e-12
    9 │ 1.53736e-9   8.92584e-11  3.39698e-10
   10 │ 1.41563e-9   5.56149e-12  3.89794e-12
   11 │ 2.37045e-9   6.20926e-10  2.08137e-10
   12 │ 1.93524e-9   1.11964e-9   1.28702e-10
   13 │ 1.93842e-9   5.73424e-10  3.11401e-11
   14 │ 3.59823e-10  2.0427e-10   6.36573e-10
   15 │ 3.5815e-10   2.12881e-9   9.16872e-10
   16 │ 8.04094e-10  3.51774e-11  5.70609e-11
   17 │ 1.18805e-9   1.4271e-11   1.31865e

In [13]:
row_sums = Vector{Float64}(undef, size(df, 1))

# Compute the sum of each row and store it in `row_sums`
for (i, row) in enumerate(eachrow(df_squared))
    row_sums[i] = sum(row)
end

# Now `row_sums` contains the sum of each row
k = 5  # Number of minimum values you want to find
new_indices = partialsortperm(row_sums, 1:k)  # Indices of the 5 smallest values
min_values = row_sums[new_indices]  # The 5 smallest values

println("Minimum values: ", min_values)
println("Indices of minimum values: ", new_indices)

Minimum values: [7.729871444859524e-12, 9.975613551552487e-12, 1.8216954432787778e-11, 4.377303153959497e-11, 4.477707526576819e-11]
Indices of minimum values: [281, 310, 477, 523, 512]


In [14]:
param_samples = hcat(result.population...)

posterior_params = DataFrame(
  N_value = param_samples[1, :],
  T_value = param_samples[2, :],
  tau_value = param_samples[3, :],
  sigma_value = param_samples[4, :],
  Bmax_value = param_samples[5, :]
)

Row,N_value,T_value,tau_value,sigma_value,Bmax_value
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64
1,7.22122,2.51493,3.04291,0.0902499,4.48552
2,1.01301,7.38302,2.76367,0.070656,5.98282
3,3.72747,3.12617,4.24637,0.23911,1.69927
4,4.07121,0.979967,4.4054,0.121872,4.3318
5,1.4548,0.884179,3.1,0.0415184,9.97548
6,2.49417,5.25815,4.3452,0.0609678,6.89486
7,1.90986,5.12125,3.42519,0.101621,4.38549
8,10.7522,1.56412,3.05105,0.205778,2.55405
9,4.7349,6.43357,3.84158,0.177207,2.07825
10,6.22802,1.57583,5.87744,0.0512357,8.71482


In [15]:
new_indices

5-element view(::Vector{Int64}, 1:5) with eltype Int64:
 281
 310
 477
 523
 512

In [16]:
df[new_indices, :]

Row,x1,x2,x3
Unnamed: 0_level_1,Float64,Float64,Float64
1,1.78834e-07,1.7518e-06,2.15153e-06
2,2.54808e-06,7.19044e-07,1.72217e-06
3,9.21992e-07,4.15598e-06,3.07708e-07
4,4.55108e-06,2.04892e-06,4.34311e-06
5,5.20788e-06,4.08236e-06,9.94685e-07


In [17]:
best_particles = posterior_params[new_indices, :]

Row,N_value,T_value,tau_value,sigma_value,Bmax_value
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64
1,3.30943,3.75205,4.7946,0.0561615,7.73158
2,3.96074,8.1693,3.31341,0.12205,2.74108
3,2.80119,2.39564,5.11828,0.0314953,13.5373
4,5.33623,1.7645,5.70751,0.0423393,11.4233
5,5.44817,4.68198,2.9052,0.11011,3.53608
