In [1]:
# REAL NOTEBOOK

# This notebook runs a simulation for the Solar Dynamo model with the sABC algorithm.
# The results of the simulation are all stored in files in the directory Simulations/Real i.
# The reason behind this is to have an easier access to the results of already run simulations,
# without running them again. This notebook only processes real data!!

# NB: In this notebook, there's no function to visualize the results: the visualization is all
# contained in the "visualization_real.ipynb" notebook.

# RULES:

# There are two ways to use this notebook:
# 1) change all the parameters and then run all -> correct way
# 2) change things randomly and not in order and then run -> wrong way
# Please be careful: some functions change the directory in which everything is being saved. The
# order in which the functions are called is organized to start from a generic dir, create the dir
# Simulations/Real i, go to that directory and then, when everything is finished, go back to
# the initial dir!!! If you don't do that, it will stay in the subdir and at the next run it will
# create a subdir of a subdir -> if you need to stop midway through because you forgot something,
# remember to come back to the initial directory (and delete the directory that does not contain
# the correct files).

# GG EZ - kallo27

In [2]:
# NEEDED PACKAGES -> no visualization!!

using StochasticDelayDiffEq
using SpecialFunctions
using Distributions
using SimulatedAnnealingABC
using Distances
using DataFrames
using FFTW
using CSV
using XLSX
using ThreadPinning

In [3]:
# FUNCTIONS NEEDED FOR THE MODEL

# Box-shaped function for the magnetic field range 
"""
    f(B, B_max = 10, B_min = 1)

Evaluate the box-shaped function of the magnetic field `B`:

    1/4 * (1 + erf(B^2 - B_min^2)) * (1 - erf(B^2 - B_max^2))

All operations on `B` are broadcast, so `B` may be a scalar or an array.
"""
function f(B, B_max = 10, B_min = 1)
  B_squared = B .^ 2
  # factor built from the lower bound B_min
  lower_edge = 1 .+ erf.(B_squared .- B_min ^ 2)
  # factor built from the upper bound B_max
  upper_edge = 1 .- erf.(B_squared .- B_max ^ 2)
  return 1 / 4 * lower_edge .* upper_edge
end

# Drift function for the DDE
"""
    drift(du, u, h, p, t)

In-place drift term of the delay equation.

# Arguments
- `du`: output; `du[1]` and `du[2]` are overwritten.
- `u`: current state, unpacked as `(B, dB)`.
- `h`: history function, called as `h(p, t - T)`; only its first component is used.
- `p`: parameters, unpacked as `(N, T, tau, sigma, Bmax)`; `sigma` is not used here.
- `t`: current time.
"""
function drift(du, u, h, p, t)
  N, T, tau, _, Bmax = p
  B, dB = u

  # field value one delay `T` in the past
  delayed_B = h(p, t - T)[1]

  damping = (2 / tau) * dB
  restoring = B / tau^2
  feedback = (N / tau^2) * delayed_B * f(delayed_B, Bmax)

  du[1] = dB
  du[2] = - (damping + restoring + feedback)
end

# Noise function for the DDE
"""
    noise!(du, u, h, p, t)

In-place noise term of the delay equation.

`du[1]` is set to `0` and `du[2]` to `sigma * Bmax / tau^(3/2)`, where `tau`, `sigma` and
`Bmax` are the third, fourth and fifth entries of `p`. The arguments `u`, `h` and `t` are
not used.
"""
function noise!(du, u, h, p, t)
  _, _, tau, sigma, Bmax = p
  amplitude = (sigma * Bmax) / (tau^(3/2))
  du[1] = 0
  du[2] = amplitude
end

# DDE problem solver
"""
    bfield(θ, Tsim, dt)

Integrate the stochastic delay equation defined by `drift` and `noise!`.

# Arguments
- `θ`: the five model parameters. The vector is handed unchanged to `drift` and `noise!`,
  which unpack it as `(N, T, tau, sigma, Bmax)`; only `Bmax` is read here.
- `Tsim`: indexable collection whose first two entries are the start and end time.
- `dt`: step size passed to the `EM()` solver.

# Returns
The object returned by `solve`, saved with `saveat = 1.0`.
"""
function bfield(θ, Tsim, dt)
  # Only Bmax is needed here; the previous unpacking named the other entries in an order
  # that disagreed with `drift`/`noise!`.
  _, _, _, _, Bmax = θ

  # constant history and initial state: field at Bmax, zero derivative
  h(p, t) = [Bmax, 0.]
  B0 = [Bmax, 0.]
  tspan = (Tsim[1], Tsim[2])

  prob = SDDEProblem(drift, noise!, B0, h, tspan, θ)
  return solve(prob, EM(), dt = dt, saveat = 1.0)
end

# Distance function in the sABC algorithm
"""
    f_dist(θ; type = 1, indeces = 1:6:120, fourier_data)

Distance function handed to the sABC sampler.

Runs `bfield(θ, Tsim, dt)` using the globals `Tsim` and `dt`, squares the first component of
the solution, reduces it with `reduced_fourier_spectrum(…, indeces)` and compares the result
entry by entry with `fourier_data`.

# Keywords
- `type`: accepted but not used inside this function.
- `indeces`: positions of the Fourier spectrum kept as summary statistics.
- `fourier_data`: summary statistics of the observed data; must have one entry per
  selected index.

# Returns
A vector with one `euclidean` distance per summary statistic.

# Throws
- `DimensionMismatch` if `fourier_data` and the simulated statistics differ in length.
"""
function f_dist(
  θ::Vector{Float64};
  type::Int64 = 1,
  indeces::AbstractVector{<:Integer} = 1:6:120,
  fourier_data::Vector{Float64},
)
  sol = bfield(θ, Tsim, dt)

  simulated_data = sol[1, :] .^ 2
  fourier_stats = reduced_fourier_spectrum(simulated_data, indeces)

  # A longer `fourier_data` used to be truncated silently, which hides summary statistics
  # that were computed with a different `indeces`.
  if length(fourier_data) != length(fourier_stats)
    throw(DimensionMismatch(
      "fourier_data has $(length(fourier_data)) entries but $(length(fourier_stats)) " *
      "summary statistics were computed from `indeces`"
    ))
  end

  return [euclidean(stat, target) for (stat, target) in zip(fourier_stats, fourier_data)]
end

# function for the summary statistics
"""
    reduced_fourier_spectrum(u, indeces = 1:6:120)

Return the entries of `abs.(fft(u))` at the positions listed in `indeces`.

# Arguments
- `u`: real-valued series.
- `indeces`: integer positions to keep. Any integer vector or range is accepted,
  including a `UnitRange` such as `1:20`.
"""
function reduced_fourier_spectrum(
  u::AbstractVector{<:Real},
  indeces::AbstractVector{<:Integer} = 1:6:120,
)
  fourier_transform = abs.(fft(u))
  return fourier_transform[indeces]
end

reduced_fourier_spectrum (generic function with 2 methods)

In [4]:
# FUNCTIONS NEEDED FOR SAVING THE RESULTS OF A SIMULATION

# function to create a new directory for each simulation, in order to store the needed files
"""
    create_directory()

Create the first directory "Simulations/Real i" (i = 1, 2, ...) that does not exist yet
below the current working directory, print its path and `cd` into it.

The working directory of the process is changed as a side effect.
"""
function create_directory()
  root = joinpath(pwd(), "Simulations")

  # advance until an unused "Real i" name is found
  idx = 1
  while isdir(joinpath(root, "Real $idx"))
    idx += 1
  end

  target = joinpath(root, "Real $idx")
  mkpath(target)
  println("Directory created at: $target")
  cd(target)
end

# function to save the prior as a string
"""
    get_prior_string(prior)

Return a textual description of `prior` of the form
"product_distribution(Uniform(a, b), ...)", with one entry per element of `prior.dists`.

# Throws
An error (via `error`) for any component that is not a `Uniform`.
"""
function get_prior_string(prior)
  # text for a single component; only `Uniform` is supported
  function describe(d)
    isa(d, Uniform) || error("Unsupported distribution type: $(typeof(d))")
    return "Uniform($(minimum(d)), $(maximum(d)))"
  end

  return "product_distribution(" * join(map(describe, prior.dists), ", ") * ")"
end

# function to save the sabc parameters
"""
    save_sabc_params(prior, n_particles, n_simulation, v, type, indeces)

Write the sABC settings to "sabc_params.csv" in the current working directory, as a
two-column table (`Parameter`, `Value`), and print the path of the file.

# Arguments
- `prior`: converted to text with `get_prior_string`.
- `n_particles`, `n_simulation`, `v`, `type`: stored as given.
- `indeces`: any integer vector or range; stored as `string(indeces)`.
"""
function save_sabc_params(
  prior,
  n_particles::Int,
  n_simulation::Int,
  v::Float64,
  type::Int,
  indeces::AbstractVector{<:Integer},
)
  path = joinpath(pwd(), "sabc_params.csv")

  sabc_params = DataFrame(
    Parameter = ["prior", "n_particles", "n_simulation", "v", "type", "indeces"],
    Value = [get_prior_string(prior), n_particles, n_simulation, v, type, string(indeces)]
  )

  # write to the same absolute path that is reported below
  CSV.write(path, sabc_params)
  println("Parameters saved to: $path")
end

# Function to save the result object of a sABC algorithm
"""
    save_result(result)

Write the contents of a finished sABC run to CSV files in the current working directory
and print the path of every file written.

Files:
- "eps_hist.csv", "u_hist.csv", "rho_hist.csv": `result.state.ϵ_history`,
  `result.state.u_history` and `result.state.ρ_history`, with columns labelled
  "1", "2", ... up to `size(history, 1)`.
- "pop.csv": `result.population`, one row per particle, with columns `N_value`,
  `T_value`, `tau_value`, `sigma_value`, `Bmax_value`.
- "rho.csv": `result.ρ`, with one column `ss1`, `ss2`, ... per column of `result.ρ`.
"""
function save_result(result::SimulatedAnnealingABC.SABCresult{Vector{Float64}, Float64})
  curr_path = pwd()

  histories = (
    "eps_hist.csv" => result.state.ϵ_history,
    "u_hist.csv" => result.state.u_history,
    "rho_hist.csv" => result.state.ρ_history,
  )

  for (filename, variable) in histories
    labels = string.(1:size(variable, 1))
    path = joinpath(curr_path, filename)
    CSV.write(path, DataFrame(variable, labels))
    println("$filename data saved to: $path")
  end

  # Final population: each particle becomes one column of `param_samples`.
  pop_path = joinpath(curr_path, "pop.csv")
  param_samples = reduce(hcat, result.population)

  posterior_params = DataFrame(
    N_value = param_samples[1, :],
    T_value = param_samples[2, :],
    tau_value = param_samples[3, :],
    sigma_value = param_samples[4, :],
    Bmax_value = param_samples[5, :]
  )

  CSV.write(pop_path, posterior_params)
  println("Posterior parameters saved to: $pop_path")

  # Final distances. The column names follow the actual number of columns, so saving no
  # longer fails when the number of summary statistics is not 6.
  rho_path = joinpath(curr_path, "rho.csv")
  rho = result.ρ
  rho_values = DataFrame(rho, Symbol.("ss", 1:size(rho, 2)))

  CSV.write(rho_path, rho_values)
  println("Rho values saved to: $rho_path")
end

save_result (generic function with 1 method)

In [5]:
# THREADS SETTINGS AND INFO

# Pin the Julia threads using the `:cores` strategy, then print the resulting thread layout
pinthreads(:cores)
threadinfo()


System: 8 cores (no SMT), 8 sockets, 1 NUMA domains

[0m[1m| [22m[33m[1m0[22m[39m[0m[1m | [22m[33m[1m1[22m[39m[0m[1m | [22m[33m[1m2[22m[39m[0m[1m | [22m[33m[1m3[22m[39m[0m[1m | [22m[33m[1m4[22m[39m[0m[1m | [22m[33m[1m5[22m[39m[0m[1m | [22m[33m[1m6[22m[39m[0m[1m | [22m[33m[1m7[22m[39m[0m[1m | [22m

[33m[1m#[22m[39m = Julia thread, [0m[1m|[22m = Socket seperator

Julia threads: [32m8[39m
├ Occupied CPU-threads: [32m8[39m
└ Mapping (Thread => CPUID): 1 => 0, 2 => 1, 3 => 2, 4 => 3, 5 => 4, ...


In [6]:
# EXTRACTING OPEN MAGNETIC FLUX AND SUNSPOT NUMBER RECORDS FROM XLSX FILE

# Define DataFrame object
# Empty table with one typed column per quantity read from the spreadsheet
data = DataFrame(
  year = Int[],
  open_magn_flux = Float64[],
  open_magn_flux_err = Float64[],
  ssa_open_magn_flux = Float64[],
  sunspots_num = Float64[],
  sunspots_err = Float64[],
  ssa_sunspots = Float64[]
)

# Read the "Data" sheet and append one table row per spreadsheet row
XLSX.openxlsx("SN Usoskin Brehm.xlsx") do file
  sheet = file["Data"]

  for row in XLSX.eachrow(sheet)
    # rows whose second cell is not a number are skipped
    row[2] isa Number || continue

    push!(data, (
      year = row[2],
      open_magn_flux = row[3],
      open_magn_flux_err = row[4],
      ssa_open_magn_flux = row[5],
      sunspots_num = row[7],
      sunspots_err = row[8],
      ssa_sunspots = row[9]
    ))
  end
end

# Summary statistics of the real data (open magnetic flux column)
u = data.open_magn_flux
# NOTE(review): `indeces` is not assigned in this cell; in the notebook as shown it is first
# set in the SIMULATION PARAMETERS cell further down, so on a fresh kernel this line
# presumably raises an UndefVarError - confirm the intended cell order.
sim_ss = reduced_fourier_spectrum(u, indeces)

In [7]:
# DIRECTORY MANAGING

# Remember the directory we start from, so that we can go back to it after the run
initial_directory = pwd()

# Create the next free "Simulations/Real i" directory
create_directory()

# NB: After "create_directory", we are inside the new directory.
# DON'T RUN THIS CELL AGAIN, wait for the simulation to finish!!!! If you made errors,
# delete the Simulations/Real i directory and then rerun everything

Directory created at: /home/ubuntu/LCP_B/Project/Simulations/Real 8


In [8]:
# SIMULATION PARAMETERS MANAGING

# Parameters that can be tuned for new simulations
# One independent Uniform prior per model parameter; the labels follow the order in which
# `drift` and `noise!` unpack the parameter vector.
prior = product_distribution(
  Uniform(5, 15),      # N
  Uniform(0.1, 10.0),  # T
  Uniform(0.1, 6.0),   # tau
  Uniform(0.01, 0.3),  # sigma
  Uniform(1, 15),      # Bmax
)
n_particles = 1_000
n_simulation = 10_000_000
v = 1.0
type = 1
# positions of the Fourier spectrum used as summary statistics
indeces = [1, 2, 37, 50, 78, 85]

# Store the values set in this cell in "sabc_params.csv"
save_sabc_params(prior, n_particles, n_simulation, v, type, indeces)

Parameters saved to: /home/ubuntu/LCP_B/Project/Simulations/Real 8/sabc_params.csv


In [9]:
# SIMULATION

# Conditions
# Time span of the simulation = time span of the data; both are read as globals by `f_dist`
tmin, tmax = data.year[1], data.year[end]
Tsim = [tmin, tmax]
dt = 0.01

# Recompute the summary statistics of the real data with the `indeces` that are passed to the
# sampler below, so that they cannot be left over from a different choice of `indeces`.
sim_ss = reduced_fourier_spectrum(data.open_magn_flux, indeces)

# Actual usage of the sABC algorithm
result = sabc(f_dist, prior;
              n_particles = n_particles,
              n_simulation = n_simulation,
              v = v,
              type = type,
              indeces = indeces,
              fourier_data = sim_ss)

# Display of the summary of the results
display(result)

# Saving the results to the files: "eps_hist.csv", "u_hist.csv", "rho_hist.csv", "pop.csv", "rho.csv".
save_result(result)

┌ Info: Preparing to run SABC algorithm: 'single-epsilon'
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:188
┌ Info: Using threads: 8 
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:199
┌ Info: Set BLAS threads = 1 
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:202
┌ Info: Set 'pinthreads(:cores)' for optimal multi-threading performance
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:204
┌ Info: Initializing population...
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:223
┌ Info: Initial resampling (δ = 0.1) - ESS = 996.6799495718244 
└ @ SimulatedAnnealingABC /home/ubuntu/.julia/packages/SimulatedAnnealingABC/e8QsC/src/SimulatedAnnealingABC.jl:277
┌ Info: Population

In [None]:
# DIRECTORY MANAGING

# Go back to the directory stored in `initial_directory` and show where we are now
cd(initial_directory)
pwd()

In [None]:
# Keep the `ρ` field of the sABC result in `rho` for the cells below
rho = result.ρ

In [None]:
# Table view of `rho` with automatically generated column names
df = DataFrame(rho, :auto)

# Square every entry, column by column; the column names of `df` are kept
df_squared = mapcols(column -> column .^ 2, df)

println(df_squared)

In [None]:
# Sum of every row of `df_squared`
row_sums = Float64[sum(row) for row in eachrow(df_squared)]

# Positions and values of the `k` smallest row sums, in increasing order
k = 5
new_indices = partialsortperm(row_sums, 1:k)
min_values = row_sums[new_indices]

println("Minimum values: ", min_values)
println("Indices of minimum values: ", new_indices)

In [None]:
# Stack the particles side by side: one column per particle, one row per parameter
param_samples = reduce(hcat, result.population)

# One table column per parameter, taken from the corresponding row of `param_samples`
posterior_params = DataFrame([
  name => param_samples[i, :]
  for (i, name) in enumerate((:N_value, :T_value, :tau_value, :sigma_value, :Bmax_value))
])

In [None]:
# Show the selected indices
new_indices

In [None]:
# Rows of `df` at the selected indices
df[new_indices, :]

In [None]:
# Rows of `posterior_params` at the selected indices
best_particles = posterior_params[new_indices, :]