In [None]:
using HDF5;                     # for .hd5 file manipulation
using DifferentialEquations;    # Provides a variety of differential solvers
using LinearAlgebra: Diagonal;  # Efficient Diagonal matrixes
using Statistics;               # Provides mean and median for the analysis
using DataFrames;               # For succinct data manipulation
using CSV;                      # For writing the data to prevent constant re-running
using ForwardDiff;              # For profiling the gradients 
using Gadfly, Fontconfig, Cairo;# For plotting

In [None]:
"""
get_assets() uses the current working directory to retrieve the datasets (.csv) and models (.hd5) assumed to be in folders with those names.
"""
function get_assets()::Tuple{Vector{String}, Vector{String}}
    models = Vector{String}(undef, 0)       # An array to house the file address of the models 
    datasets = Vector{String}(undef, 0)     # An array to store the dataset file addresses 
    miscellaneous = Vector{String}(undef, 0)# An array for other files

    for (root, dirs, files) in walkdir(pwd())   # Walking through the working directory 
        storage_array = miscellaneous
        if occursin("models", root) # If the root path contains "models" appending to models array
            storage_array = models  
        elseif occursin("datasets", root)   # If the root path contains "datasets" appending to datasets
            storage_array = datasets
        end

        for file in files   # Looping over files in the directory 
            file = string(root, "/", file)  # Formatting file as a filepath 
            push!(storage_array, file)  # Appending to the correct array 
        end
    end
    return models, datasets
end

In [None]:
""" 
This function will loads the derivative function from a `.hd5` file. 

Parameters:
 - `model: str` The name of the model that is to be used. 

 Returns:
 - `Function` The entire model is summarised by the derivative since it is an ode. So I want to return the derivative as a function of parameters. I have decided that the binning will automatically occur within this function based on the hemisphere loaded from the `.hd5` file. I need to check this against `ticktack`
"""
function get_derivative(model_file_name::String)::Function
    #! I need to get better variable names for this function 
    #! I also need to make a list of the equilibrium steady_state_productions using python.
        #? I could do this using pycall and ticktack but I think not. Since it is intrinsic to the model we could add it to the hd5 files.
    
    local hd5 = h5open(model_file_name);                        # Opening the HDF5 file
    local fluxes = hd5["fluxes"][1:end, 1:end];                 # Flux amount matrix 
    local projection = hd5["production coefficients"][1:end];   # Production prjection 
    local masses = hd5["reservoir content"][1:end, 1:end];      # The C14 reserviour contents 
    local hemispheres = hd5["hemispheres"][1:end]               # Hemispheres of the model
    close(hd5);                                                 # Closing the file 

    local decay = Diagonal([log(2) / 5730 for i in 1:11]);  # Constructing the decay matrix
    local fluxes = transpose(fluxes) ./ masses;             # The proportion flux
    local transfer = transpose(fluxes) - Diagonal(vec(sum(fluxes, dims=2))) - decay;# Construncting the transfer operator

    """
    The production function of Carbon 14. There are two components; the steady state sinusoidal production based on the Schwabe cycle, and the super-gaussian Miyake event.

    Parameters:
     - t::Float64: The time in years.
     - p::Vector{Float64}: The parameters of the model. 

    Returns:
     - Vector{Float64}: The Carbon 14 production of each box of the model
    """
    function production(time::Vector{Float64}, parameters::Vector{Float64})::Vector{Float64}
        local ϕ = parameters[4];    # Phase of the sinusoid 
        local middle = parameters[1] + parameters[2] / 2;   # Center of the super-gaussian  
        local height = parameters[3] / parameters[2];       # Amplitude of the super-gaussian
        local duration = 0.5167531366915398 * parameters[2];# Numeric factor controlling width

        #! Currently set for guttler
        local sine = 1.88 + 0.18 * 1.88 * sin(2π / 11 * time + ϕ);   # Sinusoidal production components  
        local gauss = height * exp(- ((time - middle) / duration) ^ 16.);   # event
        return sine + gauss
    end

    """
    The ODE model of the earth system. 

    Parameters:
     - position::Vector{Float64}: The current position of the system 
     - parameters::Vector{Float64}: The parameters of the production function/Miyake event
     - time::Vector{Float64}: The current year
    Returns:
     - Function: The derivative function of the system
    """
    function derivative(position, parameters, time)::Vector{Float64}
        return transfer * position + production(time, parameters) * projection;  
    end
    return derivative
end

In [None]:
"""
    get_carbon(model_derivative::Function, solver::String)

Return a zero-argument function that equilibrates the box model and burns it in from
year -360 to year 760, yielding the state at the end of the burn-in.

# Arguments
- `model_derivative::Function`: the ODE right-hand side, called as
  `model_derivative(position, parameters, time)`.
- `solver::String`: name of the solver to use. NOTE(review): not used yet.

# Returns
- `Function`: the `equilibrate` closure.

NOTE(review): work in progress. The closure still reads the globals `TO`, `P` and
`params`, which must be defined before it is called.
"""
function get_carbon(model_derivative::Function, solver::String)::Function
    #! So this function needs to implement an equilibrate function or contain a
    #! prebuilt dictionary of steady state productions for the models
    function equilibrate()
        u0 = TO \ (-params[1] * P)  # Equilibrating the system

        # Burn in problem, integrated with the derivative that was passed in
        burnproblem = ODEProblem(model_derivative, u0, (-360.0, 760.0), params)
        return solve(burnproblem, reltol=1e-6).u[end]   # Final state of the burn in
    end
    return equilibrate
end

In [None]:
# Collect the model and dataset file paths found below the working directory.
models, datasets = get_assets();
# Build the ODE right-hand side from the third model file that was found.
# NOTE(review): the index 3 depends on the order in which the files are listed —
# confirm that it selects the intended model.
derivative = get_derivative(models[3]);
# Request the carbon function for that model, naming "BS3" as the solver.
get_carbon(derivative, "BS3")

In [None]:
# Inspect the "hemispheres" dataset of the third model file. The `do` block closes
# the file handle once the dataset has been read.
h5open(models[3], "r") do hd5
    hd5["hemispheres"][1:end]
end

Everything from here on is the old version.

In [None]:
"""
Takes time series data and calculates the average of each year.
"""
function bin(time_series::Vector, solution_vector::Vector)::Vector
    local binned_solution = Vector(undef, 0);   # Setting a vector to hold the bins 
    local whole_times = @.floor(time_series);   # Creating a vector of discrete time.
    for whole_time in unique(whole_times)       # Looping over the unique elements discrete times 
        local indexes = findall(whole_times .== whole_time);        # Getting the indexes
        push!(binned_solution, mean(solution_vector[indexes]));   # Appending to binned_solution
    end
    return binned_solution
end

In [None]:
"""
Reads the flux (amounts), production (projection) and reserviour contents from 
a .hd5 file with file_name. It returns the transfer operator and production 
projection 
"""
function read_hd5(file_name::String)::Tuple{Matrix{Float64}, Vector{Float64}}
    local hd5 = h5open(file_name);                      # Opening the HDF5 file
    local F = hd5["fluxes"][1:end, 1:end];              # Retrieving the flux matrix 
    local P = hd5["production coefficients"][1:end];    # Retrieving the production prjection 
    local N = hd5["reservoir content"][1:end, 1:end];   # The C14 reserviour contents 
    close(hd5);                                         # Closing the file 

    local λ = Diagonal([log(2) / 5730 for i in 1:11]);          # Constructing the decay matrix
    F = transpose(F) ./ N;                                      # The proportion flux
    local TO = transpose(F) - Diagonal(vec(sum(F, dims=2))) - λ;# Construncting the transfer operator
    return TO, P                                           
end

In [None]:
"""
Passed a solver function runs the solver and returns the speed and binned data
"""
function run_solver(solver, ∇::Function, U0::Vector, p)::Vector
    local problem = ODEProblem(∇, U0, (760.0, 790.0), p);   # Creating the ODEProblem instance
    local solution = solve(problem, reltol = 1e-6, solver()); # Solving the ODE  
    local time = Array(solution.t);                         # Storing the time sampling 
    solution = Array(solution)[2, 1:end];   # Storing the solution for troposphere 
    solution = bin(time, solution);         # Getting the annual means
    return solution;                        # Binning the results into years 
end

In [None]:
"""
So this is highly experimental basically I am passing a function into the profile function, which takes only solver as an argument. This done using the wrapper method that I used earlier
"""
function profile(solvers::Vector, f::Function, args::Vector)
    local acc_data = Vector{Any}(undef, length(solvers));   # Creating the storage Matrix 
    local time_data = Vector{Any}(undef, length(solvers));  # For the mean of the times
    local results = DataFrame(solver = @.string(solvers));  # Storage system

    for (index, solver) in enumerate(solvers)           # Looping over the solvers 
        local time_sample = Vector{Float64}(undef, 10); # The different run times of each trial 
        for i in 1:10
            local timer = time();           # Starting a timer
            solution = f(solver, args...);  # Running the solver
            time_sample[i] = time() - timer;# ending the timer 

            if i == 10                  # Storing final run
                acc_data[index] = solution; # filling C14
            end
        end
        time_data[index] = time_sample;   # Storing run time ignoring compile run.
    end 

    #* Accuracy Analysis 
    acc_data = cat(acc_data..., dims=3);                # Array construction for ease of use
    local meds = median(acc_data, dims=3);              # Calculating the medians
    local devs = @.abs((acc_data .- meds) ./ acc_data); # Calculating the normalised deviations

    accuracy = Vector{Float64}(undef, 11);  # A vector to store solver accuracy
    acc_errs = Vector{Float64}(undef, 11);  # A vector to store the minimum accuracy 

    for i in 1:size(devs)[3];   # Looping through the sovler dimension 
        accuracy[i] = mean(devs[1:end, 1:end, i]);                  # Calculating the accuracy
        acc_errs[i] = mean(devs[1:end, 1:end, i] .- accuracy[i]);   # Finding the shortest run 
    end

    #* Time analysis 
    times = hcat(time_data...);                 # Array construction for ease of manipulation 
    times = times[2:end, 1:end];                # Removing the compilation run
    local tmean = mean(times, dims=1);          # Calculating the average time
    local terrs = mean(times .- tmean, dims=1); # Calculating the MAE error

    #* Assinging to the data frame 
    results.time_mean = vec(tmean);     # Storing the times 
    results.time_errs = vec(terrs);     # Storing the shortest run
    results.accuracy_mean = accuracy;   # Storing the accuracies 
    results.accuracy_errs = acc_errs;   # Least accurate fields

    if f == select_autodiff # Storing the profiled function
        results.method = [string(args[end]) for i in 1:nrow(results)];
    else
        results.method = [string(f) for i in 1:nrow(results)];     
    end
    
    return results
end

In [None]:

"""
Calculates the gradient using a χ² loss function. ∇ is the gradient of the differential equations and u0 is the starting position. Parameters are the values of the parameters that the gradient is getting calculated for and f is the function i.e. gradient in forward or reverse or hessian. This means that f tkes two arguments. Man this is sought of shit programing,
"""
function select_autodiff(solver, ∇::Function, u0::Vector{Float64}, parameters, f::Function)
    function loss_function(params)  
        local solution = run_solver(solver, ∇, u0, params); #! Naming
        local ΔC14 = (solution .- u0[2]) ./ u0[2] .* 1000;  # Calculating ΔC14
        local miyake = DataFrame(CSV.File("Miyake12.csv")); # Reading Miyake data
        local ΔC14 .+= mean(miyake.d14c[1:4]);  # Calculates the ticktack offset
        local χ² = sum(((miyake.d14c .- ΔC14[1:28]) ./ miyake.sig_d14c) .^ 2);  # Calculating χ² 
        return -0.5 * χ²
    end
    return f(loss_function, parameters);
end

In [None]:
# Old profiling driver: builds the Guttler14 model, burns it in and profiles every
# solver for the plain solve, the forward-mode gradient and the hessian.
TO, P = read_hd5("Guttler14.hd5");      # Transfer operator and production projection

params = Vector{Float64}(undef, 6); # Storing the model params 
params[1] = 7.044873503263437;      # The mean level of the sinusoid 
params[2] = 0.18;                   # The modulation of the sinusoid w. r. t. the mean
params[3] = 11.0;                   # Setting period of the sinusoid 
params[4] = 1.25;                   # The phase shift of the sinusoid
params[5] = 120.05769867244142;     # The height of the super-gaussian 
params[6] = 12.0;                   # Scale applied to (t - 775) inside the super-gaussian

# Production at time t: a sinusoid around params[1] plus a super-gaussian at year 775.
production(t, params) = params[1] * (1 + params[2] * 
    sin(2 * π / params[3] * t + params[4])) +           # Sinusoidal production 
    params[5] * exp(- (params[6] * (t - 775)) ^ 16);    # Super-gaussian event
# NOTE(review): `derivative` is also assigned as a variable in an earlier cell;
# defining a method with the same name may conflict — confirm the cell order.
derivative(x, params, t) = vec(TO * x + production(t, params) * P);  # The derivative system 

u0 = TO \ (- params[1] * P);  # Equilibrating the system at the mean production

burnproblem = ODEProblem(derivative, u0, (-360.0, 760.0), params);  # Burn-in problem  
burnsolution = solve(burnproblem, reltol=1e-6).u[end];              # State at the end of the burn-in 

# The solver constructors that are profiled.
solvers = [TRBDF2, BS3, Tsit5, Rosenbrock23, ROS34PW1a, QNDF1, ABDF2, ExplicitRK, DP5, TanYam7, Vern6]

fgrad = profile(solvers, select_autodiff,
    [derivative, burnsolution, params, ForwardDiff.gradient]);  # Running the forward mode 
hessians = profile(solvers, select_autodiff,
    [derivative, burnsolution, params, ForwardDiff.hessian]);   # Running the hessians 
# NOTE(review): this rebinds `solvers` from the list of solvers to the result table,
# so the cell cannot be run twice without redefining the list first.
solvers = profile(solvers, run_solver,
    [derivative, burnsolution, params]); # Running the solvers

In [None]:
# Stack the three profiling tables (gradient, hessian, plain solve) into one long table.
results = vcat(fgrad, hessians, solvers);   # Concatenating the data frames 

In [None]:
# Accuracy against run time for every solver and method, on log-log axes: points with
# error bars coloured by method, faint lines joining the rows of the same solver, and
# the solver names as labels.
something = Gadfly.plot(
    results,                # Data frame that the column symbols below refer to
    layer(# Layer for the point geometry 
        x=:time_mean,       # X data as time 
        y=:accuracy_mean,   # Y data as accuracy (normalised)
        xmin=results.time_mean - results.time_errs,           # X error bar lower bounds
        xmax=results.time_mean + results.time_errs,           # X error bar upper bounds
        ymin=results.accuracy_mean - results.accuracy_errs,   # Y error bar lower bounds
        ymax=results.accuracy_mean + results.accuracy_errs,   # Y error bar upper bounds
        Geom.point,     # Layer geometry 
        Geom.xerrorbar, # X error bar geometry 
        Geom.yerrorbar, # Y error bar geometry
        color=:method   # Layer color scheme 
    ),
    layer(
        x=:time_mean,       # X data as time 
        y=:accuracy_mean,   # Y data as accuracy (normalised)
        group=:solver,      # Grouping by the solver that was used 
        Geom.line,          # Adding the line geometry
        Theme(
            default_color="grey",   # Grey lines in the background 
            alphas=[0.1],           # Highly transparent background lines
            line_style=[:dot]       # Dotted lines 
        )
    ),
    layer(
        x=:time_mean,       # X coordinates for the labels
        y=:accuracy_mean,   # Y coordinates for the labels
        label=:solver,      # Choosing the solver names as labels 
        Geom.label          # Adding the solvers as labels
    ),
    Scale.x_log10,              # Logarithmic X axis 
    Scale.y_log10,              # Logarithmic Y axis 
    Guide.xlabel("Time (s)", orientation=:horizontal),
    Guide.ylabel("Accuracy", orientation=:vertical),
    Guide.title("Forward Autodiff, Hessians and Performance")
)

In [None]:
# Persist the outputs so that the profiling does not have to be re-run.
draw(PDF("solver_profiles.pdf", 20cm, 20cm), something); # Saving the figure as a 20cm x 20cm PDF 
CSV.write("solver_profiles.csv", results);  # Writing the results to a CSV

Next step: I may use PyCall, but I definitely want to loop the profiling over multiple models and multiple datasets. A sketch of the intended driver loop:
```
parameters = load_params()  # Parameters of the model 
solvers = []                # List of solvers 
results = DataFrame()

for model in models     # Path of the .hd5 files 
    raw_solvers = profile_solvers()
    #! Repeat in the dataframe for long format 
    for datum in data   # Path of the .csv files 
        grads = profile_gradients()
        hessians = profile_hessians()
```

