# Comparing different Neural Networks

In this notebook we use 47 neural networks, one per time slice, to predict the individual rest-eigen contribution on each time slice.

In [None]:
using LmaPredict, Flux, Statistics, Plots, Random, StatsBase, JLD2, PrettyTables, TickTock

## Reading the data

In [None]:
# Directory that holds the configuration files; its entries are listed with `readdir`
# and each selected entry is loaded with `get_LMAConfig`.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
const path_config = "/Users/lukasgeyer/Studium/Computational Sciences/Masterarbeit/Daten Simon/dat"
# Relative directory name for plots.
# NOTE(review): not referenced in the visible cells — confirm whether it is still needed.
const path_plot = "../plots"

In [None]:
# Take directory entries 2 through 5001 (the first entry is skipped) and order them by
# the integer value of their file name rather than lexicographically.
fname = readdir(path_config)[2:5001]
idx = sortperm(map(name -> parse(Int64, name), fname))
fname = fname[idx]
em_n = "PA"

# Load one LMAConfig per file, in numerical file-name order.
cnfgarr = LMAConfig[
    get_LMAConfig(joinpath(path_config, f), "g5-g5", em=em_n, bc=false) for f in fname
]

## Splitting data into training and test sets

In [None]:
# Select a specific source time and split the data into a training and a test set for
# the eigenvalues and the "rr", "re" and "ee" components.
TSRC = "24"
NCNFG = length(cnfgarr)
# The first entry of every correlator is dropped below, hence the `- 1`.
TVALS = length(cnfgarr[1].data["rr"][TSRC]) - 1
EIGVALS = em_n == "PA" ? 32 : 64

# The first NTRAIN configurations are used for training, all remaining ones for testing.
NTRAIN = 500
NTEST = NCNFG - NTRAIN

eigvals_data_train = Array{Float32}(undef, EIGVALS, NTRAIN)
rr_data_train = Array{Float32}(undef, TVALS, NTRAIN)
ee_data_train = Array{Float32}(undef, TVALS, NTRAIN)
re_data_train = Array{Float32}(undef, TVALS, NTRAIN)

eigvals_data_test = Array{Float32}(undef, EIGVALS, NTEST)
rr_data_test = Array{Float64}(undef, TVALS, NTEST)
ee_data_test = Array{Float64}(undef, TVALS, NTEST)
re_data_test = Array{Float64}(undef, TVALS, NTEST)

# Column k of every array belongs to the k-th configuration of the respective subset.
for (k, dd) in enumerate(c.data for c in cnfgarr[1:NTRAIN])
    eigvals_data_train[:, k] = dd["eigvals"][1:EIGVALS]
    rr_data_train[:, k] = dd["rr"][TSRC][2:end]
    ee_data_train[:, k] = dd["ee"][TSRC][2:end]
    re_data_train[:, k] = dd["re"][TSRC][2:end]
end
for (k, dd) in enumerate(c.data for c in cnfgarr[(NTRAIN + 1):NCNFG])
    # Read the eigenvalues from the configuration being iterated (`dd`). Indexing
    # `cnfgarr[k]` here would pick up the first NTEST configurations instead of the
    # test configurations, because `k` restarts at 1.
    eigvals_data_test[:, k] = dd["eigvals"][1:EIGVALS]
    rr_data_test[:, k] = dd["rr"][TSRC][2:end]
    ee_data_test[:, k] = dd["ee"][TSRC][2:end]
    re_data_test[:, k] = dd["re"][TSRC][2:end]
end

## Defining and rescaling input data

In [None]:
# Every per-slice network receives two inputs (the "ee" and "rr" value of its time
# slice) and predicts one output (the "re" value of that slice).
# Alternative input used in earlier experiments: additionally prepend
# `1 ./ eigvals_data_train` (resp. `1 ./ eigvals_data_test`) to the rows below, in which
# case `input_length = EIGVALS + 2`.
input_length = 2
output_length = 1

# Standardise the training inputs slice by slice. Mean and standard deviation are taken
# over both input rows of a slice together and stored so that the test inputs can be
# rescaled with exactly the same constants.
input_data_train = Vector{Matrix{Float32}}(undef, TVALS)
means_data_train = Array{Float32}(undef, TVALS)
std_data_train = Array{Float32}(undef, TVALS)
for i in 1:TVALS
    # 2 x (number of training configurations): row 1 = ee, row 2 = rr.
    local features = vcat(ee_data_train[i:i, :], rr_data_train[i:i, :])
    means_data_train[i] = mean(features)
    std_data_train[i] = std(features)
    input_data_train[i] = (features .- means_data_train[i]) ./ std_data_train[i]
end

# Training targets: one 1 x (number of training configurations) matrix per time slice.
target_train = [re_data_train[i:i, :] for i in 1:TVALS]

# Test inputs are rescaled with the training statistics. The loop runs over all TVALS
# slices (not a hard-coded 47) so that no entry is left undefined if TVALS changes.
input_data_test = Vector{Matrix{Float64}}(undef, TVALS)
for i in 1:TVALS
    local features = vcat(ee_data_test[i:i, :], rr_data_test[i:i, :])
    input_data_test[i] = (features .- means_data_train[i]) ./ std_data_train[i]
end

# Test targets as one TVALS x (number of test configurations) matrix. Stacking the
# individual rows of `re_data_test` again simply reproduces `re_data_test`.
target_test = copy(re_data_test);

## Describing different Neural Networks

We want to compare different networks with respect to the following criteria:

- How many trainable parameters does the network have?
- How long does training take?
- How good is the performance – measured by the standard deviation of the difference – as a function of
     - how many configurations have been used for the bias correction?

In [None]:
activation_function = NNlib.elu

# `models` is a list of ensembles; each ensemble holds one independent network per time
# slice. Currently a single ensemble with three hidden layers of 100 units is defined.
models = let width = 100, act = activation_function
    make_network() = Chain(
        Dense(input_length => width, act),
        Dense(width => width, act),
        Dense(width => width, act),
        Dense(width => output_length, act),
    )
    [[make_network() for _ in 1:TVALS]]
end;

In [None]:
using Flux: params

# Training hyper-parameters shared by all networks.
epochs = 50_000
batch_size = 32
optimizer = Flux.Adam(0.001)

# Loss between a prediction `x` and its target `y`, plus the label written to the
# results file.
function loss_function(x, y)
    return Flux.mse(x, y)
end
loss_discription = "MSE"

# One shuffled mini-batch loader per time slice, pairing that slice's inputs and targets.
loaders = map(1:TVALS) do i
    Flux.DataLoader(
        (input_data_train[i], target_train[i]); batchsize=batch_size, shuffle=true
    )
end

# For every ensemble in `models`: train one network per time slice, evaluate it on the
# test set, study the effect of a bias correction estimated from a varying number of
# test configurations, and write plots and a text report to
# `benchmarks/each_timeSlice/<number of trainable parameters>/`.
for model in models
    # Total number of trainable parameters of all per-slice networks; names the output
    # directory.
    parameters = sum(length, params(model); init=0)

    output_dir = string("benchmarks/each_timeSlice/", parameters)
    mkpath(output_dir)
    results_file = joinpath(output_dir, "results.txt")
    # Start from a clean report; the file is appended to twice further down.
    rm(results_file, force=true)

    # Number of test configurations (columns of the target matrix).
    n_test = size(target_test, 2)

    optim = [Flux.setup(optimizer, model[i]) for i in 1:TVALS]
    # All loaders iterate over the same number of samples, so one length suffices.
    batches_per_epoch = length(first(loaders))

    # Train each per-slice network in turn and return the loss of every optimisation
    # step as a TVALS x (batches_per_epoch * epochs) matrix.
    function training()
        history = Matrix{Float64}(undef, TVALS, batches_per_epoch * epochs)
        for (i, model_time) in enumerate(model)
            step = 1
            for epoch in 1:epochs
                for (x, y) in loaders[i]
                    loss, grads = Flux.withgradient(model_time) do m
                        y_hat = m(x)
                        loss_function(y_hat, y)
                    end
                    Flux.update!(optim[i], model_time, grads[1])
                    history[i, step] = loss
                    step += 1
                end
            end
        end
        return history
    end

    losses = training()

    minimum_training = minimum(losses)
    maximum_training = maximum(losses)
    average_training = mean(losses)

    # Row i holds the prediction of network i for every test configuration.
    out_of_sample_predictions = reduce(
        vcat, [model[i](input_data_test[i]) for i in 1:TVALS]
    )
    # One loss value per test configuration, taken over all time slices.
    out_of_sample_error = [
        loss_function(out_of_sample_predictions[:, k], target_test[:, k]) for
        k in 1:n_test
    ]

    # Example plot for a single test configuration.
    c = min(3333, n_test)
    p = scatter(target_test[:, c], label="Actual")
    scatter!(p, out_of_sample_predictions[:, c], label="Prediction", legend=:top)
    xaxis!(p, "t")
    yaxis!(p, "rest-eigen")
    savefig(p, joinpath(output_dir, "sample.png"))

    minimum_test = minimum(out_of_sample_error)
    maximum_test = maximum(out_of_sample_error)
    average_test = mean(out_of_sample_error)

    open(results_file, "a") do file
        println(file, "Time source position: ", TSRC)
        println(file, "Number of used eigenvalues: ", EIGVALS, "\n")

        println(file, "Optimizer: ", optimizer)
        println(file, "Loss function: ", loss_discription)
        println(file, "Epochs: ", epochs)
        println(file, "Batch size: ", batch_size, "\n")

        println(file, "Minimum training error (∑⁴⁷", loss_discription, "): ", minimum_training)
        println(file, "Maximum training error (∑⁴⁷", loss_discription, "): ", maximum_training)
        println(file, "Average training error (∑⁴⁷", loss_discription, "): ", average_training, "\n")

        println(file, "Minimum test error (∑⁴⁷", loss_discription, "): ", minimum_test)
        println(file, "Maximum test error (∑⁴⁷", loss_discription, "): ", maximum_test)
        println(file, "Average test error (∑⁴⁷", loss_discription, "): ", average_test, "\n")
    end

    # Fractions of the test set used to estimate the bias correction. The names below
    # are the ones used throughout the rest of this loop.
    percentages_bc = [0.0, 0.01, 0.02, 0.05, 0.1, 0.12]
    # `round` instead of `Int.(...)` so that a non-integer product cannot throw.
    n_configs_bc = round.(Int, n_test .* percentages_bc)

    header = [
        "Number of configs used for bc",
        "|μ_diff/μ| (max)",
        "|μ_diff/μ| (min)",
        "|μ_diff/μ| (average)",
    ]

    table = Matrix{Float64}(undef, length(percentages_bc), length(header))

    # Six panels, one per entry of `percentages_bc`.
    l = @layout [a b c; d e f]
    panel_titles = reshape(["bc: $n" for n in n_configs_bc], 1, length(n_configs_bc))

    means_target = Matrix{Float64}(undef, TVALS, length(percentages_bc))
    stds_target = Matrix{Float64}(undef, TVALS, length(percentages_bc))

    means_pred = Matrix{Float64}(undef, TVALS, length(percentages_bc))
    stds_pred = Matrix{Float64}(undef, TVALS, length(percentages_bc))

    means_diff = Matrix{Float64}(undef, TVALS, length(percentages_bc))
    stds_diff = Matrix{Float64}(undef, TVALS, length(percentages_bc))

    for (i, n) in enumerate(n_configs_bc)
        # Re-seed so that every run draws the same bias-correction configurations.
        Random.seed!(10)
        configs = sort!(sample(1:n_test, n, replace=false))

        # Targets of all test configurations NOT used for the bias correction.
        uncorr_target_configs = target_test[:, setdiff(1:n_test, configs)]

        mean_target = [mean(row) for row in eachrow(uncorr_target_configs)]
        σ_mean_target = [std(row) for row in eachrow(uncorr_target_configs)]

        mean_predicted = [mean(row) for row in eachrow(out_of_sample_predictions)]

        # Bias per time slice: mean prediction minus the mean target of the selected
        # configurations; zero when no configuration is used.
        bias_correction = if n > 0
            mean_predicted .- vec(mean(target_test[:, configs], dims=2))
        else
            zeros(TVALS)
        end

        corrected_predictions = out_of_sample_predictions .- bias_correction
        mean_predicted = mean_predicted .- bias_correction
        σ_mean_predicted = [std(row) for row in eachrow(corrected_predictions)]

        # Relative deviation of the bias-corrected prediction from the target mean.
        mean_diff = (mean_target .- mean_predicted) ./ mean_target
        # Spread of the relative deviation over all test configurations. The corrected
        # predictions (prediction - bias) are used here, matching `mean_predicted` and
        # `σ_mean_predicted` above.
        relative_deviation = (mean_target .- corrected_predictions) ./ mean_target
        σ_diff = sqrt.(vec(sum(abs2, relative_deviation, dims=2)) ./ (n_test - 1))

        means_target[:, i] = mean_target
        stds_target[:, i] = σ_mean_target

        means_pred[:, i] = mean_predicted
        stds_pred[:, i] = σ_mean_predicted

        means_diff[:, i] = mean_diff
        stds_diff[:, i] = σ_diff

        table[i, 1] = n
        table[i, 2] = maximum(abs.(mean_diff))
        table[i, 3] = minimum(abs.(mean_diff))
        table[i, 4] = mean(abs.(mean_diff))
    end

    # Relative mean difference per time slice, one panel per bias-correction size.
    p = scatter(
        means_diff,
        yerr=stds_diff,
        layout=l,
        size=(1400, 1000),
        dpi=1000,
        legend=false,
        thickness_scaling=1.1,
        title=panel_titles,
        marker=:+,
        markersize=2,
        markerstrokewidth=0.3,
    )
    savefig(p, joinpath(output_dir, "mean_diff.png"))

    # Target and predicted means without error bars.
    p = scatter(
        layout=l,
        size=(1400, 1000),
        dpi=1000,
        thickness_scaling=1.1,
        title=panel_titles,
    )
    scatter!(p,
        means_target,
        label="actual",
        legend=:top,
        linecolor=:blue,
        marker=:xcross,
        markersize=2,
        markerstrokewidth=0.3,
    )
    scatter!(p,
        means_pred,
        label="predicted",
        legend=:top,
        linecolor=:red,
        marker=:+,
        markersize=2,
        markerstrokewidth=0.3,
    )
    savefig(p, joinpath(output_dir, "mean.png"))

    # Target and predicted means with their standard deviations as error bars.
    p = scatter(
        layout=l,
        size=(1400, 1000),
        dpi=1000,
        thickness_scaling=1.1,
        title=panel_titles,
    )
    scatter!(p,
        means_target,
        yerr=stds_target,
        label="actual",
        legend=:top,
        linecolor=:blue,
        marker=:xcross,
        markersize=2,
        markerstrokewidth=0.3,
    )
    scatter!(p,
        means_pred,
        yerr=stds_pred,
        label="predicted",
        legend=:top,
        linecolor=:red,
        marker=:+,
        markersize=2,
        markerstrokewidth=0.3,
    )
    savefig(p, joinpath(output_dir, "mean_errorbar.png"))

    open(results_file, "a") do file
        pretty_table(file, table, header=header)
        println(file)
        println(file, "Model: ", model)
    end
end