# Comparing different Neural Networks

In this notebook we use 47 Networks to predict the individual rest-eigen contributions on each time slice

In [1]:
using LmaPredict, Flux, Statistics

## Reading the data

In [2]:
const path_config = "/Users/lukasgeyer/Studium/Computational Sciences/Masterarbeit/Daten Simon/dat"
const path_plot = "../plots"

"../plots"

In [3]:
fname = readdir(path_config)[2:5001]
idx = sortperm( parse.(Int64, fname))
fname = fname[idx]
em_n = "VV"

cnfgarr = Vector{LMAConfig}(undef, 0)
for f in fname
    push!(cnfgarr, get_LMAConfig(joinpath(path_config, f), "g5-g5", em=em_n, bc=false))
end

## Splitting data in training and test sets

In [9]:
NCNFG = length(cnfgarr)
train_size = 500
test_size = NCNFG - train_size

TSRC = "24"
TVALS = length(cnfgarr[1].data["rr"][TSRC]) - 1
if em_n == "PA"
    EIGVALS = 32
else 
    EIGVALS = 64
end

eigvals_data_train = Array{Float64}(undef, EIGVALS, train_size)
rr_data_train = Array{Float64}(undef, TVALS, train_size)
ee_data_train = Array{Float64}(undef, TVALS, train_size)
re_data_train = Array{Float64}(undef, TVALS, train_size)

eigvals_data_test = Array{Float64}(undef, EIGVALS, test_size)
rr_data_test = Array{Float64}(undef, TVALS, test_size)
ee_data_test = Array{Float64}(undef, TVALS, test_size)
re_data_test = Array{Float64}(undef, TVALS, test_size)

for (k, dd) in enumerate(getfield.(cnfgarr, :data)[1:train_size])
    eigvals_data_train[:,k] = copy(cnfgarr[k].data["eigvals"][1:EIGVALS])
    rr_data_train[:,k] = getindex(getindex(dd, "rr"), TSRC)[2:end]
    re_data_train[:,k] = getindex(getindex(dd, "re"), TSRC)[2:end]
    
    ee_all_TSRC = Matrix{Float64}(undef, TVALS, TVALS)
    for ee_TSRC in 0:TVALS-1
        ee_all_TSRC[:,ee_TSRC+1] = getindex(getindex(dd, "ee"), "$ee_TSRC")[2:end]
    end
    
    ee_data_train[:,k] = mean(ee_all_TSRC, dims=2)
end
for (k, dd) in enumerate(getfield.(cnfgarr, :data)[train_size+1:NCNFG])
    eigvals_data_test[:,k] = copy(cnfgarr[k].data["eigvals"][1:EIGVALS])
    rr_data_test[:,k] = getindex(getindex(dd, "rr"), TSRC)[2:end]
    re_data_test[:,k] = getindex(getindex(dd, "re"), TSRC)[2:end]

    ee_all_TSRC = Matrix{Float64}(undef, TVALS, TVALS)
    for ee_TSRC in 0:TVALS-1
        ee_all_TSRC[:,ee_TSRC+1] = getindex(getindex(dd, "ee"), "$ee_TSRC")[2:end]
    end
    
    ee_data_test[:,k] = mean(ee_all_TSRC, dims=2)
end

## Defining training and test data

In [10]:
input_length = 2
output_length = 1

input_shape_train = Array{Matrix{Float64}}(undef, TVALS)
output_shape_train = Array{Matrix{Float64}}(undef, TVALS)
input_shape_test = Array{Matrix{Float64}}(undef, TVALS)

for i in 1:TVALS
    input_shape_train[i] = permutedims(hcat(ee_data_train[i,:], rr_data_train[i,:]))
    output_shape_train[i] = permutedims(reshape(re_data_train[i,:], :, 1))
    input_shape_test[i] = permutedims(hcat(ee_data_test[i,:], rr_data_test[i,:]))
end

output_shape_test = re_data_test;

### Input data, normalized and standardized

In [11]:
input_data_train_normalized = Array{Matrix{Float64}}(undef, TVALS)
input_data_train_standardized = Array{Matrix{Float64}}(undef, TVALS)
input_data_test_normalized = Array{Matrix{Float64}}(undef, TVALS)
input_data_test_standardized = Array{Matrix{Float64}}(undef, TVALS)

for i in 1:TVALS
    max_input_train = maximum(input_shape_train[i])
    min_input_train = minimum(input_shape_train[i])

    mean_input_train = mean(input_shape_train[i], dims=ndims(input_shape_train[i]))
    std_input_train = std(input_shape_train[i], dims=ndims(input_shape_train[i]))

    input_data_train_normalized[i] = (input_shape_train[i] .- max_input_train) ./ (max_input_train - min_input_train) 
    input_data_train_standardized[i] = (input_shape_train[i] .- mean_input_train) ./ std_input_train

    input_data_test_normalized[i] = (input_shape_test[i] .- max_input_train) ./ (max_input_train - min_input_train)
    input_data_test_standardized[i] = (input_shape_test[i] .- mean_input_train) ./ std_input_train
end

### Output data, normalized and standardized

In [12]:
output_data_train_normalized = Array{Matrix{Float64}}(undef, TVALS)
output_data_train_standardized = Array{Matrix{Float64}}(undef, TVALS)

for i in 1:TVALS
    max_output_train = maximum(output_shape_train[i])
    min_output_train = minimum(output_shape_train[i])

    mean_output_train = mean(output_shape_train[i])
    std_output_train = std(output_shape_train[i])

    output_data_train_normalized[i] = (output_shape_train[i] .- max_output_train) ./ (max_output_train - min_output_train)
    output_data_train_standardized[i] = (output_shape_train[i] .- mean_output_train) ./ std_output_train
end

## Describing different Neural Networks

In [17]:
activation_function = NNlib.leakyrelu

model = Chain(
        Dense(input_length => 8, activation_function),
        Dense(8 => 8, activation_function),
        Dense(8 => 1, identity)
    )

models = [model for i in 1:TVALS] |> f64;

In [14]:
function loss(flux_model, x, y)
    ŷ = flux_model(x)
    Flux.mse(ŷ, y, agg=mean)
end

loss (generic function with 1 method)

In [18]:
using Flux:params

mean_train = repeat(mean(re_data_train, dims=2), 1, test_size)

optimizer = Flux.Adam(0.001)
loss_function = Flux.mse
loss_discription = "MSE"

epochs = 100
batch_size = 10

percentages_bc = [0.0, 0.01, 0.02, 0.05, 0.1, 0.12]
n_configs_bc = Int.(test_size .* percentages_bc)

loaders = [Flux.DataLoader((input_data_train_standardized[i], output_data_train_standardized[i]), batchsize=batch_size, shuffle=true) for i in 1:TVALS]

parameters = 0
layers = params(model)
for layer in layers
    parameters += length(hcat(layer...))
end
outputDirectory = "/Users/lukasgeyer/Studium/Computational Sciences/Masterarbeit/Tool Allesandro/repo/LmaPredict/results/benchmarks/eachTimeSlice/$parameters"

model_losses = []
for (i, model) in enumerate(models)

    optim = Flux.setup(optimizer, model)
    
    function training()
        losses = []
        for epoch in 1:epochs
            for (x, y) in loaders[i]
                grads = gradient(m -> loss(m, x, y), model)
                Flux.update!(optim, model, grads[1])
                push!(losses, loss(model, x, y))
            end
        end
        return losses
    end

    #training_time = @time training()
    losses = training()
    push!(model_losses, losses)
    
end

out_of_sample_predictions = Matrix{Float64}(undef, TVALS, test_size)

for i in 1:TVALS
    mean_output_train = mean(output_shape_train[i])
    std_output_train = std(output_shape_train[i])

    out_of_sample_predictions[i,:] = (models[i](input_data_test_standardized[i]) .* std_output_train) .+ mean_output_train
end


analyse_predictions(
        out_of_sample_predictions,
        output_shape_test,
        TSRC,
        EIGVALS,
        model,
        optimizer,
        loss_function,
        loss_discription,
        epochs,
        batch_size,
        model_losses[1],
        outputDirectory
    )

R = 1 - (Flux.mse(out_of_sample_predictions, output_shape_test, agg=sum) / Flux.mse(mean_train, output_shape_test, agg=sum))

-1.747455019352072