# HDF5 Manager - Do Maintenance on Data Files

In [None]:
using PyPlot, HDF5, Printf, Crayons
# apply the matplotlib style sheet "paper.mplstyle"; the path is relative to the working directory
PyPlot.plt.style.use("./paper.mplstyle")

In [None]:
# Print every Float64 with four decimals.
# NOTE(review): this adds a method to `Base.show` for a Base type (type piracy) and therefore
# affects all Float64 output in the session; kept as-is so displayed values stay unchanged.
Base.show(io::IO, f::Float64) = @printf(io, "%1.4f", f)
# Crayons smoke test. The second crayon only adds bold, so the text stays blue (the old
# message claimed "Red"); the trailing reset stops the styling from leaking into later output.
println(Crayon(foreground = :blue), "In blue. ", Crayon(bold = true), "Blue and bold", Crayon(reset = true))

In [None]:
# System size of the instances to manage. Sizes that appeared in this cell: 9, 11, 13, 15, 17, 19.
N = 15

# File-name pattern of an instance file; capture group 1 is the seed.
# It is derived from `N` so the two can no longer get out of sync.
pattern = Regex("random_SK_instance_N_$(N)_seed_(\\d+)\\.h5")

In [None]:
# Mean-field settings; T_final, tol and npts are also used further down to build an HDF5 key name.

# final time of the mean-field evolution
T_final = 1.0e4

# how many points the Lyapunov exponent is evaluated at
npts = 256

# solver tolerance handed to DifferentialEquations.jl for the mean-field problem
tol = 1.0e-8;

In [None]:
# root of the archive that holds both the data and the plots trees
PATH = "/home/ubuntu/Archives/"

# which sub-folder of the N-specific data directory to work on
subdir = "small_gaps"
# subdir = "large_gaps"
# subdir = "extra_data"
folder_name = string(PATH, @sprintf("data/SK_model/N_%i/%s/", N, subdir))

# folder_name = PATH * @sprintf("data/SK_model/N_%i/", N)

# Full paths of all plain files in `folder_name`, minus every path that contains one of
# the excluded tags.
instance_names = filter(readdir(folder_name; join=true)) do path
    !isdir(path) &&
        !any(tag -> occursin(tag, path), ("results", "undecided", "frustrated", "late", "main_df"))
end;

In [None]:
# number of entries currently held in `instance_names`
length(instance_names)

In [None]:
# seed of the first listed instance, parsed from capture group 1 of `pattern`
test_seed = parse(Int, match(pattern, instance_names[1])[1])

In [None]:
# Show the layout of one instance file. The do-block closes the handle again, so the file
# is not left open read-only when later cells reopen it with "r+".
h5open(folder_name * @sprintf("random_SK_instance_N_%i_seed_%i.h5", N, test_seed), "r") do f
    display(f)
end

### Check degeneracy at final time

In [None]:
# Collect the seeds whose stored spectrum has rows 1 and 2 closer than the threshold in the
# last column (per the section heading: degeneracy at the final time).
degeneracy_threshold = 0.01
seeds_to_plot = Int[]
for instance_name in instance_names
    test_seed = parse(Int, match(pattern, instance_name)[1])
    λ = h5read(folder_name * @sprintf("random_SK_instance_N_%i_seed_%i.h5", N, test_seed), "exact_ARPACK_LM_eigvals")

    # plain scalar difference of the last-column entries of rows 2 and 1
    final_gap = λ[2, end] - λ[1, end]
    if final_gap < degeneracy_threshold
        push!(seeds_to_plot, test_seed)
    end
end

In [None]:
# number of subplot rows (and upper bound on seeds) used by the plotting cell that follows
numplots = 16
# number of seeds currently in `seeds_to_plot`
length(seeds_to_plot)

In [None]:
# Plot the gaps (row 2 - row 1 and row 3 - row 1 of the stored spectrum) for up to
# `numplots` seeds from `seeds_to_plot`, one subplot row per seed.
start_idx = 1
# clamp the window: with fewer than `numplots` seeds the fixed-size slice raised a BoundsError
stop_idx = min(start_idx - 1 + numplots, length(seeds_to_plot))
figure(figsize=(2.5, 2numplots))
for (k, test_seed) in enumerate(seeds_to_plot[start_idx:stop_idx])
    λ = h5read(folder_name * @sprintf("random_SK_instance_N_%i_seed_%i.h5", N, test_seed), "exact_ARPACK_LM_eigvals")

    final_gap = λ[2, end] - λ[1, end]
    if final_gap < 0.01
        # evenly spaced abscissa on [0, 1] with one point per stored column
        # (equals range(0, 1, 33) for 33 columns)
        s = range(0, 1, length=size(λ, 2))
        ax = subplot(numplots, 1, k)
        ax.plot(s, λ[2, :] .- λ[1, :], label=string(test_seed) * ", " * @sprintf("%0.3f", final_gap))
        ax.plot(s, λ[3, :] .- λ[1, :])
        xlim(0, 1)
        ylim(0, 0.5)
        legend()
    end
end
tight_layout()

### Plot all gaps

In [None]:
# hard-coded seed list; replaces whatever `seeds_to_plot` held before
seeds_to_plot = [2339, 2354, 2357, 2390, 2434, 2446, 2508]

In [None]:
# One subplot row per seed in `seeds_to_plot`, showing row 2 - row 1 and row 3 - row 1 of the
# stored spectrum.
start_idx = 1
n_rows = length(seeds_to_plot)
figure(figsize=(2.5, n_rows))
for (k, test_seed) in enumerate(seeds_to_plot)
    λ = h5read(folder_name * @sprintf("random_SK_instance_N_%i_seed_%i.h5", N, test_seed), "exact_ARPACK_LM_eigvals")

    # evenly spaced abscissa on [0, 1] with one point per stored column
    # (equals range(0, 1, 33) for 33 columns)
    s = range(0, 1, length=size(λ, 2))
    ax = subplot(n_rows, 1, k)
    ax.plot(s, λ[2, :] .- λ[1, :], label=string(test_seed))
    ax.plot(s, λ[3, :] .- λ[1, :])
    xlim(0, 1)
    ax.set_xticklabels([])
    ylim(0, 0.5)
    legend()
end
tight_layout()

In [None]:
# Write one gap plot (PDF) per instance into the plots tree.
plot_dir = PATH * @sprintf("plots/SK_model/N_%i/%s/", N, subdir)
mkpath(plot_dir)  # savefig does not create a missing target directory
for (k, instance_name) in enumerate(instance_names)
    test_seed = parse(Int, match(pattern, instance_name)[1])
    print(test_seed, ", ", k, "\t")
    λ = h5read(folder_name * @sprintf("random_SK_instance_N_%i_seed_%i.h5", N, test_seed), "exact_ARPACK_LM_eigvals")
    final_gap = λ[2, end] - λ[1, end]

    figure(figsize=(3, 2))
    # abscissa: evenly spaced on [0, 1], one point per stored column
    plot(range(0, 1, length=size(λ, 2)), λ[2, :] .- λ[1, :], label=string(test_seed) * ", " * @sprintf("%0.3f", final_gap))
    xlim(0, 1)
    ylim(0, 2)
    legend()
    savefig(plot_dir * @sprintf("random_SK_instance_N_%i_seed_%i.pdf", N, test_seed), dpi=128, bbox_inches="tight")
    # close the figure so that open figures do not pile up over the loop
    close()
end

In [None]:
# check what's inside; the do-block closes the handle again after displaying the file
h5open(folder_name * @sprintf("results_random_SK_instance_N_%i_seed_%i.h5", N, test_seed)) do f
    display(f)
end

## Deleting data (careful!)

In [None]:
# instances
# del_keys = ["exact_ARPACK_LM_lowest_eigvecs"]
del_keys = ["J"]
# del_keys = ["bogoliubov_spectrum", "fluctuations", "lyapunov_exponent_T_final_10000_tol_1e-8_npts_256", "spectra", 
# "spectra_T_final_16000_tau_final_1000", "spectra_T_final_16000_tau_final_2000", 
# "spectra_T_final_32000_tau_final_2000", "spectra_T_final_32000_tau_final_4000"]

# Report every instance file that contains one of `del_keys`; the actual deletion stays
# commented out.
for instance_name in filter(x -> !occursin("results", x), instance_names)
    # `instance_names` may hold full paths (readdir with join=true); rebuilding the path from
    # the basename avoids prefixing `folder_name` a second time
    instance_path = joinpath(folder_name, basename(instance_name))
    # the do-block closes the file even if an error occurs inside
    h5open(instance_path, "r+") do f
        for del_key in del_keys
            if haskey(f, del_key)
                print(instance_name, "\t")
                printstyled(@sprintf("key %s", del_key) * "\n", color=:red)
                # delete_object(f, del_key)
            end
        end
    end
end


In [None]:
# results

del_keys = ["bogoliubov_spectrum_T_final_32000_tol_1e-8_npts_32"]
# del_keys = ["fluctuations_T_final_32000_tol_1e-8_npts_2048"]

# Count how many (file, key) hits exist among the results files and remember the file names.
counter = 0
done_instances = String[]
for instance_name in filter(x -> occursin("results", x), readdir(folder_name))
    try
        # The do-block closes the file again. It only returns the number of keys found, so
        # `counter` is updated outside the closure.
        n_found = h5open(folder_name * instance_name, "r+") do f
            # for del_key in del_keys
            #     haskey(f, del_key) && delete_object(f, del_key)
            # end
            count(del_key -> haskey(f, del_key), del_keys)
        end
        counter += n_found
        # one entry per key found in this file
        append!(done_instances, fill(instance_name, n_found))
    catch
        # best effort: files that cannot be opened are only listed
        println(instance_name, "\t")
    end
end
# average number of hits per key
counter / length(del_keys)

In [None]:
# results-file names recorded by the key scan (one entry per key found)
done_instances

In [None]:
# for (k, instance_name) in enumerate(instance_names)
#    println(k)
#    cp(folder_name * instance_name, folder_name * "results_" * instance_name)
# end

In [None]:
# seeds of interest, stored as strings
missing_seeds = ["13873", "21890", "29855", "29900"]
# walk the seed range in ascending order and print every seed whose string form is listed
foreach(println, Iterators.filter(s -> string(s) in missing_seeds, 1:30000))

In [None]:
# Print, in blue, every instance whose seed (capture group 1 of `pattern`) is listed in
# `missing_seeds`.
for instance_name in instance_names
    seed = match(pattern, instance_name)[1]
    if seed in missing_seeds
        printstyled(instance_name, "\n", color=:blue)
    end
end

## Old data admin (copied from `random_SK_instances.ipynb`)

#### Delete data...

In [None]:
# Look for the Lyapunov-exponent key in the instance file of seed 52411; the deletion itself
# stays commented out.
for instance_name in instance_names#[1:110]
    # rebuild the path from the basename: `instance_names` may already hold full paths, in
    # which case prefixing `folder_name` again would point to a non-existent file
    delfile = joinpath(folder_name, basename(instance_name))
    seed = match(pattern, instance_name)[1]
    if seed == "52411"
        println(seed)
        h5open(delfile, "r+") do f
            # for T_final = 10000., tol = 1e-8, npts = 256 this gives
            # "lyapunov_exponent_T_final_10000_tol_1e-8_npts_256"
            key = @sprintf("lyapunov_exponent_T_final_%.0f_tol_1e%.0f_npts_%i", T_final, log10(tol), npts) 
            println(key)       
            if haskey(f, key)
                printstyled(@sprintf("key %s", key) * "\n", color=:red)
                # delete_object(f, key)
            end
        end
    end
end

In [None]:
# Look for a key in (at most) the first 110 instance files; the deletion itself stays
# commented out.
# `first(v, n)` returns fewer elements when the list is shorter instead of raising a BoundsError.
for instance_name in first(instance_names, 110)
    # rebuild the path from the basename so that full paths in `instance_names` are not
    # prefixed with `folder_name` a second time
    delfile = joinpath(folder_name, basename(instance_name))

    λ = h5read(delfile, "exact_ARPACK_LM_eigvals")

    gap = λ[2, :] .- λ[1, :]
    # evenly spaced points on [0, 1], one per stored column (33 in the original data)
    exact_times = range(0, 1, length=length(gap))
    # position of the (first) minimum of the gap
    gaploc = exact_times[argmin(gap)]
    # only needed by the commented-out "spectra/..." keys
    T_diags = T_final .* [gaploc - 0.05, gaploc]

    h5open(delfile, "r+") do f
        # key = "bogoliubov_spectrum"
        key = "fluctuations"
        # key = @sprintf("spectra/T_%0.3f/omegas", T_diags[1] / T_final)
        # key = @sprintf("spectra/T_%0.3f/data", T_diags[1] / T_final)
        # key = @sprintf("spectra/T_%0.3f/omegas", T_diags[2] / T_final)
        # key = @sprintf("spectra/T_%0.3f/data", T_diags[2] / T_final)        
        if haskey(f, key)
            printstyled(@sprintf("key %s", key) * "\n", color=:red)
            # delete_object(f, key)
        end
    end
end

In [None]:
# Switch to N = 11 and list every entry (bare names, not full paths) of its top-level data
# folder. Note that `pattern` is not changed by this cell.
N = 11
folder_name = string(PATH, @sprintf("data/SK_model/N_%i/", N))
instance_names = readdir(folder_name)

In [None]:
# Minimum of (row 2 - row 1) of the stored spectrum for every instance, keyed by seed string.
# The pattern is rebuilt from the current `N`: the hard-coded N_17 pattern matched no file of
# any other size, so every file ended up in the `catch` branch.
pattern = Regex("random_SK_instance_N_$(N)_seed_(\\d+)\\.h5")
minigaps = Dict{String,Float64}()
for instance_name in instance_names
    try
        seed = match(pattern, instance_name)[1]

        # works for bare file names as well as for full paths in `instance_names`
        instance_path = joinpath(folder_name, basename(instance_name))
        # couplings = h5read(instance_path, "J", J_mat)
        λ = h5read(instance_path, "exact_ARPACK_LM_eigvals")
        minigaps[seed] = minimum(λ[2, :] .- λ[1, :])
    catch
        # best effort: entries that do not match or cannot be read are only listed
        print(instance_name, ", ")
    end
end


In [None]:
# seed => minigap pairs, ordered by increasing minigap
sorted_minigaps = sort!(collect(minigaps); by=last)
length(sorted_minigaps)

In [None]:
# Index of the first entry whose minigap exceeds 0.1. If there is none, use one past the end,
# so that slicing at this index yields "everything small, nothing large" instead of failing
# on `nothing`.
first_large_gap_idx = something(findfirst(p -> last(p) > 0.1, sorted_minigaps), length(sorted_minigaps) + 1)

In [None]:
# split the sorted list at `first_large_gap_idx`: entries before it / from it onwards
large_minigaps = sorted_minigaps[first_large_gap_idx:end]
small_minigaps = sorted_minigaps[1:(first_large_gap_idx - 1)];

In [None]:
# entries of the sorted list from `first_large_gap_idx` onwards
large_minigaps

#### Copy data

In [None]:
# N = 9
missing_seeds = []

for seed in missing_seeds
    seed = parse(Int, seed)
    # cp(PATH * @sprintf("data/SK_model/N_%i/%s/", N, "large_gaps") * @sprintf("results_random_SK_instance_N_%i_seed_%i.h5", N, seed), 
    #    PATH * @sprintf("data/SK_model/N_%i/%s/", N, "extra_data") * @sprintf("large/results_random_SK_instance_N_%i_seed_%i.h5", N, seed), 
    #    force=true)
    # mv(PATH * @sprintf("data/SK_model/N_%i/%s/", N, "large_gaps") * @sprintf("results_random_SK_instance_N_%i_seed_%i.h5", N, seed), 
    #    PATH * @sprintf("data/SK_model/N_%i/%s/", N, "extra_data") * @sprintf("large/results_random_SK_instance_N_%i_seed_%i.h5", N, seed),
    #    force=true)
    mv(PATH * @sprintf("data/SK_model/N_%i/%s/", N, "large_gaps") * @sprintf("random_SK_instance_N_%i_seed_%i.h5", N, seed), 
       PATH * @sprintf("data/SK_model/N_%i/%s/", N, "extra_data") * @sprintf("large/random_SK_instance_N_%i_seed_%i.h5", N, seed), 
       force=true)
end

## Move data

In [None]:
# mv_list = [k for (k, v) in small_minigaps]
# mv_list = [k for (k, v) in large_minigaps]
mv_list = seeds_to_plot

type_str = "data"
file_suffix = "h5"

# Folders involved in the move, built once.
base_dir = PATH * @sprintf("%s/SK_model/N_%i/", type_str, N)
# earlier variants moved only the instance file out of the top-level folder:
#   move to small:  src_dir = base_dir;  dst_dir = base_dir * "small_gaps/"
#   move to large:  src_dir = base_dir;  dst_dir = base_dir * "large_gaps/"
src_dir = base_dir * "small_gaps/"
dst_dir = base_dir * "degenerate/"
# make sure the destination exists, otherwise every single move fails
mkpath(dst_dir)

for seed in mv_list
    try
        # move the instance file first, then its results file
        for prefix in ("", "results_")
            file_name = prefix * @sprintf("random_SK_instance_N_%i_seed_%s.%s", N, seed, file_suffix)
            mv(src_dir * file_name, dst_dir * file_name)
        end
    catch err
        # best effort: keep going, but report why this seed failed (if the second move
        # fails, the instance file has already been moved)
        println(seed, ": ", sprint(showerror, err))
    end
end

#### Move plots

In [None]:
# # mv_list = [k for (k, v) in small_minigaps]
# mv_list = [k for (k, v) in large_minigaps]

# type_str = "plots"
# file_suffix = "pdf"

# for seed in mv_list
#     try
#         # mv(PATH * @sprintf("%s/SK_model/N_%i/", type_str, N) * @sprintf("random_SK_instance_N_%i_seed_%s.%s", N, seed, file_suffix), PATH * @sprintf("%s/SK_model/N_%i/", type_str, N) * "small_gaps/" * @sprintf("random_SK_instance_N_%i_seed_%s.%s", N, seed, file_suffix))
#         mv(PATH * @sprintf("%s/SK_model/N_%i/", type_str, N) * @sprintf("random_SK_instance_N_%i_seed_%s.%s", N, seed, file_suffix), PATH * @sprintf("%s/SK_model/N_%i/", type_str, N) * "large_gaps/" * @sprintf("random_SK_instance_N_%i_seed_%s.%s", N, seed, file_suffix))
#     catch
#         print(seed, ", ")
#     end
# end