# HDF5 Manager - Do Maintenance on Data Files

In [None]:
using PyPlot, HDF5, Printf
# Load the Matplotlib style sheet "paper.mplstyle"; the path is relative to the
# current working directory.
PyPlot.plt.style.use("./paper.mplstyle")

In [None]:
# System size to work on. Sizes used with this notebook so far: 9, 11, 13, 15, 17
# (19 was prepared but disabled). Change only this number to switch size.
N = 17

# File-name pattern for this size; the capture group is the instance seed.
# It is derived from `N` so that the size and the pattern cannot get out of sync.
# The pattern is not anchored, so it also matches the "results_"-prefixed files.
pattern = Regex("random_SK_instance_N_$(N)_seed_(\\d+)\\.h5")

In [None]:
# Parameters of the mean-field runs whose stored datasets are inspected below.
T_final = 1.0e4   # final time for mean-field
npts = 256        # number of points to get the Lyapunov exponent for
tol = 1.0e-8;     # tolerance for DifferentialEquations.jl when solving mean-field

In [None]:
# Root of the archive. It must end with "/" because the paths below are built by
# plain string concatenation.
PATH = "/home/ubuntu/Archives/"

# Sub-directory to operate on. Alternatives used here: "small_gaps", "extra_data".
subdir = "large_gaps"

# Directory for the current size `N` and sub-directory, and the names of its entries.
folder_name = string(PATH, "data/SK_model/N_", N, "/", subdir, "/")
instance_names = readdir(folder_name);

In [None]:
# Number of directory entries currently held in `instance_names`.
length(instance_names)

In [None]:
# Seed of directory entry 103 (entry 4101 was used previously); used by the
# following cells to look inside one concrete file.
seed_capture = match(pattern, instance_names[103])
test_seed = parse(Int, seed_capture[1])

In [None]:
# Print the top-level keys of the instance file that belongs to `test_seed`.
h5open(string(folder_name, "random_SK_instance_N_", N, "_seed_", test_seed, ".h5"), "r") do file
    foreach(println, keys(file))
end;

In [None]:
# Print the top-level keys of the results file that belongs to `test_seed`.
h5open(string(folder_name, "results_random_SK_instance_N_", N, "_seed_", test_seed, ".h5"), "r") do file
    foreach(println, keys(file))
end;

In [None]:
# Split the seeds found in `instance_names` into two integer lists:
# `results_seeds` for file names containing "results", `instance_seeds` for the rest.
instance_seeds = Int[]
results_seeds = Int[]

for instance_name in instance_names
    m = match(pattern, instance_name)
    # Directory entries that do not follow the naming scheme carry no seed;
    # skip them instead of failing on `nothing[1]`.
    m === nothing && continue
    seed = parse(Int, m[1])
    if occursin("results", instance_name)
        push!(results_seeds, seed)
    else
        push!(instance_seeds, seed)
    end
end

In [None]:
# Print the number of instance seeds, then the number of results seeds.
println(length(instance_seeds))
println(length(results_seeds))

In [None]:
# Keep only the seeds that have no results file.
# `filter!` with a set lookup tolerates results seeds that have no matching
# instance seed and can be re-run safely; deleting via `findfirst` throws as soon
# as a seed is not (or no longer) present in `instance_seeds`.
filter!(!in(Set(results_seeds)), instance_seeds);

In [None]:
# Number of seeds currently held in `instance_seeds`.
instance_seeds |> length

### Deleting data (careful!)

In [None]:
# instances
# Report, per instance file, which of the datasets listed in `del_keys` are present.
# The actual deletion stays commented out; uncomment `delete_object` to remove them.
del_keys = [
    "bogoliubov_spectrum",
    "fluctuations",
    "lyapunov_exponent_T_final_10000_tol_1e-8_npts_256",
    "spectra",
    "spectra_T_final_16000_tau_final_1000",
    "spectra_T_final_16000_tau_final_2000",
    "spectra_T_final_32000_tau_final_2000",
    "spectra_T_final_32000_tau_final_4000",
]

for instance_name in instance_names
    # Files whose name contains "results" are not touched here, so their names
    # are not printed either.
    occursin("results", instance_name) && continue
    print(instance_name, "\t")

    # The do-block form closes the file even if an error occurs; a bare
    # `h5open` would leave one open handle per file.
    h5open(folder_name * instance_name, "r+") do f
        for del_key in del_keys
            if haskey(f, del_key)
                printstyled(@sprintf("key %s", del_key) * "\n", color=:red)
                # delete_object(f, del_key)
            end
        end
    end
end


In [None]:
# results
# Report, per results file, which of the datasets listed in `del_keys` are present.
# The actual deletion stays commented out; uncomment `delete_object` to remove them.
# del_keys = ["bogoliubov_spectrum", "fluctuations"]
del_keys = [
    "J",
    "exact_ARPACK_LM_eigvals",
    "exact_ARPACK_LM_final_eigvecs",
    "spectra",
    "spectra_T_final_16000_tau_final_1000",
    "spectra_T_final_16000_tau_final_2000",
    "spectra_T_final_32000_tau_final_4000",
]

for instance_name in instance_names
    # Only files whose name contains "results" are inspected here.
    occursin("results", instance_name) || continue
    print(instance_name, "\t")

    # The do-block form closes the file even if an error occurs; a bare
    # `h5open` would leave one open handle per file.
    h5open(folder_name * instance_name, "r+") do f
        for del_key in del_keys
            if haskey(f, del_key)
                printstyled(@sprintf("key %s", del_key) * "\n", color=:red)
                # delete_object(f, del_key)
            end
        end
    end
end


In [None]:
# for (k, instance_name) in enumerate(instance_names)
#    println(k)
#    cp(folder_name * instance_name, folder_name * "results_" * instance_name)
# end

## Data admin (copied from `random_SK_instances.ipynb`)

#### Delete data...

In [None]:
# Check whether the Lyapunov-exponent dataset for the current `T_final`, `tol` and
# `npts` is present in every file whose seed is 52411. Deletion stays commented out.

# The dataset name does not depend on the file, so build it once.
lyapunov_key = @sprintf("lyapunov_exponent_T_final_%.0f_tol_1e%.0f_npts_%i", T_final, log10(tol), npts)

for instance_name in instance_names#[1:110]
    m = match(pattern, instance_name)
    # Entries that do not follow the naming scheme have no seed; skip them
    # instead of failing on `nothing[1]`.
    m === nothing && continue
    seed = m[1]
    seed == "52411" || continue

    println(seed)
    h5open(folder_name * instance_name, "r+") do f
        println(lyapunov_key)
        if haskey(f, lyapunov_key)
            printstyled(@sprintf("key %s", lyapunov_key) * "\n", color=:red)
            # delete_object(f, lyapunov_key)
        end
    end
end

In [None]:
# For the first 110 directory entries (fewer if the directory is smaller): read the
# stored eigenvalues, locate the minimal gap between rows 2 and 1, and report
# whether the dataset selected by `key` is present. Deletion stays commented out.
for instance_name in first(instance_names, 110)
    delfile = folder_name * instance_name

    λ = h5read(delfile, "exact_ARPACK_LM_eigvals")

    gap = λ[2, :] .- λ[1, :]
    # NOTE(review): assumes the eigenvalues are stored on 33 equally spaced
    # points in [0, 1] — confirm against the code that writes them.
    exact_times = range(0, 1; length=33)
    # `argmin` returns the first index of the smallest gap in a single pass.
    gaploc = exact_times[argmin(gap)]
    # Only needed by the commented-out "spectra/..." keys below.
    T_diags = T_final .* [gaploc - 0.05, gaploc]

    h5open(delfile, "r+") do f
        # key = "bogoliubov_spectrum"
        key = "fluctuations"
        # key = @sprintf("spectra/T_%0.3f/omegas", T_diags[1] / T_final)
        # key = @sprintf("spectra/T_%0.3f/data", T_diags[1] / T_final)
        # key = @sprintf("spectra/T_%0.3f/omegas", T_diags[2] / T_final)
        # key = @sprintf("spectra/T_%0.3f/data", T_diags[2] / T_final)
        if haskey(f, key)
            printstyled(@sprintf("key %s", key) * "\n", color=:red)
            # delete_object(f, key)
        end
    end
end

In [None]:
# Switch to size 11 and list the entries of its top-level data directory
# (no small/large sub-directory here).
N = 11
folder_name = string(PATH, "data/SK_model/N_", N, "/")
instance_names = readdir(folder_name)

In [None]:
# Minimal gap between rows 2 and 1 of the stored eigenvalues for every file in
# `folder_name`, keyed by the seed string taken from the file name.
# Names of entries that could not be processed are printed, separated by ", ".
# NOTE(review): the value type assumes real-valued eigenvalues — confirm.
minigaps = Dict{String,Float64}()

# Derive the file-name pattern from the current `N`. A pattern hard-coded to one
# size matches nothing once `folder_name` points at another size, and assigning
# to `pattern` inside the loop would overwrite the notebook-wide variable.
seed_pattern = Regex("random_SK_instance_N_$(N)_seed_(\\d+)\\.h5")

for instance_name in instance_names
    m = match(seed_pattern, instance_name)
    if m === nothing
        # not an instance file of this size (e.g. a sub-directory)
        print(instance_name, ", ")
        continue
    end

    try
        # couplings = h5read(folder_name * instance_name, "J", J_mat)
        λ = h5read(folder_name * instance_name, "exact_ARPACK_LM_eigvals")
        minigaps[m[1]] = minimum(λ[2, :] .- λ[1, :])
    catch err
        # Let the user interrupt the loop; any other failure is reported by name.
        err isa InterruptException && rethrow()
        print(instance_name, ", ")
    end
end


In [None]:
# Entries of `minigaps` as a vector ordered by increasing gap value, and how many
# there are.
sorted_minigaps = sort(collect(minigaps); by=last)
length(sorted_minigaps)

In [None]:
# Position in `sorted_minigaps` of the first entry whose gap exceeds 0.1
# (`nothing` if there is none).
first_large_gap_idx = findfirst(entry -> last(entry) > 0.1, sorted_minigaps)

In [None]:
# Split the sorted list at `first_large_gap_idx`: everything before it is "small",
# the rest is "large". If no index was found (`nothing`), every entry counts as
# small and `large_minigaps` is empty, instead of failing on `nothing - 1`.
split_idx = something(first_large_gap_idx, lastindex(sorted_minigaps) + 1)
small_minigaps = sorted_minigaps[1:split_idx-1]
large_minigaps = sorted_minigaps[split_idx:end];

In [None]:
# Display the contents of `large_minigaps`.
large_minigaps

#### Move data

In [None]:
# Move the files of the seeds in `mv_list` from one sub-directory of the size-`N`
# directory to another. Seeds may be integers or strings.
# mv_list = [k for (k, v) in small_minigaps]
# mv_list = [k for (k, v) in large_minigaps]
mv_list = instance_seeds

type_str = "data"
file_suffix = "h5"

# Source / destination relative to the size-`N` directory ("" = that directory itself):
#   move to small:            src_subdir = "",            dst_subdir = "small_gaps/"
#   move to large:            src_subdir = "",            dst_subdir = "large_gaps/"
#   move from large to extra: src_subdir = "large_gaps/", dst_subdir = "extra_data/"
src_subdir = "large_gaps/"
dst_subdir = "extra_data/"

# Same for every seed, so build it once.
base_dir = PATH * @sprintf("%s/SK_model/N_%i/", type_str, N)

for seed in mv_list
    file_name = @sprintf("random_SK_instance_N_%i_seed_%s.%s", N, seed, file_suffix)
    try
        mv(base_dir * src_subdir * file_name, base_dir * dst_subdir * file_name)
    catch err
        # Let the user interrupt the loop; for any other failure keep going but
        # report which seed failed and why.
        err isa InterruptException && rethrow()
        @warn "could not move instance file" seed reason = sprint(showerror, err)
    end
end

#### Move plots

In [None]:
# # mv_list = [k for (k, v) in small_minigaps]
# mv_list = [k for (k, v) in large_minigaps]

# type_str = "plots"
# file_suffix = "pdf"

# for seed in mv_list
#     try
#         # mv(PATH * @sprintf("%s/SK_model/N_%i/", type_str, N) * @sprintf("random_SK_instance_N_%i_seed_%s.%s", N, seed, file_suffix), PATH * @sprintf("%s/SK_model/N_%i/", type_str, N) * "small_gaps/" * @sprintf("random_SK_instance_N_%i_seed_%s.%s", N, seed, file_suffix))
#         mv(PATH * @sprintf("%s/SK_model/N_%i/", type_str, N) * @sprintf("random_SK_instance_N_%i_seed_%s.%s", N, seed, file_suffix), PATH * @sprintf("%s/SK_model/N_%i/", type_str, N) * "large_gaps/" * @sprintf("random_SK_instance_N_%i_seed_%s.%s", N, seed, file_suffix))
#     catch
#         print(seed, ", ")
#     end
# end

#### Late gaps...

In [None]:
# N = 15
# all_late_gap_seeds = []
# for screen in 1:5000:45000
#     late_gap_seeds = h5read(PATH * @sprintf("data/SK_model/N_%i/", N) * @sprintf("random_SK_instances_N_%i_screen_%i_late_minigaps.h5", N, screen), "late_gap_seeds")
#     push!(all_late_gap_seeds, late_gap_seeds)
# end
# all_late_gap_seeds = reduce(vcat, all_late_gap_seeds)
# # h5write(PATH * @sprintf("data/SK_model/N_%i/", N) *@sprintf("random_SK_instances_N_%i_late_minigaps.h5", N), "late_gap_seeds", all_late_gap_seeds);

In [None]:
# Switch to size 17 and display the "late_gap_seeds" dataset of its late-minigaps file.
N = 17
h5read(string(PATH, "data/SK_model/N_", N, "/random_SK_instances_N_", N, "_late_minigaps.h5"), "late_gap_seeds") # |> length

In [None]:
# all_late_gap_seeds_N_17 = [15001, 17004, 3005, 9006, 4008, 16008, 11009, 15010, 10012, 9014, 15020, 22, 2028, 12027, 14029, 17035, 1034, 9037, 2041, 37, 14039, 8039, 10041, 3040, 8040, 15046, 10045, 15049, 11046, 12047, 13048, 18053, 7051, 20052, 8057, 4059, 9060, 14060, 16064, 18064, 13061, 13062, 18066, 66, 5065, 14065, 6066, 74, 17077, 14076, 19083, 15084, 8082, 17085, 12084, 8087, 15096, 20087, 14091, 12091, 3091, 13093, 17101, 8103, 19110, 2117, 2121, 10109, 16117, 1119, 5119, 5121, 4119, 6117, 122, 17120, 18125, 4123, 16138, 12131, 3129, 19140, 8135, 19141, 6136, 140, 5142, 14138, 16148, 145, 4144, 9154, 2157, 5154, 5158, 10153, 2171, 8165, 12163, 4169, 12166, 19181, 5178, 12175, 4178, 16187, 10173, 20174, 11188, 14181, 10183, 17185, 3186, 17188, 16205, 19207, 9210, 5205, 209, 18207, 3199, 8210, 15232, 13213, 4224, 17221, 11233, 233, 11235, 19240, 18232, 15246]
# h5write(PATH * @sprintf("data/SK_model/N_%i/", 17) *@sprintf("random_SK_instances_N_%i_late_minigaps.h5", 17), "late_gap_seeds", sort(all_late_gap_seeds_N_17));

In [None]:
# For every entry of `eigvals_small_gap` (defined outside this cell), the column
# index at which the gap between rows 2 and 1 is smallest. `argmin` yields the
# first such index in one pass; the typed comprehension keeps the result a
# `Vector{Int}` even when there are no entries.
minigap_locs = Int[argmin(v[2, :] .- v[1, :]) for (_, v) in eigvals_small_gap];

In [None]:
# Positions in `minigap_locs` whose stored index differs from 30.
findall(!=(30), minigap_locs)