In [1]:
include("/home/pietaril/Documents/MI-AIM/julia/KoLesky.jl-master/src/KoLesky.jl")

#in LUMI
#include("/project/project_462000039/maija/kl_inv/KoLesky.jl-master/src/KoLesky.jl")


using Plots
using DataFrames
using LinearAlgebra
using NCDatasets
using SparseArrays
using JLD
using HDF5

In [4]:

#*****************#
# Maija's helpers #
#*****************#


"""
    compute_explicit_factor(x, K, rho, n_neighbors)

Build a `KoLesky.ExplicitKLFactorization` for the covariance matrix `K`.

The coordinates in `x` are turned into point measurements, `K` is wrapped in a
`KoLesky.MatrixCovariance`, and both are passed together with `rho` and
`n_neighbors` to `KoLesky.ImplicitKLFactorization`, which is then converted to
its explicit form.

# Returns
The explicit factorization object itself (callers in this file read its `U` and
`P` fields).
"""
function compute_explicit_factor(x::Matrix{T}, K::Matrix{T}, rho::Int64, n_neighbors::Int64) where T <: Real
    points = KoLesky.point_index_measurements(x)
    covariance = KoLesky.MatrixCovariance(K)
    implicit = KoLesky.ImplicitKLFactorization(covariance, points, rho, n_neighbors)
    return KoLesky.ExplicitKLFactorization(implicit)
end


"""
    assemble_inv_covariance(factor)

Return a dense approximation of the inverse of `K` built from `factor.U` and
`factor.P`.

`factor.U` is converted to a dense matrix, the product `U * U'` is formed, and
its rows and columns are reordered with the inverse of the permutation
`factor.P`.
"""
function assemble_inv_covariance(factor)
    dense_U = Matrix(factor.U)

    # Invert the permutation: if `perm[pos] == idx` then `unperm[idx] == pos`.
    perm = factor.P
    unperm = similar(perm)
    for (pos, idx) in enumerate(perm)
        unperm[idx] = pos
    end

    product = dense_U * dense_U'
    return product[unperm, unperm]
end


"""
    make_PD!(K, a)

Add the constant `a` to every diagonal entry of the square matrix `K`, in place
(`K ← K + a*I`), and return `K`. Intended to nudge a covariance matrix towards
positive definiteness.

# Arguments
- `K`: square matrix, modified in place.
- `a`: shift added to the diagonal; must have the same type as the elements of `K`.

# Throws
- `DimensionMismatch` if `K` is not square.
"""
function make_PD!(K::Matrix{T}, a::T) where T
    LinearAlgebra.checksquare(K)
    # Touch only the diagonal. Forming `K + a*I` first would allocate a second
    # full n×n matrix, which is prohibitive for large covariance matrices.
    for i in axes(K, 1)
        K[i, i] += a
    end

    return K
end


"""
    read_cov_from_file(filepath)

Read a covariance matrix and its coordinates from the NetCDF file at `filepath`.

The file must contain the variables `"covariance"`, `"lon"` and `"lat"`.

# Returns
A tuple `(K, x)` where
- `K` is the `"covariance"` variable as a `Matrix{Float64}`;
- `x` is a `Matrix{Float64}` whose first row is `lat` and second row is `lon`.

# Throws
The conversions to `Matrix{Float64}` throw if the data actually contain
`missing` values.
"""
function read_cov_from_file(filepath::String)
    # The do-block form closes the dataset even when reading or converting throws.
    return NCDataset(filepath) do ds
        # The element type read from the file still allows `missing`; convert it
        # to plain Float64.
        K = convert(Matrix{Float64}, ds["covariance"][:, :])
        lon = ds["lon"][:]
        lat = ds["lat"][:]
        x = convert(Matrix{Float64}, [lat'; lon'])

        return K, x
    end
end

"""
    KL_invert(x, K, rho, n_neighbors)

Approximate the inverse of the covariance matrix `K`: compute the explicit
factorization with `compute_explicit_factor` and turn it into a dense matrix
with `assemble_inv_covariance`.
"""
function KL_invert(x::Matrix{T}, K::Matrix{T}, rho::Int64, n_neighbors::Int64) where T <: Real
    return assemble_inv_covariance(compute_explicit_factor(x, K, rho, n_neighbors))
end




"""
    write_factor_to_file(factor, rho, n_neighbors, filename; outdir=...)

Save `factor.U`, `factor.P` and the parameters `n` (number of rows of `U`),
`rho` and `n_neighbors` to the compressed JLD file `<outdir>/<filename>.jld`.

JLD is used instead of NetCDF because the sparse matrix could not be stored in
NetCDF.

# Arguments
- `factor`: object with fields `U` and `P`.
- `rho`, `n_neighbors`: parameters stored alongside the factor.
- `filename`: file name without directory and without the `.jld` extension.

# Keywords
- `outdir`: directory to write into. Defaults to the location that was
  previously hard-coded, so existing calls behave as before.

# Returns
`nothing`.
"""
function write_factor_to_file(factor, rho::Int64, n_neighbors::Int64, filename::String;
                              outdir::String="/home/pietaril/Documents/data/outputs")
    U = factor.U
    P = factor.P
    n = size(U, 1)

    # The do-block form closes the file even if one of the writes throws.
    jldopen("$outdir/$filename.jld", "w", compress=true) do file
        write(file, "U", U)
        write(file, "P", P)
        write(file, "n", n)
        write(file, "rho", rho)
        write(file, "n_neighbors", n_neighbors)
    end

    return nothing
end

"""
    write_iK_to_file(iK_approx, filename, rho, n_neighbors, time)

Save the full approximate inverse covariance matrix to the NetCDF file
`<filename>.nc` (created in the current working directory unless `filename`
contains a path).

The matrix is stored as the variable `"inverse_cov_approx"` over the dimension
`"nstate"`, with the attributes `rho`, `n_neighbors` and `n` (number of rows).

# Arguments
- `iK_approx`: matrix to store.
- `filename`: output path without the `.nc` extension.
- `rho`, `n_neighbors`: stored as attributes of the variable.
- `time`: currently unused; kept so existing calls keep working.

# Returns
`nothing`.
"""
function write_iK_to_file(iK_approx::Matrix{T}, filename::String, rho::Int64, n_neighbors::Int64, time::Float64) where T <: Real
    n = size(iK_approx, 1)

    # The do-block form closes the dataset even if defining or writing the
    # variable throws.
    NCDataset("$filename.nc", "c") do out
        defDim(out, "nstate", n)
        v = defVar(out, "inverse_cov_approx", Float64, ("nstate", "nstate"))
        v[:, :] = iK_approx
        v.attrib["rho"] = rho
        v.attrib["n_neighbors"] = n_neighbors
        v.attrib["n"] = n
    end

    return nothing
end


"""
    main(filepath, rho, n_neighbors, a=1e-16)

Read a covariance matrix from `filepath`, shift its diagonal by `a`, compute
the explicit KL factorization and save it with `write_factor_to_file`.

The output file name is `KL_Factorization<date>_n<n>`, where `<date>` is taken
from characters 15 to 22 of the base name of `filepath` (presumably a date tag
such as YYYYMMDD; verify against the input naming scheme) and `<n>` is the
number of rows of the covariance matrix.

# Arguments
- `filepath`: NetCDF file readable by `read_cov_from_file`.
- `rho`, `n_neighbors`: factorization parameters.
- `a`: diagonal shift. Any real number is accepted; it is converted to the
  element type of the covariance matrix.
  NOTE(review): in Float64, adding the default 1e-16 to a diagonal entry of
  magnitude 1 or larger does not change it; confirm the default is useful for
  the data at hand.
"""
function main(filepath::String, rho::Int64, n_neighbors::Int64, a=1e-16)
    # read in covariance matrix K and latlon -coordinates x
    K, x = read_cov_from_file(filepath)
    # ensure K positive definite; `make_PD!` requires `a` to have K's element type
    K = make_PD!(K, convert(eltype(K), a))
    n = size(K, 1)

    # Derive the output name before the expensive factorization, so that an
    # unexpectedly short input file name fails immediately instead of after
    # the computation has finished.
    date = basename(filepath)[15:22]
    filename = "KL_Factorization$(date)_n$n"

    # factorize
    factor = compute_explicit_factor(x, K, rho, n_neighbors)
    return write_factor_to_file(factor, rho, n_neighbors, filename)
end


0.0014901161193847656

# Test setup 

* One 10^5 size matrix

* What to time? Options: 
    * compute_explicit_factor
    * KL_invert
    * For comparison: default `inv` or Cholesky? If Cholesky, do we want the exact same Cholesky factor, i.e.
        C = cholesky(K[P, P]);
        U = inv(C.U);

* Relative error - with regard to 
    * K*iK_approx vs. I 
    * which matrix norm to use?

In [None]:

"""
    vary_params_test(filepath, rholist, n_neighbors, a=1e-16)

Time the KL factorization and measure its accuracy for every `rho` in `rholist`.

For each `rho` the function
1. times `compute_explicit_factor` (after one untimed warm-up call on the first
   iteration, so compilation is excluded);
2. compares the approximate factor `U` with the exact one obtained as
   `inv(cholesky(K[P, P]).U)`, as a relative Frobenius-norm error;
3. measures how far `iK_approx * K` is from the identity, relative to the
   Frobenius norm of the identity (`sqrt(n)`).

# Returns
A `DataFrame` with one row per entry of `rholist` and the columns
`rel_errs_Uapprox`, `rel_errs_iK`, `KLfactor_timings`, `rhos`, `n_neighbors`
and `read_timings` (the last two are constant across rows).
"""
function vary_params_test(filepath::String, rholist::Vector{Int64}, n_neighbors::Int64, a=1e-16)
    # Untimed first read so that compilation is not included in the read timing
    # (a path to a smaller file could be used here).
    read_cov_from_file(filepath)
    # read in covariance matrix K and latlon -coordinates x
    read_timing = @elapsed begin
        K, x = read_cov_from_file(filepath)
    end
    # ensure K positive definite; `make_PD!` requires `a` to have K's element type
    K = make_PD!(K, convert(eltype(K), a))
    n = size(K, 1)

    n_rho = length(rholist)
    KLfactor_timings = zeros(n_rho)
    rel_errs_Uapprox = zeros(n_rho)
    rel_errs_iK = zeros(n_rho)

    for (rho_ind, rho) in enumerate(rholist)
        if rho_ind == 1
            # The first call compiles everything, so run it once untimed.
            compute_explicit_factor(x, K, rho, n_neighbors)
        end
        KLfactor_timings[rho_ind] = @elapsed begin
            factor = compute_explicit_factor(x, K, rho, n_neighbors)
        end

        U_approx = factor.U
        P = factor.P
        C = cholesky(K[P, P])
        U = inv(C.U)
        rel_errs_Uapprox[rho_ind] = norm(U .- U_approx) / norm(U) # Frobenius norm

        iK_approx = assemble_inv_covariance(factor)
        # `norm(I)` is undefined for the size-less identity `I`; the Frobenius
        # norm of the n×n identity is sqrt(n).
        rel_errs_iK[rho_ind] = norm(I - iK_approx * K) / sqrt(n)

        # TODO: also time write_factor_to_file and write_iK_to_file. Open
        # question: how to warm those up without actually writing a file.
    end

    stats = DataFrame(
        rel_errs_Uapprox = rel_errs_Uapprox,
        rel_errs_iK = rel_errs_iK,
        KLfactor_timings = KLfactor_timings,
        rhos = rholist,
        n_neighbors = fill(n_neighbors, n_rho),
        read_timings = fill(read_timing, n_rho),
    )
    return stats
end

5-element Vector{Int64}:
 2
 2
 2
 2
 2