# Testing DIVAnd_cv in Julia

In [1]:
using DIVAnd
using CSV
using DataFrames
using DelimitedFiles
using Statistics
using NCDatasets

In [2]:
var_name = "Oxy"
year = 2010
szn = "OND"
subsamp_interval = 1

obs_dir = string("C:\\Users\\HourstonH\\Documents\\NEP_climatology\\data\\", 
    "value_vs_depth\\14_sep_by_sl_and_year\\")

# GEBCO 6 minute mask
mask_dir = string("C:\\Users\\HourstonH\\Documents\\NEP_climatology\\data\\",
    "value_vs_depth\\16_diva_analysis\\masks\\")

output_dir = string("C:\\Users\\HourstonH\\Documents\\NEP_climatology\\", 
    "diva_explore\\correlation_length\\cross_validation\\from_julia\\")

"C:\\Users\\HourstonH\\Documents\\NEP_climatology\\diva_explore\\correlation_length\\cross_validation\\from_julia\\"

In [3]:
# Assign some parameters

# domain_size_x_deg = -115 - (-160)  # Degrees
# domain_size_y_deg = 60 - 30
# domain_size_x_m = DIVAnd.deg2m(domain_size_x_deg)
# domain_size_y_m = DIVAnd.deg2m(domain_size_y_deg)

# println(domain_size_x_m/10)
# println(domain_size_y_m/10)

# lenx = 500e3 # domain_size_x_m/10  # 500e3
# leny = 500e3 # domain_size_y_m/10  # 500e3

signal_to_noise_ratio = 50.  # Default from Lu ODV session
epsilon2 = 1/signal_to_noise_ratio  # 1.

# Choose number of testing points around the current value of L (corlen)
nl = 1

# Choose number of testing points around the current value of epsilon2
ne = 1

# Choose cross-validation method
# 1: full CV; 2: sampled CV; 3: GCV; 0: automatic choice between the three
method = 3

3

In [None]:
# Initialize arrays to hold new lenx and epsilon2
lenx_01_domain = []
leny_01_domain = []
lenx_cv = []
leny_cv = []
epsilon2_arr = []

# Iterate through standard depths
for standard_depth=[0:5:100;]
    
    # Read in standard level data file
    obs_filename = string(obs_dir, var_name, "_", standard_depth, "m_", year, 
        "_", szn, ".csv")
    
    println(obs_filename)
    
    # Pipe operator to dataframe
    obs_df = CSV.File(obs_filename) |> DataFrame
    
    if size(obs_df)[1] == 0
        println("DataFrame empty -- skipping")
        continue
    end
    
    xobs = obs_df[!, :Longitude]
    yobs = obs_df[!, :Latitude]
    vobs = obs_df[!, :SL_value]
    
    # Calculate domain size based on the observations
    # Set first guesses for correlation length as 1/10 domain size
    domain_size_x_deg = maximum(xobs) - minimum(xobs)
    domain_size_y_deg = maximum(yobs) - minimum(xobs)
    lenx_guess = domain_size_x_deg/10
    leny_guess = domain_size_y_deg/10
    
    # Append first guesses to arrays
    push!(lenx_01_domain, lenx_guess)
    push!(leny_01_domain, leny_guess)
    
    println("lenx: ", lenx_guess, "; leny: ", leny_guess)
    
    # Read in mask
    mask_filename = string(mask_dir, var_name, "_", standard_depth, "m_", 
        year, "_", szn, "_mask_6min.nc")
    
    mask_ds = Dataset(mask_filename)
    
    # Equivalent to numpy.meshgrid()
    Lon2d, Lat2d = ndgrid(mask_ds["lon"][1:subsamp_interval:end], 
        mask_ds["lat"][1:subsamp_interval:end])
    
    println("Made Lon2d, Lat2d")
    
    mask = mask_ds["mask"][1:subsamp_interval:end, 1:subsamp_interval:end]
    mask = Bool.(mask)
#     println(typeof(mask))
        
    # Assign parameters
    pm, pn = DIVAnd_metric(Lon2d, Lat2d)
    
    println("Computed pm, pn")
        
    # Compute anomaly field
    vmean = mean(vobs)
    vanom = vobs .- vmean
    
#     println(size(vanom))
    
    # Run the cross-validation
    # Need to take transpose?? transpose(A) = A'
    bestfactorl, bestfactore, cvval, cvvalues, x2Ddata, y2Ddata, cvinter, xi2D, yi2D = DIVAnd_cv(
        mask, (pm, pn), (Lon2d, Lat2d), (xobs, yobs), vanom, (lenx_guess, leny_guess), 
        epsilon2, nl, ne, method)
    
    new_lenx = bestfactorl .* lenx_guess
    new_leny = bestfactorl .* leny_guess
    new_epsilon2 = bestfactore .* epsilon2
    println("new lenx: ", new_lenx, "; new leny: ", new_leny)
    println(new_epsilon2)
    
    # Append new values to the arrays
    push!(lenx_cv, new_lenx)
    push!(leny_cv, new_leny)
    push!(epsilon2_arr, new_epsilon2)
    
    # Close mask dataset
    close(mask_ds)
end

C:\Users\HourstonH\Documents\NEP_climatology\data\value_vs_depth\14_sep_by_sl_and_year\Oxy_0m_2010_OND.csv
lenx: 1.6538494; leny: 19.0285664
Made Lon2d, Lat2d
Computed pm, pn
new lenx: 0.8020662566955302; new leny: 9.228271342439246
0.0308746060066084
C:\Users\HourstonH\Documents\NEP_climatology\data\value_vs_depth\14_sep_by_sl_and_year\Oxy_5m_2010_OND.csv
lenx: 1.6538494; leny: 19.0285664
Made Lon2d, Lat2d
Computed pm, pn
new lenx: 0.6004764230346993; new leny: 6.90885487357571
0.0004477442277136678
C:\Users\HourstonH\Documents\NEP_climatology\data\value_vs_depth\14_sep_by_sl_and_year\Oxy_10m_2010_OND.csv
lenx: 1.6538494; leny: 19.030883
Made Lon2d, Lat2d

In [9]:
# Make dataframe
df_out = DataFrame(depth=[0:5:100;], lenx_01_domain=lenx_01_domain, 
    leny_01_domain=leny_01_domain, lenx_cv = lenx_cv, leny_cv = leny_cv, 
    epsilon2 = epsilon2_arr)

# print(df_out)

Unnamed: 0_level_0,depth,lenx_01_domain,leny_01_domain,lenx_cv,leny_cv,epsilon2
Unnamed: 0_level_1,Int64,Any,Any,Any,Any,Any
1,0,1.65385,19.0286,20.8207,239.555,0.0531271
2,5,1.65385,19.0286,20.8207,239.555,0.0344147
3,10,1.65385,19.0309,20.8207,239.585,0.0476621
4,15,1.65118,19.0309,20.7872,239.585,0.0592187
5,20,1.65118,19.0309,20.7872,239.585,0.0592187
6,25,1.65118,19.0309,20.7872,239.585,0.0592187
7,30,1.65118,19.0309,20.7872,239.585,0.0592187
8,35,1.65118,19.0309,20.7872,239.585,0.0592187
9,40,1.65118,19.0309,0.800773,9.22939,0.000447744
10,45,1.65118,19.0309,0.744871,8.58509,0.000447744


In [11]:
# Print summary stats
mean_lenx = mean(df_out[!, "lenx_cv"])
mean_leny = mean(df_out[!, "leny_cv"])
mean_epsilon2 = mean(df_out[!, "epsilon2"])

println(mean(df_out[!, "lenx_01_domain"]), " ", mean(df_out[!, "leny_01_domain"]))
println(mean_lenx, " ", mean_leny, " ", mean_epsilon2)
println(median(df_out[!, "lenx_cv"]), "; ", median(df_out[!, "leny_cv"]), "; ", 
    median(df_out[!, "epsilon2"]))

1.6485082476190478 19.030662371428576
12.18514070949573 140.64340617680034 0.031169203073309153
20.67175668972613; 239.55545790972658; 0.04766206480316496


In [12]:
# Export the dataframe as a csv file
df_filename = string(output_dir, var_name, "_", year, "_", szn, 
    "_cv_results_top100m.csv")

CSV.write(df_filename, df_out)

"C:\\Users\\HourstonH\\Documents\\NEP_climatology\\diva_explore\\correlation_length\\cross_validation\\from_julia\\Oxy_2010_OND_cv_results_top100m.csv"

# Random code

In [None]:
xi, yi = ndgrid(range(0, stop=1, length=20), range(0, stop=1, length=15))

In [None]:
xi[1:5:length(xi)]  # Take transpose??

In [None]:
arr = [1:2:10;]
maximum(arr)

In [None]:
arr[1:3:end]

In [None]:
length(([0:1:10], [0:2:20]))

In [None]:
using LinearAlgebra
arr = [1 2 3; 4 5 6; 7 8 9]

In [None]:
# Read in mask
standard_depth = 0

mask_filename = string(mask_dir, var_name, "_", standard_depth, "m_", 
    year, "_", szn, "_mask_6min.nc")
    
mask_ds = Dataset(mask_filename)

# convert to true/false
mask_tf = mask_ds["mask"][:]

# mask_tf[mask_tf .== 1] .= true
# mask_tf[mask_tf .== 0] .= false

typeof(Bool.(mask_tf))

In [None]:
true == 1

In [None]:
obs_filename = string(obs_dir, var_name, "_", 0, "m_", year, 
        "_", szn, ".csv")

obs_df = CSV.File(obs_filename) |> DataFrame

size(obs_df)[1] == 0