In [13]:
import numpy as np
import pandas as pd
import os
import h5py

In [2]:
def gen_outputs(output_folder_path):
    """
    Stacks the libradtran output files of a folder into one 3-D array.

    Every entry of ``output_folder_path`` is read with ``pd.read_json`` in
    sorted file-name order, and the nine columns listed below are copied into
    ``Y[i, :, :]`` for the i-th file. The array shape is inferred from the
    data instead of being hard-coded.

    Args:
        output_folder_path: path to the folder containing the libradtran output files
            (with or without a trailing path separator)

    Returns:
        output_tensor: array of shape (n_files, n_rows, 9); this is
            (1728, 235002, 9) for the data set this notebook was written for.

    Raises:
        ValueError: if the folder is empty, or if a file yields a table whose
            shape differs from that of the first file.
    """
    columns = ["wavelength", "tdir_down", "tdif_down", "tdir_up", "tdif_up",
               "spherical_albedo", "edir", "edif", "path_rad"]

    output_files_list = sorted(os.listdir(output_folder_path))
    if not output_files_list:
        raise ValueError("no output files found in {!r}".format(output_folder_path))

    Y = None
    for i, file in enumerate(output_files_list):
        # os.path.join works whether or not the folder path ends with a separator.
        full_path = os.path.join(output_folder_path, file)
        M = pd.read_json(full_path)[columns].values
        if Y is None:
            # Allocate once, after the first file reveals the per-file shape;
            # building a list and stacking it would double the peak memory use.
            Y = np.zeros((len(output_files_list),) + M.shape)
        elif M.shape != Y.shape[1:]:
            # Without this check NumPy could silently broadcast a one-row table.
            raise ValueError(
                "{!r} has shape {} but {} was expected".format(file, M.shape, Y.shape[1:])
            )
        Y[i, :, :] = M
    return Y

In [3]:
# Folder that contains the libradtran output files.
output_folder_path = "/fmi/projappl/project_2004400/jamin/data/libradtran_data/NN_data/output_params/"
# Read every file in that folder into a single NumPy array.
outputs = gen_outputs(output_folder_path)

In [4]:
# Sanity check: axis 0 indexes the files, axis 1 the rows of each file,
# axis 2 the nine extracted columns.
outputs.shape

(1728, 235002, 9)

In [14]:
def save_HDF5(Y, save_loc, file_name):
    """
    Saves the given NumPy array into the desired location in HDF5 format.

    The array is stored as a dataset named "output_data". The file is opened
    in "w" mode, so an existing file with the same name is replaced.

    Args:
        Y: NumPy array
        save_loc: the path to the desired saving location (with or without a
            trailing path separator)
        file_name: name of the saved file

    Returns:
        None.
    """
    # os.path.join inserts the separator when save_loc lacks a trailing one;
    # plain concatenation would silently write to e.g. ".../NN_dataoutputs.h5".
    full_path = os.path.join(save_loc, file_name)
    with h5py.File(full_path, "w") as hf:
        hf.create_dataset("output_data", data=Y)

In [18]:
# Destination folder and file name for the stacked output array.
output_save_loc = "/fmi/projappl/project_2004400/jamin/data/libradtran_data/NN_data/"
output_file_name = "outputs.h5"
# Write the array to disk in HDF5 format.
save_HDF5(Y=outputs, save_loc=output_save_loc, file_name=output_file_name)

**Making sure that the inputs match the outputs**

In [25]:
# Load the inputs table; the first CSV column is used as the row index.
X = pd.read_csv("/fmi/projappl/project_2004400/jamin/data/libradtran_data/NN_data/inputs.csv", index_col=0)

In [35]:
# Atmosphere-file name of every row of the inputs table, in table order.
mls_files = list(X["atmosphere_file"])
# Output file names in sorted order, the same order gen_outputs uses to fill the array.
output_files = sorted(os.listdir(output_folder_path))

In [40]:
# The two counts should be equal if every input row has an output file.
print(len(mls_files))
print(len(output_files))

1728
1728


In [42]:
# Peek at the first three atmosphere-file names from the inputs table.
mls_files[:3]

['afglms_Q1_0', 'afglms_Q1_0', 'afglms_Q1_0']

In [43]:
# Peek at the first three output file names (sorted order).
output_files[:3]

['afglms_Q1_0_sza25_vza1.0_phi0_phi00_alt0_tau0.05.json',
 'afglms_Q1_0_sza25_vza1.0_phi0_phi00_alt0_tau0.15.json',
 'afglms_Q1_0_sza25_vza1.0_phi0_phi00_alt0_tau0.3.json']

In [51]:
# Verify that the i-th input row and the i-th (sorted) output file belong to
# the same atmosphere file.
# A length mismatch would otherwise go unnoticed (extra output files) or
# surface as an unrelated IndexError (missing output files).
assert len(mls_files) == len(output_files), (
    "number of inputs ({}) and output files ({}) differ".format(len(mls_files), len(output_files))
)
for i, (mls_name, output_name) in enumerate(zip(mls_files, output_files)):
    # The output file names shown above have the form "<atmosphere_file>_sza...".
    # Requiring the "_" delimiter keeps e.g. "afglms_Q1_1" from matching
    # "afglms_Q1_10_...", which a bare prefix comparison would accept.
    assert output_name.startswith(mls_name + "_"), (
        "names don't match at index {}: {!r} vs {!r}".format(i, mls_name, output_name)
    )
# Reached only if every assertion above passed.
print("All names match")

All names match
