# Ensemble Learning - Stacking

In [1]:
import os
import numpy as np
import tensorflow as tf
import h5py
import matplotlib.pyplot as plt

from Network import loss_utils
from utils import evaluation_utils as e_utils

%load_ext autoreload
%autoreload 2

Stacking Parameters

In [None]:
# Number of parts the shuffled patch index rows are split into (np.array_split) in the fold-based approaches
K_FOLDS = 3
# Number of base models; also fixes the channel count (3 * NUM_BASE_MODELS) of the meta-learner input
NUM_BASE_MODELS = 3

### Generate training & validation data for base models

Approach #1: K-folds on subset of train + validation data

In [None]:
# Approach #1: draw a random subset of every compartment's patch index rows,
# pool the compartments, shuffle, and write K_FOLDS base/meta/validation CSV files.

# Files
data_dir = '../data'
save_dir = '../data/stacking_3test'

# Training patch files
train_patches = ["aortaTrain_patches.csv", "cerebTrain_patches.csv", "cardiacTrain_patches.csv"]
training_files = [f'{data_dir}/{file}' for file in train_patches]

# Validation patch files
val_patches = ["aortaVal_patches.csv", "cerebVal_patches.csv", "cardiacVal_patches.csv"]
validate_files = [f'{data_dir}/{file}' for file in val_patches]

# CSV Header file
header_file = f'{data_dir}/header.csv'

# Parameters
sampling_fraction = 0.15  # share of rows kept from each compartment file

# Create save directory
os.makedirs(save_dir, exist_ok=True)

# Load data file and indexes (one array of string rows per compartment, header row skipped)
base_trainset = [np.genfromtxt(file, delimiter=',', skip_header=True, dtype='unicode') for file in training_files]
base_valset = [np.genfromtxt(file, delimiter=',', skip_header=True, dtype='unicode') for file in validate_files]

header = ','.join(list(np.genfromtxt(header_file, delimiter=',', dtype='unicode')))

# Random sampling: one index vector per compartment, drawn without replacement
train_indices = [np.random.choice(comp.shape[0], int(comp.shape[0]*sampling_fraction), replace=False) for comp in base_trainset]
val_indices = [np.random.choice(comp.shape[0], int(comp.shape[0]*sampling_fraction), replace=False) for comp in base_valset]

# Slice every compartment with its own index vector and pool the compartments
base_trainset = np.concatenate([data[indices] for data, indices in zip(base_trainset, train_indices)], axis=0)
base_valset = np.concatenate([data[indices] for data, indices in zip(base_valset, val_indices)], axis=0)

np.random.shuffle(base_trainset)
np.random.shuffle(base_valset)

train_folds = np.array_split(base_trainset, K_FOLDS)
val_folds = np.array_split(base_valset, K_FOLDS)

# Save training folds: fold k is the meta set, all other folds together are the base set
for k, fold in enumerate(train_folds):
    trainset_meta = fold
    trainset_base = np.concatenate([f for j, f in enumerate(train_folds) if j != k])

    output_meta = f'{save_dir}/fold{k}_meta_patches.csv'
    output_base = f'{save_dir}/fold{k}_base_patches.csv'
    np.savetxt(output_meta, trainset_meta, fmt='%s', delimiter=',', comments='', header=header)
    np.savetxt(output_base, trainset_base, fmt='%s', delimiter=',', comments='', header=header)

# Save validation folds
for k, fold in enumerate(val_folds):
    np.savetxt(f'{save_dir}/fold{k}_val_patches.csv', fold, fmt='%s', delimiter=',', comments='', header=header)

Approach #2: K-folds on entire training data

In [None]:
# Approach #2: shuffle the complete training / validation index files and
# write K_FOLDS base/meta/validation CSV files.

# Files
data_dir = '../data'
save_dir = '../data/stacking_small'
training_file = '{}/train_patches.csv'.format(data_dir)
validation_file = '{}/val_patches.csv'.format(data_dir)

# Create save directory
os.makedirs(save_dir, exist_ok=True)

# Load data file and indexes
# The training file is read including its header row, which is split off below.
base_trainset = np.genfromtxt(training_file, delimiter=',', dtype='unicode')
# The validation header row must be skipped, otherwise it is shuffled into the
# folds as if it were a data row (the header written out comes from the training file).
base_valset = np.genfromtxt(validation_file, delimiter=',', skip_header=True, dtype='unicode')

# Save header
header = ','.join(list(base_trainset[0,:]))
base_trainset = base_trainset[1:, :]

np.random.shuffle(base_trainset)
np.random.shuffle(base_valset)

train_folds = np.array_split(base_trainset, K_FOLDS)
val_folds = np.array_split(base_valset, K_FOLDS)

# Save training folds: fold k is the meta set, all other folds together are the base set
for k, fold in enumerate(train_folds):
    trainset_meta = fold
    trainset_base = np.concatenate([f for j, f in enumerate(train_folds) if j != k])

    output_meta = f'{save_dir}/fold{k}_meta_patches.csv'
    output_base = f'{save_dir}/fold{k}_base_patches.csv'
    np.savetxt(output_meta, trainset_meta, fmt='%s', delimiter=',', comments='', header=header)
    np.savetxt(output_base, trainset_base, fmt='%s', delimiter=',', comments='', header=header)

# Save validation folds
for k, valfold in enumerate(val_folds):
    np.savetxt(f'{save_dir}/fold{k}_val_patches.csv', valfold, fmt='%s', delimiter=',', comments='', header=header)

Approach #3: K training splits and 1 meta split over entire data

In [2]:
# Approach #3: split the complete training / validation index files into K_SPLITS
# disjoint parts for the base learners, and build the meta-learner train / validation
# sets by sampling rows from the per-compartment validation files.

# Files
data_dir = '../data'
save_dir = '../data/stacking_2splits'
training_file = '{}/train_patches.csv'.format(data_dir)
validation_file = '{}/val_patches.csv'.format(data_dir)

# Validation patch files (one per compartment) - source of the meta-learner data
val_patches = ["aortaVal_patches.csv", "cerebVal_patches.csv", "cardiacVal_patches.csv"]
validate_files = [f'{data_dir}/{file}' for file in val_patches]

# Base learner splits
K_SPLITS = 2

# Number of samples to save/put aside for meta learner
# (totals over all compartments; each compartment contributes an equal, truncated share)
meta_train_samples = 8e3
meta_val_samples = 2.5e3

# Create save directory
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)

# Prepare data - Base Learner

# Load data file and indexes
# The training file is read with its header row (split off below); the validation header is skipped.
base_trainset = np.genfromtxt(training_file, delimiter=',', dtype='unicode')
base_valset = np.genfromtxt(validation_file, delimiter=',', skip_header=True, dtype='unicode')

# Save header
header = ','.join(list(base_trainset[0,:]))
base_trainset = base_trainset[1:, :]

# Shuffle rows in place before splitting (unseeded, so every run yields different splits)
np.random.shuffle(base_trainset) 
np.random.shuffle(base_valset) 

base_train_splits = np.array_split(base_trainset, K_SPLITS)
base_val_splits = np.array_split(base_valset, K_SPLITS)

# Prepare data - Meta Learner

# Load data file and indexes
meta_data = [np.genfromtxt(file, delimiter=',', skip_header=True, dtype='unicode') for file in validate_files]

meta_train = []
meta_val = []

for compartment_data in meta_data:
    # The rows are split in file order (no shuffle): the first two thirds feed the
    # meta training set, the last third feeds the meta validation set.
    c_splits = np.array_split(compartment_data, 3)
    c_splits = [np.concatenate([c_splits[0], c_splits[1]]), c_splits[2]] # Join the first 2 splits
    
    # Sample meta data
    # replace=False: np.random.choice raises if a part holds fewer rows than requested
    train_indices = np.random.choice(len(c_splits[0]), int(meta_train_samples/len(meta_data)), replace=False)
    val_indices = np.random.choice(len(c_splits[1]), int(meta_val_samples/len(meta_data)), replace=False)
    
    # Extract indices for meta training and validation
    c_train = c_splits[0][train_indices]
    c_val = c_splits[1][val_indices]
    
    # Accumulate as plain lists of rows; compartments stay grouped (no shuffle afterwards)
    meta_train += c_train.tolist()
    meta_val += c_val.tolist()

# Save base learner training splits
for k, t_split in enumerate(base_train_splits):
    name = f'{save_dir}/split{k}_base_patches.csv'
    np.savetxt(name, t_split, fmt='%s',delimiter=',', comments='', header=header)
   
# Save base learner validation splits 
for k, v_split in enumerate(base_val_splits):
    name = f'{save_dir}/split{k}_val_patches.csv'
    np.savetxt(name, v_split, fmt='%s',delimiter=',', comments='', header=header)

# Save meta learner data
np.savetxt(f'{save_dir}/meta_train_patches.csv', meta_train, fmt='%s',delimiter=',', comments='', header=header)
np.savetxt(f'{save_dir}/meta_val_patches.csv', meta_val, fmt='%s',delimiter=',', comments='', header=header)


Approach #4: Compartment-wise splits with extracted meta split

In [None]:
# Approach #4: per compartment, set aside randomly drawn rows for the meta-learner and
# write the remaining rows as K_SPLITS base-learner training / validation files.
data_dir = '../data'
save_dir = '../data/stacking_compartment'

# Training patch files
train_patches = ["aortaTrain_patches.csv", "cerebTrain_patches.csv", "cardiacTrain_patches.csv"]
training_files = [f'{data_dir}/{file}' for file in train_patches]

# Validation patch files
val_patches = ["aortaVal_patches.csv", "cerebVal_patches.csv", "cardiacVal_patches.csv"]
validate_files = [f'{data_dir}/{file}' for file in val_patches]

# CSV Header file
header_file = f'{data_dir}/header.csv'

# Number of samples to save/put aside for meta learner
# (totals over all compartments; each compartment contributes an equal, truncated share)
meta_train_samples = 1e4
meta_val_samples = 2.5e3

# Splits per compartment
K_SPLITS = 1

# Create save directory
os.makedirs(save_dir, exist_ok=True)

# Load data file and indexes (one array of string rows per compartment, header row skipped)
base_trainset = [np.genfromtxt(file, delimiter=',', skip_header=True, dtype='unicode') for file in training_files]
valset = [np.genfromtxt(file, delimiter=',', skip_header=True, dtype='unicode') for file in validate_files]

header = ','.join(list(np.genfromtxt(header_file, delimiter=',', dtype='unicode')))

# Sample meta data: one index vector per compartment, drawn without replacement
train_indices = [np.random.choice(comp.shape[0], int(meta_train_samples/len(base_trainset)), replace=False) for comp in base_trainset]
val_indices = [np.random.choice(comp.shape[0], int(meta_val_samples/len(valset)), replace=False) for comp in valset]

# Extract indices to create disjoint meta / base sets:
# the sampled rows go to the meta sets, np.delete keeps everything else for the base sets
metatrain = [data[indices] for data, indices in zip(base_trainset, train_indices)]
metaval = [data[indices] for data, indices in zip(valset, val_indices)]

basetrain = [np.delete(data, indices, axis=0) for data, indices in zip(base_trainset, train_indices)]
baseval = [np.delete(data, indices, axis=0) for data, indices in zip(valset, val_indices)]

for compartment, train, val in zip(["aorta", "cerebro", "cardiac"], basetrain, baseval):
    np.random.shuffle(train)
    np.random.shuffle(val)

    base_train_splits = np.array_split(train, K_SPLITS)
    val_splits = np.array_split(val, K_SPLITS)

    # Save training splits
    for k, t_splits in enumerate(base_train_splits):
        name = f'{save_dir}/{compartment}{k}_base_patches.csv'
        np.savetxt(name, t_splits, fmt='%s', delimiter=',', comments='', header=header)

    # Save validation splits
    for k, v_splits in enumerate(val_splits):
        name = f'{save_dir}/{compartment}{k}_val_patches.csv'
        np.savetxt(name, v_splits, fmt='%s', delimiter=',', comments='', header=header)


# Combine meta data across compartments
metatrain = np.concatenate(metatrain, axis=0)
metaval = np.concatenate(metaval, axis=0)

# Shuffle for good measure
np.random.shuffle(metatrain)
np.random.shuffle(metaval)

# Save
np.savetxt(f'{save_dir}/meta_train_patches.csv', metatrain, fmt='%s', delimiter=',', comments='', header=header)
np.savetxt(f'{save_dir}/meta_val_patches.csv', metaval, fmt='%s', delimiter=',', comments='', header=header)

## Cells below have been extracted into separate script files

Train base models

In [None]:
import subprocess

# Launch one training job per (base model, fold) combination.
data_dir = "../data"
validate_file = f"{data_dir}/val_patches.csv"
benchmark_file = f"{data_dir}/test_patches.csv"

for i in range(NUM_BASE_MODELS):
    for k in range(K_FOLDS):
        training_file = f"{data_dir}/fold{k}_base_patches.csv"
        network_name = f"4DFlowNet-stacking{i}-fold{k}"
        # Pass the arguments as a list (no shell=True) so that paths containing
        # spaces or shell metacharacters reach the script unchanged.
        return_code = subprocess.call(
            ["bash", "../../train_base.sbatch", data_dir, training_file, validate_file, benchmark_file, network_name]
        )
        # Report failures instead of silently dropping the exit status; keep going with the next job.
        if return_code != 0:
            print(f"Training job {network_name} exited with code {return_code}")

### Generating training / validation data for meta learner

In [None]:
import tensorflow as tf
import numpy as np
import time
import h5py
import os
from Network.PatchHandler3D import PatchHandler3D
from utils import prediction_utils


def save(predictions, hr, venc, mask, compartment, output_filepath, models):
    """Save generated predictions (meta-learner input) along with the rest of the training data as a single HDF5 file.

    Args:
        predictions: array whose last axis holds the base-model outputs, three
            consecutive channels (U, V, W) per base model.
        hr: sequence of the three high-resolution reference components (U, V, W).
        venc: per-sample velocity encoding values.
        mask: per-sample masks.
        compartment: per-sample compartment labels.
        output_filepath: HDF5 file passed on to prediction_utils.save_to_h5.
        models: names of the base models that produced `predictions`.
    """

    # Base model predictions - U, V, W
    # Select single channels on the LAST axis and number the datasets per model
    # (u_m0, u_m1, ...), which is the naming the readers of this file use.
    for channel in range(0, predictions.shape[-1], 3):
        m_idx = channel // 3
        prediction_utils.save_to_h5(output_filepath, f'u_m{m_idx}', predictions[..., channel], compression='gzip')
        prediction_utils.save_to_h5(output_filepath, f'v_m{m_idx}', predictions[..., channel + 1], compression='gzip')
        prediction_utils.save_to_h5(output_filepath, f'w_m{m_idx}', predictions[..., channel + 2], compression='gzip')

    # Model names
    models = np.asarray(models, dtype=h5py.special_dtype(vlen=str))
    prediction_utils.save_to_h5(output_filepath, 'base_models', models, compression='gzip')

    # HR - U, V, W
    prediction_utils.save_to_h5(output_filepath, 'u_hr', hr[0], compression='gzip')
    prediction_utils.save_to_h5(output_filepath, 'v_hr', hr[1], compression='gzip')
    prediction_utils.save_to_h5(output_filepath, 'w_hr', hr[2], compression='gzip')

    # VENC, Mask, Compartment
    prediction_utils.save_to_h5(output_filepath, 'venc', venc, compression='gzip')
    prediction_utils.save_to_h5(output_filepath, 'mask', mask, compression='gzip')
    prediction_utils.save_to_h5(output_filepath, 'compartment', compartment, compression='gzip')

def load_indexes(index_file):
    """Read a patch index CSV file and return its rows as an array of strings.

    The first line of the file (the header) is skipped. The rows are what the
    patch loader uses to locate patches by their x, y, z index.
    """
    return np.genfromtxt(index_file, dtype='unicode', delimiter=',', skip_header=True)

def load_model(model_path, model):
    """Load the '<name>-best.h5' Keras model stored in `model_path`.

    `<name>` is `model` with everything from its last underscore onwards removed.
    Raises ValueError if `model` contains no underscore.
    """
    cut = model.rindex("_")
    checkpoint = f"{model_path}/{model[:cut]}-best.h5"
    return tf.keras.models.load_model(checkpoint)

# Configuration for generating the meta-learner input: locations of the fold index
# files, the trained base models and the output directory.
data_dir = '../data'
stacking_dir = f"{data_dir}/stacking"

# NOTE(review): data_files is not read anywhere in this cell; the folds that are
# processed come from the keys of fold_model_set below.
data_files = [f"{stacking_dir}/fold{k}_meta_patches.csv" for k in range(K_FOLDS)]

model_dir = "../models"
output_dir = f"../results/stacking_compartment"

# Folds and their corresponding models - Adjust for training/validation
# Keys are index file names inside stacking_dir; values are the model run names,
# which are also the sub-directory names inside model_dir.
fold_model_set = {
    "fold0_meta_patches.csv": ["4DFlowNet-stacking1_20230303-1415", 
                                "4DFlowNet-stacking2_20230303-1433", "4DFlowNet-stacking3_20230303-1435"],
    "fold1_meta_patches.csv": ["4DFlowNet-stacking1_20230303-1415", 
                                "4DFlowNet-stacking2_20230303-1433", "4DFlowNet-stacking3_20230303-1435"],
    "fold2_meta_patches.csv": ["4DFlowNet-stacking1_20230303-1415", 
                                "4DFlowNet-stacking2_20230303-1433", "4DFlowNet-stacking3_20230303-1435"],
}

# Params
patch_size = 12
res_increase = 2
batch_size = 64
mask_threshold = 0.6
round_small_values = True  # NOTE(review): not read in this cell
# Edge length of a super-resolved patch
sr_patch_size = patch_size*res_increase

# Network
# NOTE(review): low_resblock / hi_resblock are not read in this cell
low_resblock=8
hi_resblock=4

if not os.path.isdir(output_dir):
    os.makedirs(output_dir)
    
# Iterate over the data and generate predictions
# For every fold: load its index rows and base models, run each model on every batch,
# collect predictions plus reference data in memory and write them to the HDF5 file.
for fold_idx, (fold_name, model_names) in enumerate(fold_model_set.items()):
    fold_data = load_indexes(f"{stacking_dir}/{fold_name}")
    fold_models = [load_model(f"{model_dir}/{m}", m) for m in model_names]

    ph = PatchHandler3D(data_dir, patch_size, res_increase, batch_size, mask_threshold)
    metaset = ph.initialize_dataset(fold_data, shuffle=False, n_parallel=None, drop_remainder=True)

    # drop_remainder=True is requested above, so the sample count is taken as batches * batch_size
    num_batches = tf.data.experimental.cardinality(metaset).numpy()
    nr_samples = num_batches * batch_size
    
    
    print(f"Fold {fold_idx}, number of batches: {num_batches}")
    # Pre-allocated result buffers; the last axis of `predictions` holds 3 channels per base model
    predictions = np.zeros((nr_samples, sr_patch_size, sr_patch_size, sr_patch_size, 3*NUM_BASE_MODELS), dtype='float32')
    hr = np.zeros((3, nr_samples, sr_patch_size, sr_patch_size, sr_patch_size), dtype='float32')
    venc = np.zeros((nr_samples), dtype='float32')
    mask = np.zeros((nr_samples, sr_patch_size, sr_patch_size, sr_patch_size), dtype='float32')
    compartment = np.zeros((nr_samples), dtype=h5py.special_dtype(vlen=str))
    
    start_time = time.time()
    
    # Iterate over the data and generate predictions
    for batch_idx, (data_batch) in enumerate(metaset): 
        # Positions of this batch inside the pre-allocated buffers
        fill_range = range(batch_idx*batch_size, (batch_idx*batch_size + batch_size))
            
        # Entries 0-5 are the model input, 6-8 the HR targets, 9-11 venc / mask / compartment
        # (layout as sliced here; the exact content is defined by PatchHandler3D - TODO confirm)
        lr_input = data_batch[:6]
        hr[:, fill_range] = np.squeeze(np.asarray(data_batch[6:9]))
        venc[fill_range], mask[fill_range], compartment[fill_range] = (data.numpy() for data in data_batch[9:12])
        
        # Starts with zero channels so that every model's output can be appended on the last axis
        batch_predictions = np.zeros((batch_size, sr_patch_size, sr_patch_size, sr_patch_size, 0))
        
        for model_idx, model in enumerate(fold_models):
            # Predict using 3D velocities and 3D magnitudes
            sr_images = model(lr_input, training=False)
            
            # Denormalize
            sr_images = sr_images * venc[fill_range].reshape(-1,1,1,1,1)
            
            # Concatenate along the channel axis
            batch_predictions = np.concatenate((batch_predictions,sr_images), axis=-1) # (Batch size, 24, 24, 24, 3 * NUM_BASE_MODELS)
        
        predictions[fill_range] = batch_predictions # (Samples, 24, 24, 24, 3 * NUM_BASE_MODELS)
        
        # Logging
        # NOTE(review): time_taken is assigned but never read; batch_idx is zero-based in the message
        time_taken = time.time() - start_time
        print(f"\rProcessed {batch_idx}/{num_batches} Elapsed: {(time.time() - start_time):.2f} secs.", end='\r')
    time_taken = time.time() - start_time

    print(f"Processed fold {fold_idx}, Elapsed: {(time.time() - start_time):.2f} secs.")
    
    # Save the meta-learner training data patch-wise
    # Every fold is written to the same file (presumably appended by save_to_h5 - TODO confirm)
    print("Saving fold...")
    save(predictions, hr, venc, mask, compartment, f'{output_dir}/meta_training.h5', model_names)
    print(f"Saved fold {fold_idx}, Elapsed: {(time.time() - start_time):.2f} secs.")

print("Done!")

Train meta-learner

In [None]:
# Use meta_trainer.py

### Helpers

Save subset of h5 file

In [None]:
def save_to_h5(output_filepath, col_name, dataset, chunks=True):
    """Create or extend the dataset `col_name` in the HDF5 file `output_filepath`.

    A missing dataset is created (lzf-compressed, first axis unlimited) from
    `dataset`; an existing one is grown along axis 0 and `dataset` is written into
    the new rows. float64 input is stored as float32. `chunks` is forwarded to
    h5py when the dataset is created.
    """
    # convert float64 to float32 to save space
    if dataset.dtype == 'float64':
        dataset = np.array(dataset, dtype='float32')

    with h5py.File(output_filepath, 'a') as hf:
        if col_name in hf:
            # Append: enlarge the first axis, then fill the freshly added tail
            n_new = dataset.shape[0]
            hf[col_name].resize(hf[col_name].shape[0] + n_new, axis=0)
            hf[col_name][-n_new:] = dataset
        else:
            # Unlimited first axis, all remaining axes fixed to the data's shape
            max_shape = (None,) + dataset.shape[1:]
            hf.create_dataset(col_name, data=dataset, maxshape=max_shape, chunks=chunks, compression="lzf")

# Copy the meta-learner file into a new file whose 4D datasets use 10-patch chunks.
file = "../data/stacking_small/meta_training.h5"           
frame = 10

rows = ['u_hr', 'v_hr', 'w_hr', 'u_m0', 'u_m1', 'u_m2', 'v_m0', 'v_m1', 'v_m2', 'w_m0', 'w_m1', 'w_m2', 'venc', 'mask', 'compartment']
model_r = ['u_hr', 'v_hr', 'w_hr', 'venc', 'mask', 'compartment']

# Read every listed dataset completely into memory
data = {}
with h5py.File(file, 'r') as hf:
    for r in rows:
        data[r] = np.asarray(hf[r])

# Velocity datasets get explicit chunks; venc, mask and compartment keep save_to_h5's default
default_chunked = ('venc', 'mask', 'compartment')
for colname, d in data.items():
    chunk_args = {} if colname in default_chunked else {'chunks': (10, 24, 24, 24)}
    save_to_h5("../data/stacking_small/meta_training_10chunks.h5", colname, d, **chunk_args)


In [None]:
# List every top-level dataset of the meta-learner training file
with h5py.File("../data/stacking_splits/meta_training.h5", 'r') as hf:
    for dataset in hf.values():
        print(dataset)

In [None]:
def _normalize(data, venc):
    return data / venc

def load_patches_from_h5(idx):
    """Load one meta-learner sample from the HDF5 file named by the global `training_file`.

    Args:
        idx: index of the sample along the first axis of every dataset.

    Returns:
        Tuple (uhv_m, u_hr, v_hr, w_hr, venc, mask, compartment). `uhv_m` stacks the
        venc-normalized u/v/w predictions of every base model; the HR components are
        venc-normalized and get a new leading axis.
    """
    # Single file handle and the module-level _normalize: this is a plain function,
    # so there is no `self` to read a file path or helper from.
    with h5py.File(training_file, 'r') as hf:
        mask, venc, compartment = [tf.convert_to_tensor(hf[ds][idx]) for ds in ['mask', 'venc', 'compartment']]
        u_hr, v_hr, w_hr = [_normalize(tf.convert_to_tensor(hf[ds][idx]), venc) for ds in ['u_hr', 'v_hr', 'w_hr']]
        uhv_m = []
        for m_idx in range(3): # change to number of base models
            uhv_m.append(_normalize(hf[f'u_m{m_idx}'][idx], venc))
            uhv_m.append(_normalize(hf[f'v_m{m_idx}'][idx], venc))
            uhv_m.append(_normalize(hf[f'w_m{m_idx}'][idx], venc))
        return tf.convert_to_tensor(uhv_m), u_hr[tf.newaxis], v_hr[tf.newaxis], w_hr[tf.newaxis], venc, mask, compartment
                
def load_data_using_patch_index(idx):
    """Wrap load_patches_from_h5 in tf.py_function so it can be used in Dataset.map."""
    # One dtype per returned value: six float32 tensors followed by the string compartment
    output_types = [tf.float32] * 6 + [tf.string]
    return tf.py_function(func=load_patches_from_h5, inp=[idx], Tout=output_types)

idx = tf.range(10000)

# Sample indices -> shuffle -> load the patch for each index -> batch -> prefetch
ds = (
    tf.data.Dataset.from_tensor_slices(idx)
    .shuffle(buffer_size=46656)
    .map(load_data_using_patch_index)
    .batch(batch_size=batch_size, drop_remainder=False)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Show only the first batch of the dataset
for i, data in enumerate(ds.take(1)):
    print(data)

In [None]:
class generator:
    """Callable that yields meta-learner samples from an HDF5 file, one patch at a time.

    Instances are meant to be passed to tf.data.Dataset.from_generator. The file is
    opened when iteration starts and stays open until the generator is exhausted.

    Args:
        file: path of the HDF5 file to read.
        base_models: number of base models, i.e. how many u_m*/v_m*/w_m* dataset
            triples are read per sample.
    """

    def __init__(self, file, base_models):
        self.file = file
        self.base_models = base_models

    def _normalize(self, data, venc):
        """Return `data` divided by `venc`."""
        return data / venc

    def __call__(self):
        """Yield (uhv_m, u_hr, v_hr, w_hr, mask, venc, compartment) for every patch in the file."""
        with h5py.File(self.file, 'r') as hf:
            nr_patches = hf['compartment'].shape[0]
            for p_idx in range(nr_patches):
                mask, venc, compartment = [tf.convert_to_tensor(hf[ds][p_idx]) for ds in ['mask', 'venc', 'compartment']]
                u_hr, v_hr, w_hr = [self._normalize(tf.convert_to_tensor(hf[ds][p_idx]), venc) for ds in ['u_hr', 'v_hr', 'w_hr']]
                # Base-model predictions, ordered u, v, w per model and stacked on a new leading axis
                uhv_m = []
                for m_idx in range(self.base_models):
                    uhv_m.append(self._normalize(hf[f'u_m{m_idx}'][p_idx], venc))
                    uhv_m.append(self._normalize(hf[f'v_m{m_idx}'][p_idx], venc))
                    uhv_m.append(self._normalize(hf[f'w_m{m_idx}'][p_idx], venc))
                yield tf.convert_to_tensor(uhv_m), u_hr, v_hr, w_hr, mask, venc, compartment

# The patches stored in the HDF5 file are super-resolved (sr_patch_size per axis),
# so the output signature must use sr_patch_size; with the low-resolution patch_size
# from_generator rejects every yielded element because of the shape mismatch.
patch = (sr_patch_size, sr_patch_size, sr_patch_size)
ds = tf.data.Dataset.from_generator(
       generator(training_file, NUM_BASE_MODELS), 
       output_signature=(
           tf.TensorSpec(shape=((NUM_BASE_MODELS*3,) + patch), dtype=tf.float32, name='uhv_m'),
           tf.TensorSpec(shape=patch, dtype=tf.float32, name='u_hr'),
           tf.TensorSpec(shape=patch, dtype=tf.float32, name='v_hr'),
           tf.TensorSpec(shape=patch, dtype=tf.float32, name='w_hr'),
           tf.TensorSpec(shape=patch, dtype=tf.float32, name='mask'),
           tf.TensorSpec(shape=(), dtype=tf.float32, name='venc'),
           tf.TensorSpec(shape=(), dtype=tf.string, name='compartment')
       ))

#ds = ds.shuffle(10) 

ds = ds.batch(batch_size=batch_size)

# prefetch, n=number of items
ds = ds.prefetch(tf.data.AUTOTUNE)


In [None]:
from Network.loss_utils import calculate_relative_error

# Opened without a context manager: the cells below index `hf` directly, so the
# handle stays open until hf.close() is called or `hf` is rebound.
hf = h5py.File(training_file, 'r')

In [None]:
# Create a synthetic HDF5 file (random data, 10-patch chunks, lzf) for read benchmarks.
#We are using 4GB of chunk_cache_mem here ("rdcc_nbytes")
# NOTE: the cache settings only appear in the commented-out call below; the file is opened with h5py defaults.
shape = (6912, 24, 24, 24)
shape2 = (6912,)
chunk_shape=(10, 24, 24, 24)

file = "test_c10_lzf_x.h5"
#f = h5py.File(file, 'w',rdcc_nbytes =1024**2*4000,rdcc_nslots=1e7)

# 4D datasets, created in this order.
# NOTE(review): 'w_m3' has no u_m3 / v_m3 counterpart - possibly a leftover; kept as in the original file layout.
volume_names = ['u_m0', 'u_m1', 'u_m2',
                'v_m0', 'v_m1', 'v_m2',
                'w_m0', 'w_m1', 'w_m2', 'w_m3',
                'u_hr', 'v_hr', 'w_hr',
                'mask']

# Mode 'a' keeps an existing file; create_dataset raises if a dataset name already exists.
with h5py.File(file, 'a') as f:
    for name in volume_names:
        # Fresh random block per dataset, stored as float32
        data = np.random.rand(*shape)
        f.create_dataset(name, shape, dtype=np.float32, data=data, chunks=chunk_shape, compression="lzf")
    # venc and compartment share the same 1D random vector
    data = np.random.rand(*shape2)
    f.create_dataset('venc', shape2, dtype=np.float32, data=data, compression="gzip")
    f.create_dataset('compartment', shape2, dtype=np.float32, data=data, compression="gzip")
hftest = h5py.File(file, 'r')

In [None]:
# NOTE(review): this opens "test_c10_lzf.h5", not the "test_c10_lzf_x.h5" file written by the
# previous cell, and rebinds hftest without closing the handle it held before.
hftest = h5py.File("test_c10_lzf.h5", 'r')

In [None]:
import time
# Time 500 random single-patch reads of every velocity dataset in the benchmark file
rnd = np.random.choice(6912, 500, replace=False)
read_order = ['u_hr', 'w_hr', 'v_hr',
              'u_m0', 'u_m1', 'u_m2',
              'v_m0', 'v_m1', 'v_m2',
              'w_m0', 'w_m1', 'w_m2']
start = time.time()
for i in rnd:
    for name in read_order:
        x = hftest[name][i]
print(f"Elapsed: {time.time() - start}")

In [None]:
import time
# Same read pattern as the benchmark-file timing, against the real training file `hf`
read_order = ['u_hr', 'w_hr', 'v_hr',
              'u_m0', 'u_m1', 'u_m2',
              'v_m0', 'v_m1', 'v_m2',
              'w_m0', 'w_m1', 'w_m2']
start = time.time()
for i in rnd:
    for name in read_order:
        x = hf[name][i]
print(f"Elapsed: {time.time() - start}")

In [None]:
# Load one sample from the open training file and venc-normalize its velocity fields
sample = 0
mask = tf.convert_to_tensor(hf['mask'][sample])
venc = tf.convert_to_tensor(hf['venc'][sample])
compartment = tf.convert_to_tensor(hf['compartment'][sample])

u_hr = _normalize(tf.convert_to_tensor(hf['u_hr'][sample]), venc)
v_hr = _normalize(tf.convert_to_tensor(hf['v_hr'][sample]), venc)
w_hr = _normalize(tf.convert_to_tensor(hf['w_hr'][sample]), venc)

# Base-model predictions, ordered u, v, w for model 0, then model 1, then model 2
uvw_m = tf.convert_to_tensor([
    _normalize(hf[f'{component}_m{m_idx}'][sample], venc)
    for m_idx in range(3)
    for component in ('u', 'v', 'w')
])

In [None]:
# Shapes of the stacked predictions and of the three HR components
print(*(tensor.shape for tensor in (uvw_m, u_hr, v_hr, w_hr)))

In [None]:
# Add a leading batch axis of size one to the references and the mask.
# Re-running this cell adds another axis each time.
u_hr, v_hr, w_hr, mask = (tf.expand_dims(tensor, 0) for tensor in (u_hr, v_hr, w_hr, mask))

# Every base model owns three consecutive entries (u, v, w) of uvw_m
for i in range(0, uvw_m.shape[0], 3):
    u, v, w = (tf.expand_dims(uvw_m[i + offset], 0) for offset in range(3))

    batch_accuracy = calculate_relative_error(u, v, w, u_hr, v_hr, w_hr, mask)
    print(f"Model {i//3} accuracy: {tf.reduce_mean(batch_accuracy)}")
    

In [None]:
# Scratch check: the division is only evaluated when b is true
y = 1
b = False
if b:
    x = 1 / (y)
else:
    x = 2
print(x)

In [None]:
# List every top-level dataset of the meta-learner training file
with h5py.File("../data/stacking_splits/meta_training.h5", 'r') as hf:
    for dataset in hf.values():
        print(dataset)