In [None]:
# main_final_processing.ipynb

# Notebook that applies the final processing step to the data
# so that it is ready to be used for training the model.

In [1]:
import sys
# append the path of the parent directory
sys.path.append("..")

# External Imports
import os
import numpy as np

# Internal Imports
from final_processing import FinalProcessor
from viz import ArrayVisualizer

# Root data folder. The name of every directory found directly inside it is
# collected into `subfolders`, which later cells iterate over as model ids.
main_dir = r"/home/tom/repos/dyna-landslide-surrogate/data"
with os.scandir(main_dir) as entries:
    subfolders = [entry.name for entry in entries if entry.is_dir()]

In [3]:
# Build a FinalProcessor for every model folder and export its data.
# The processing options are identical for all models, so they are defined once.
# NOTE(review): target_size is 256 here, while the clean-up cell of this
# notebook reads from a folder named "04_FinalProcessedData_128" — confirm
# that both refer to the same export.
processing_options = dict(
    target_size=256,
    target_resolution=2,
    interpolation_order=1,
)

for model_id in subfolders:
    data = FinalProcessor(root_directory=main_dir, model_id=model_id, **processing_options)
    data.export_data(main_dir)

In [5]:
# Clean-up pass: for every model, delete the thickness/velocity file pair of a
# state when BOTH arrays contain nothing but zeroes.
#
# NOTE(review): the export cell of this notebook passes target_size=256, while
# this cell reads from a folder suffixed "_128" — confirm that this is the
# folder the export actually writes to.
processed_dirname = "04_FinalProcessedData_128"

for model_id in os.listdir(main_dir):
    model_dir = os.path.join(main_dir, model_id, processed_dirname)

    # Skip stray files in main_dir and models without processed data
    if not os.path.isdir(model_dir):
        continue

    # In each model directory, look for thickness and velocity files
    thickness_dir = os.path.join(model_dir, "thickness")
    velocity_dir = os.path.join(model_dir, "velocity")

    # Both subfolders are required. Without this guard, os.listdir() below
    # raises FileNotFoundError on a missing thickness folder and aborts the
    # clean-up for every remaining model.
    if not (os.path.isdir(thickness_dir) and os.path.isdir(velocity_dir)):
        print(f"Skipping model {model_id}: missing thickness or velocity folder in {model_dir}")
        continue

    # Get the state numbers from the thickness files assuming they match the velocity files
    state_numbers = [
        f.split('_')[-1].split('.')[0]
        for f in os.listdir(thickness_dir)
        if f.endswith('.npy')
    ]

    for state in state_numbers:
        # Construct the file paths for thickness and velocity
        thickness_file = os.path.join(thickness_dir, f"{model_id}_thickness_{state}.npy")
        velocity_file = os.path.join(velocity_dir, f"{model_id}_velocity_{state}.npy")

        # Only act on complete pairs; this also skips names that do not follow
        # the "<model_id>_<kind>_<state>.npy" pattern.
        if not os.path.exists(thickness_file) or not os.path.exists(velocity_file):
            continue

        # Load the thickness first: if it holds any non-zero value the pair is
        # kept, and the velocity array does not need to be read at all.
        thickness_array = np.load(thickness_file)
        if not np.all(thickness_array == 0):
            continue

        velocity_array = np.load(velocity_file)
        if not np.all(velocity_array == 0):
            continue

        # Both arrays contain only zeroes: delete the pair
        os.remove(thickness_file)
        os.remove(velocity_file)
        print(f"Deleted files with only zeroes for model {model_id} state {state}")