In [3]:
import os
import numpy as np

# Paths
# Root directory holding one sub-directory per model_id. The default is the
# original author's local checkout; set the DYNA_LANDSLIDE_DATA_DIR environment
# variable to point the notebook at a different location without editing code.
source_dir = os.environ.get(
    'DYNA_LANDSLIDE_DATA_DIR',
    '/home/tom/repos/dyna-landslide-surrogate/data_prune',
)

# Function to calculate the percentage of zeros in a numpy array
def calculate_zero_percentage(npy_file):
    """Return the percentage (0-100) of elements equal to zero in a saved array.

    Args:
        npy_file: Path (or file object) accepted by ``np.load``.

    Returns:
        ``zero_count / total_elements * 100`` for the loaded array.
    """
    data = np.load(npy_file)
    n_cells = data.size
    # Count the positions where the element compares equal to 0.
    n_zero = np.count_nonzero(data == 0)
    return (n_zero / n_cells) * 100

# Function to process files in a specified subdirectory (either 'thickness' or 'velocity')
def process_data_type(model_id, data_type):
    """Report the zero percentage of every ``.npy`` file for one model and data type.

    Scans ``<source_dir>/<model_id>/04_FinalProcessedData_256/<data_type>``,
    prints one line per ``.npy`` file and, when at least one file was found,
    the mean of those per-file percentages.

    Args:
        model_id: Name of the model directory under ``source_dir``.
        data_type: Name of the sub-directory to scan ('thickness' or 'velocity').

    Returns:
        The mean zero percentage over the ``.npy`` files, or ``None`` when the
        directory does not exist or contains no ``.npy`` files.
    """
    import re

    def _natural_key(name):
        # Split into alternating text / digit runs so numeric parts compare as
        # numbers ("..._6.npy" sorts before "..._24.npy"). Tokens always
        # alternate str, int, str, ... so keys stay mutually comparable.
        return [int(tok) if tok.isdigit() else tok
                for tok in re.split(r'(\d+)', name)]

    data_path = os.path.join(source_dir, model_id, '04_FinalProcessedData_256', data_type)
    if not os.path.isdir(data_path):
        print(f"No {data_type} directory found for model_id: {model_id}")
        return None

    print(f"Processing {data_type} for model_id: {model_id}")
    # os.listdir returns entries in arbitrary order; sort so the report is
    # reproducible from run to run.
    npy_files = sorted(
        (name for name in os.listdir(data_path) if name.endswith('.npy')),
        key=_natural_key,
    )

    zero_percentages = []
    for filename in npy_files:
        zero_percentage = calculate_zero_percentage(os.path.join(data_path, filename))
        zero_percentages.append(zero_percentage)
        print(f"{filename}: {zero_percentage:.2f}% of the array cells are zero.")

    if not zero_percentages:
        return None

    avg_zero_percentage = sum(zero_percentages) / len(zero_percentages)
    print(f"Average sparseness for {data_type} in model {model_id}: {avg_zero_percentage:.2f}%")
    return avg_zero_percentage

# Iterate over each model_id directory in the source directory.
# os.listdir gives no ordering guarantee, so sort the entries to make the
# printed report come out in the same order on every run.
for model_id in sorted(os.listdir(source_dir)):
    if not os.path.isdir(os.path.join(source_dir, model_id)):
        continue  # ignore plain files sitting next to the model directories
    # Process thickness and velocity data
    process_data_type(model_id, 'thickness')
    process_data_type(model_id, 'velocity')

Processing thickness for model_id: 00192
00192_thickness_24.npy: 99.78% of the array cells are zero.
00192_thickness_27.npy: 99.76% of the array cells are zero.
00192_thickness_47.npy: 99.82% of the array cells are zero.
00192_thickness_6.npy: 99.94% of the array cells are zero.
00192_thickness_33.npy: 99.75% of the array cells are zero.
00192_thickness_25.npy: 99.77% of the array cells are zero.
00192_thickness_38.npy: 99.78% of the array cells are zero.
00192_thickness_20.npy: 99.77% of the array cells are zero.
00192_thickness_2.npy: 99.96% of the array cells are zero.
00192_thickness_18.npy: 99.76% of the array cells are zero.
00192_thickness_8.npy: 99.91% of the array cells are zero.
00192_thickness_40.npy: 99.79% of the array cells are zero.
00192_thickness_42.npy: 99.79% of the array cells are zero.
00192_thickness_49.npy: 99.83% of the array cells are zero.
00192_thickness_39.npy: 99.78% of the array cells are zero.
00192_thickness_28.npy: 99.76% of the array cells are zero.
00

In [4]:
# Function to calculate the percentage of zeros in a numpy array
# NOTE(review): same logic as the definition in the earlier cell; consider
# defining it once and reusing it.
def calculate_zero_percentage(npy_file):
    """Load an array with ``np.load`` and return what percentage of it is zero.

    Args:
        npy_file: Path (or file object) accepted by ``np.load``.

    Returns:
        The share of elements equal to 0, expressed as a percentage (0-100).
    """
    values = np.load(npy_file)
    zero_mask = values == 0
    fraction_zero = np.count_nonzero(zero_mask) / values.size
    return fraction_zero * 100

# Function to process files in a specified subdirectory (either 'thickness' or 'velocity') and accumulate sparseness
# NOTE(review): this one-argument definition reuses the name of the
# two-argument process_data_type from the earlier cell and replaces it once run.
def process_data_type(data_type):
    """Average the per-file zero percentage of one data type across all models.

    For every directory under ``source_dir``, looks in
    ``04_FinalProcessedData_256/<data_type>`` and measures each ``.npy`` file
    with ``calculate_zero_percentage``.

    Args:
        data_type: Name of the sub-directory to scan ('thickness' or 'velocity').

    Returns:
        The unweighted mean of the per-file percentages, or ``0`` when no
        ``.npy`` file was found.
    """
    per_file = []
    for entry in os.listdir(source_dir):
        model_root = os.path.join(source_dir, entry)
        if not os.path.isdir(model_root):
            continue
        folder = os.path.join(model_root, '04_FinalProcessedData_256', data_type)
        if not os.path.isdir(folder):
            continue
        per_file.extend(
            calculate_zero_percentage(os.path.join(folder, name))
            for name in os.listdir(folder)
            if name.endswith('.npy')
        )

    if not per_file:
        return 0
    return sum(per_file) / len(per_file)

# Calculate overall average sparseness for velocity and thickness
# (velocity is evaluated first, then thickness).
avg_sparseness_velocity, avg_sparseness_thickness = (
    process_data_type('velocity'),
    process_data_type('thickness'),
)

# Print the overall averages
print(
    f"Overall Average Sparseness for Velocity: {avg_sparseness_velocity:.2f}%",
    f"Overall Average Sparseness for Thickness: {avg_sparseness_thickness:.2f}%",
    sep="\n",
)

Overall Average Sparseness for Velocity: 99.52%
Overall Average Sparseness for Thickness: 99.76%
