In [19]:
import os
import shutil
import pandas as pd

In [20]:
# Paths
source_dir = "/home/tom/repos/dyna-landslide-surrogate/data"
target_dir = "/home/tom/repos/dyna-landslide-surrogate/data_prune"
csv_path = '/home/tom/repos/dyna-landslide-surrogate/notes/states.csv'  

In [21]:
# Read the CSV file
df = pd.read_csv(csv_path, dtype={'model_id': str, 'end_state': str})

# Ensure target directory exists
if not os.path.exists(target_dir):
    os.makedirs(target_dir)





In [22]:
# Function to copy the files
def copy_files(model_id, end_state):
    if end_state == '9999':
        print(f"Skipping model_id: {model_id} as end_state is 9999")
        return  # Skip processing for end_state 9999

    model_path = os.path.join(source_dir, model_id)
    target_model_path = os.path.join(target_dir, model_id)

    print(f"Processing model_id: {model_id}, end_state: {end_state}")

    # Elevation files
    elevation_src = os.path.join(model_path, '04_FinalProcessedData_256', 'elevation', f'{model_id}_elevation.npy')
    elevation_dest = os.path.join(target_model_path, '04_FinalProcessedData_256', 'elevation')

    if os.path.exists(elevation_src):
        os.makedirs(elevation_dest, exist_ok=True)
        shutil.copy(elevation_src, elevation_dest)
        print(f"Copied elevation file from {elevation_src} to {elevation_dest}")
    else:
        print(f"Elevation file not found: {elevation_src}")

    # Thickness and Velocity files
    for data_type in ['thickness', 'velocity']:
        source_folder = os.path.join(model_path, '04_FinalProcessedData_256', data_type)
        target_folder = os.path.join(target_model_path, '04_FinalProcessedData_256', data_type)

        if os.path.isdir(source_folder):
            os.makedirs(target_folder, exist_ok=True)
            for filename in os.listdir(source_folder):
                if filename.startswith(f'{model_id}_{data_type}_'):
                    state_number = int(filename.split('_')[-1].split('.')[0])
                    if state_number <= int(end_state):
                        shutil.copy(os.path.join(source_folder, filename), target_folder)
                        print(f"Copied {data_type} file from {source_folder}/{filename} to {target_folder}")
        else:
            print(f"No {data_type} folder found for model_id {model_id}")

In [23]:
df

Unnamed: 0,model_id,end_state
0,00002,9999
1,00003,80
2,00004,9999
3,00005,9999
4,00006,9999
...,...,...
377,00404,60
378,00405,50
379,00406,60
380,00407,50


In [24]:
# Iterate over each row in the DataFrame
for index, row in df.iterrows():
    copy_files(row['model_id'], row['end_state'])

print("Files copied successfully.")


Skipping model_id: 00002 as end_state is 9999
Processing model_id: 00003, end_state: 80
Copied elevation file from /home/tom/repos/dyna-landslide-surrogate/data/00003/04_FinalProcessedData_256/elevation/00003_elevation.npy to /home/tom/repos/dyna-landslide-surrogate/data_prune/00003/04_FinalProcessedData_256/elevation
Copied thickness file from /home/tom/repos/dyna-landslide-surrogate/data/00003/04_FinalProcessedData_256/thickness/00003_thickness_6.npy to /home/tom/repos/dyna-landslide-surrogate/data_prune/00003/04_FinalProcessedData_256/thickness
Copied thickness file from /home/tom/repos/dyna-landslide-surrogate/data/00003/04_FinalProcessedData_256/thickness/00003_thickness_54.npy to /home/tom/repos/dyna-landslide-surrogate/data_prune/00003/04_FinalProcessedData_256/thickness
Copied thickness file from /home/tom/repos/dyna-landslide-surrogate/data/00003/04_FinalProcessedData_256/thickness/00003_thickness_28.npy to /home/tom/repos/dyna-landslide-surrogate/data_prune/00003/04_FinalProc