In [10]:
import pandas as pd
import shutil
import os
import uuid

# Define paths
metadata_path = '/Users/adamkirstein/Code/EchoLock/data/edge-collected-gunshot-audio/gunshot-audio-all-metadata.csv'
base_dir = '/Users/adamkirstein/Code/EchoLock/data/edge-collected-gunshot-audio/edge-collected-gunshot-audio'
destination_dir = '/Users/adamkirstein/Code/EchoLock/data/positive_cases'

# exist_ok makes this safe to re-run and avoids a check-then-create race.
os.makedirs(destination_dir, exist_ok=True)

# Load the metadata CSV
metadata_df = pd.read_csv(metadata_path)

# Detect and handle duplicates (just in case uuid dupes across folders).
# Every row whose filename occurs more than once gets its OWN fresh UUID.
# Rows are selected by position (boolean mask), not by matching the old uuid
# value: matching on the old value would hand the same new UUID to every row
# that shared it, leaving the collision in place, and would also touch rows
# that are not filename-duplicates at all.
# NOTE(review): assumes the CSV has a 'uuid' column -- confirm against the data.
dup_mask = metadata_df.duplicated(subset=['filename'], keep=False)
duplicates = metadata_df[dup_mask]
if dup_mask.any():
    metadata_df.loc[dup_mask, 'uuid'] = [
        str(uuid.uuid4()) for _ in range(int(dup_mask.sum()))
    ]

# Save the updated metadata to a new CSV to preserve changes
updated_metadata_path = os.path.join(destination_dir, 'updated_gunshot-audio-all-metadata.csv')
metadata_df.to_csv(updated_metadata_path, index=False)

def copy_files_and_handle_duplicates(metadata_df, base_dir, destination_dir):
    """Copy each ``.wav`` named in the metadata into one flat directory.

    The tree under ``base_dir`` is scanned once to build a name -> path
    index, instead of being re-walked for every metadata row.

    Args:
        metadata_df: DataFrame with a ``filename`` column holding file names
            without the ``.wav`` extension.
        base_dir: Root directory searched recursively for the audio files.
        destination_dir: Existing directory that receives the copies. Files
            are written flat, so equal names overwrite one another.

    Returns:
        A ``(files_not_found, files_copied_count)`` tuple: the list of
        ``<filename>.wav`` names that were not found under ``base_dir``, and
        the number of copy operations performed (one per metadata row whose
        file was found).
    """
    # When a name exists in several folders, keep the first one os.walk
    # reaches -- the same file a per-row walk that stops at the first match
    # would select.
    first_path_by_name = {}
    for root, _dirs, files in os.walk(base_dir):
        for name in files:
            first_path_by_name.setdefault(name, os.path.join(root, name))

    files_not_found = []
    files_copied_count = 0

    for stem in metadata_df['filename']:
        file_name = stem + '.wav'
        source_file_path = first_path_by_name.get(file_name)
        if source_file_path is None:
            files_not_found.append(file_name)
            continue
        shutil.copy(source_file_path, os.path.join(destination_dir, file_name))
        files_copied_count += 1

    return files_not_found, files_copied_count

# Copy everything listed in the metadata and report the outcome.
files_not_found, files_copied_count = copy_files_and_handle_duplicates(metadata_df, base_dir, destination_dir)

print(f"Total files copied: {files_copied_count}")
if files_not_found:
    print(f"Files not found: {len(files_not_found)}")
else:
    print("All files were found and copied.")

# Delete the original directories and files -- this is irreversible, so it
# only happens when every metadata row was copied AND every filename is
# unique. The copy count is per row, not per distinct file: rows that repeat
# a filename copy the same first-found source over the same destination, so
# the count can equal len(metadata_df) while same-named originals in other
# folders were never copied.
all_rows_copied = not files_not_found and files_copied_count == len(metadata_df)
filenames_unique = metadata_df['filename'].is_unique
if all_rows_copied and filenames_unique:
    shutil.rmtree(base_dir)
    os.remove(metadata_path)
    print("Original files and directories have been successfully removed.")
elif all_rows_copied:
    print("Duplicate filenames in metadata; originals were kept because same-named files overwrite each other in the destination.")
else:
    print("Review missing files before taking further action.")


Total files copied: 2148
All files were found and copied.
Original files and directories have been successfully removed.
