In [None]:
import os
import shutil
import glob
import re

In [None]:
# Configuration: root folders used by the path definitions below.
# Both are relative paths, so they resolve against the current working directory.
RAW_DATA_PATH = "1_raw_data"              # root of the raw source datasets
PROCESSED_DATA_PATH = "2_processed_data"  # root of the sorted output folders

In [4]:
# 1. Destination folders under PROCESSED_DATA_PATH, one per label.
#    These are only path strings here; nothing is created on disk by this cell.
ANOMALY_PATH = os.path.join(PROCESSED_DATA_PATH, "1_anomaly_sounds")
NORMAL_PATH = os.path.join(PROCESSED_DATA_PATH, "0_normal_sounds")

# 2. Source folders under RAW_DATA_PATH, one per raw dataset.
CNC_PATH = os.path.join(RAW_DATA_PATH, "cnc cutting dataset")
DRILLING_ANOMALY_PATH = os.path.join(RAW_DATA_PATH, "Drilling", "Anomaly")
DRILLING_NORMAL_PATH = os.path.join(RAW_DATA_PATH, "Drilling", "Normal")
GLASS_PATH = os.path.join(RAW_DATA_PATH, "Glassbreaking")
SAW_PATH = os.path.join(RAW_DATA_PATH, "hand_saw", "hand_saw") # The dataset has a 'hand_saw' subfolder nested inside 'hand_saw'


In [5]:

# Path to the UrbanSound8K folder that contains fold1, fold2, etc.
# The outer folder name is matched with a wildcard (UrbanSound8K*) and must
# contain an inner 'UrbanSound8K' folder.
# IMPORTANT: Update the pattern if your layout is different.
_urban_sound_pattern = os.path.join(RAW_DATA_PATH, "UrbanSound8K*", "UrbanSound8K")

# Sort the matches so the same folder is picked on every run when more than
# one download is present (glob returns matches in arbitrary order).
_urban_sound_matches = sorted(glob.glob(_urban_sound_pattern))
if not _urban_sound_matches:
    # Fail with a message that names what was searched for, instead of the
    # bare IndexError that indexing an empty list would raise.
    raise FileNotFoundError(
        f"UrbanSound8K dataset not found: no folder matches '{_urban_sound_pattern}'"
    )
URBAN_SOUND_BASE_PATH = _urban_sound_matches[0]

# --- UrbanSound8K Class Definitions ---
# We use the class ID from the filename (e.g., 21683-9-0-34.wav -> class is '9')
# Based on UrbanSound8K documentation:
ANOMALY_CLASSES = {
    '3': 'drilling',
    '6': 'gun_shot',
    '7': 'jackhammer',
    '9': 'siren'
}
NORMAL_CLASSES = {
    '0': 'air_conditioner',
    '2': 'children_playing',
    '4': 'dog_bark',
    '5': 'engine_idling',
    '8': 'street_music'
    # Class '1' (car_horn) is omitted as it could be either normal or an alert.
}


In [6]:

# --- HELPER FUNCTION ---
def copy_files(src_path, dest_path, file_extension="*.wav"):
    """Copy every file in ``src_path`` matching ``file_extension`` into ``dest_path``.

    This is a best-effort helper: problems are reported with ``print`` and do
    not raise, so one bad folder or file does not stop the notebook.

    Args:
        src_path: Folder to copy from. Only its direct children are considered.
        dest_path: Folder to copy into. It is created if it does not exist.
        file_extension: Glob pattern applied to the file names in ``src_path``.

    Returns:
        None. A summary line with the number of copied files is printed.
    """
    # A missing source folder would otherwise be reported as "Copied 0 files",
    # which hides a wrong path.
    if not os.path.isdir(src_path):
        print(f"  ERROR copying from '{src_path}': source folder not found")
        return

    # shutil.copy treats a non-existing destination as a *file name*, so every
    # source file would overwrite one single file. Make sure it is a folder.
    try:
        os.makedirs(dest_path, exist_ok=True)
    except OSError as e:
        print(f"  ERROR copying from '{src_path}': {e}")
        return

    # Escape the folder part so characters such as '[' in a folder name are not
    # interpreted as glob wildcards; sort for a reproducible copy order.
    pattern = os.path.join(glob.escape(src_path), file_extension)

    files_copied = 0
    files_failed = 0
    for file_path in sorted(glob.glob(pattern)):
        try:
            shutil.copy(file_path, dest_path)
        except OSError as e:
            # Keep going: one unreadable file should not abort the whole folder.
            files_failed += 1
            print(f"  ERROR copying '{file_path}': {e}")
        else:
            files_copied += 1

    print(f"  Copied {files_copied} files from '{src_path}'")
    if files_failed:
        print(f"  WARNING: {files_failed} files from '{src_path}' could not be copied")

In [7]:
# --- MAIN EXECUTION ---
print("Starting automatic data preparation...")

# 1. Make sure both label folders exist before anything is copied into them
for destination_dir in (ANOMALY_PATH, NORMAL_PATH):
    os.makedirs(destination_dir, exist_ok=True)
print(f"Created destination folders in '{PROCESSED_DATA_PATH}'")

Starting automatic data preparation...
Created destination folders in '2_processed_data'


In [8]:
# 2. Process ANOMALY sounds
print("\n--- Processing ANOMALY Sounds ---")

# Every folder listed here is copied into the anomaly folder, in this order.
for anomaly_source in (CNC_PATH, DRILLING_ANOMALY_PATH, GLASS_PATH, SAW_PATH):
    copy_files(anomaly_source, ANOMALY_PATH)


--- Processing ANOMALY Sounds ---
  Copied 49 files from '1_raw_data\cnc cutting dataset'
  Copied 67 files from '1_raw_data\Drilling\Anomaly'
  Copied 39 files from '1_raw_data\Glassbreaking'
  Copied 22 files from '1_raw_data\hand_saw\hand_saw'


In [9]:
# 3. Process NORMAL sounds
# Only the 'Drilling/Normal' folder is copied into the normal folder here.
print("\n--- Processing NORMAL Sounds ---")
copy_files(src_path=DRILLING_NORMAL_PATH, dest_path=NORMAL_PATH)



--- Processing NORMAL Sounds ---
  Copied 67 files from '1_raw_data\Drilling\Normal'


In [10]:
# 4. Process UrbanSound8K
print("\n--- Processing UrbanSound8K ---")

# Running totals of UrbanSound8K files copied per label; both start at zero.
urban_anomaly_count = urban_normal_count = 0



--- Processing UrbanSound8K ---


In [11]:
# Matches file names made of four dash-separated numbers followed by '.wav',
# e.g. 21683-9-0-34.wav. The single capture group is the second number,
# which is used as the class ID ('9' in the example).
urban_pattern = re.compile(r"^\d+-(\d+)-\d+-\d+\.wav$")

In [12]:
# Filename pattern like: 21683-9-0-34.wav

# Start from zero inside this cell so that re-running it does not add to the
# counts left over from a previous run.
urban_anomaly_count = 0
urban_normal_count = 0

# shutil.copy treats a non-existing destination as a file name, so make sure
# both targets are real folders even if this cell is run on its own.
os.makedirs(ANOMALY_PATH, exist_ok=True)
os.makedirs(NORMAL_PATH, exist_ok=True)

# Loop through all 'fold*' subdirectories. glob returns matches in arbitrary
# order, so sort them to get the same (lexicographic) scan order on every run.
for fold_dir in sorted(glob.glob(os.path.join(URBAN_SOUND_BASE_PATH, "fold*"))):
    if not os.path.isdir(fold_dir):
        continue

    print(f"  Scanning {os.path.basename(fold_dir)}...")
    for filename in sorted(os.listdir(fold_dir)):
        match = urban_pattern.match(filename)
        if not match:
            # Not an UrbanSound8K clip name (e.g. a hidden or metadata file).
            continue

        class_id = match.group(1)  # Second number in the file name is the class ID
        src_file = os.path.join(fold_dir, filename)

        if class_id in ANOMALY_CLASSES:
            shutil.copy(src_file, ANOMALY_PATH)
            urban_anomaly_count += 1
        elif class_id in NORMAL_CLASSES:
            shutil.copy(src_file, NORMAL_PATH)
            urban_normal_count += 1
        # Any other class ID is in neither table and is skipped on purpose.



  Scanning fold1...
  Scanning fold3...
  Scanning fold5...
  Scanning fold6...
  Scanning fold7...


In [13]:
# Report how many UrbanSound8K files were copied into each label folder.
print(f"  Copied {urban_anomaly_count} ANOMALY files from UrbanSound8K.")
print(f"  Copied {urban_normal_count} NORMAL files from UrbanSound8K.")

# Closing banner: points at the two output folders and names the next script.
print("\n------------------------------------------")
print("Automatic data preparation finished!")
print(f"Check your '{ANOMALY_PATH}' and '{NORMAL_PATH}' folders.")
print("You are now ready to run '02_feature_extraction.py'.")

  Copied 400 ANOMALY files from UrbanSound8K.
  Copied 569 NORMAL files from UrbanSound8K.

------------------------------------------
Automatic data preparation finished!
Check your '2_processed_data\1_anomaly_sounds' and '2_processed_data\0_normal_sounds' folders.
You are now ready to run '02_feature_extraction.py'.
