In [1]:
import os
import librosa
import numpy as np
import glob
import warnings


In [2]:

# --- CONFIGURATION ---
# Directory layout: processed clips are read from PROCESSED_DATA_PATH,
# extracted features are written to FEATURES_PATH.
PROCESSED_DATA_PATH = "2_processed_data"
FEATURES_PATH = "3_features"
NORMAL_PATH = os.path.join(PROCESSED_DATA_PATH, "0_normal_sounds")
ANOMALY_PATH = os.path.join(PROCESSED_DATA_PATH, "1_anomaly_sounds")

# Audio processing settings
SAMPLE_RATE = 22050  # Hz; rate requested from the loader for every clip
DURATION = 4         # Seconds; clips are padded/truncated to this length
N_MFCC = 13          # MFCC coefficients kept per frame
N_FFT = 2048         # FFT window size, in samples
HOP_LENGTH = 512     # Samples between the starts of consecutive windows

# Derived sizes
# Samples in one full-length clip
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE
# MFCC frames in one full-length clip: one per whole hop, plus one so the
# final (partial) hop is covered as well
EXPECTED_FRAMES = SAMPLES_PER_TRACK // HOP_LENGTH + 1


In [3]:
# --- FUNCTIONS ---

def extract_features(file_path):
    """Turn one audio file into a fixed-width MFCC matrix.

    The clip is loaded as mono at SAMPLE_RATE (reading at most DURATION
    seconds), zero-padded or cut to exactly SAMPLES_PER_TRACK samples, and
    converted to N_MFCC coefficients per frame. The time axis (axis 1) is then
    zero-padded or cut to EXPECTED_FRAMES columns so every result has the
    same width.

    Args:
        file_path: Path to the audio file to load.

    Returns:
        The MFCC array on success, or None if any step raised; the error is
        printed rather than propagated so a caller can skip the file.
    """
    try:
        # Loading clips shorter than DURATION can emit UserWarnings; mute
        # them for the load only.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)
            audio, rate = librosa.load(file_path, sr=SAMPLE_RATE, mono=True, duration=DURATION)

        # Force the waveform to exactly SAMPLES_PER_TRACK samples.
        sample_shortfall = SAMPLES_PER_TRACK - len(audio)
        if sample_shortfall > 0:
            # Too short: append trailing silence.
            audio = np.pad(audio, (0, sample_shortfall), mode='constant')
        else:
            # Long enough: keep the leading samples only. duration=DURATION
            # should already cap the length; this is a safety net.
            audio = audio[:SAMPLES_PER_TRACK]

        coefficients = librosa.feature.mfcc(
            y=audio, sr=rate, n_mfcc=N_MFCC, n_fft=N_FFT, hop_length=HOP_LENGTH
        )

        # Force the time axis to exactly EXPECTED_FRAMES columns.
        frame_shortfall = EXPECTED_FRAMES - coefficients.shape[1]
        if frame_shortfall > 0:
            coefficients = np.pad(coefficients, ((0, 0), (0, frame_shortfall)), mode='constant')
        else:
            coefficients = coefficients[:, :EXPECTED_FRAMES]

        return coefficients

    except Exception as e:
        # Best-effort: report the failure and let the caller skip this file.
        print(f"Error processing {file_path}: {e}")
        return None

In [4]:

def _extract_folder(folder, label, kind, X_data, y_data):
    """Extract features for every .wav/.mp3 file directly inside `folder`.

    Successful results are appended to `X_data` and the matching `label` to
    `y_data` (both lists are mutated in place). Files for which
    extract_features returns None are skipped.

    Args:
        folder: Directory to scan (non-recursive).
        label: Integer class label stored for every file in this folder.
        kind: Human-readable class name used in progress messages.
        X_data: List receiving one feature array per processed file.
        y_data: List receiving one label per processed file.

    Returns:
        Number of files from this folder that were processed successfully.
    """
    # glob returns matches in arbitrary, platform-dependent order. Sort each
    # group so the row order of the saved arrays is reproducible across runs
    # and machines; .wav files still come before .mp3 files.
    audio_files = (
        sorted(glob.glob(os.path.join(folder, "*.wav")))
        + sorted(glob.glob(os.path.join(folder, "*.mp3")))
    )
    if not audio_files:
        print(f"Warning: No audio files found in {folder}. Did 01_data_preparation.py run correctly?")

    total = len(audio_files)
    processed = 0
    for position, file_path in enumerate(audio_files, start=1):
        print(f"  Processing {kind} file {position}/{total}: {os.path.basename(file_path)}")
        features = extract_features(file_path)
        if features is not None:
            X_data.append(features)
            y_data.append(label)
            processed += 1
    print(f"Processed {processed} {kind} files.")
    return processed


def process_data():
    """Extract features from both class folders and save them as .npy files.

    Anomaly clips (ANOMALY_PATH) are labelled 1 and processed first; normal
    clips (NORMAL_PATH) are labelled 0. The stacked features are written to
    FEATURES_PATH/X_data.npy and the labels to FEATURES_PATH/y_labels.npy.
    FEATURES_PATH is created if missing. If no file could be processed, an
    error is printed and nothing is saved.

    Returns:
        None.
    """
    X_data = []  # Feature arrays, one per successfully processed file
    y_data = []  # Labels aligned with X_data (0 = normal, 1 = anomaly)

    os.makedirs(FEATURES_PATH, exist_ok=True)

    # 1. Anomaly sounds (label = 1)
    print("Processing anomaly sounds...")
    _extract_folder(ANOMALY_PATH, 1, "anomaly", X_data, y_data)

    # 2. Normal sounds (label = 0)
    print("\nProcessing normal sounds...")
    _extract_folder(NORMAL_PATH, 0, "normal", X_data, y_data)

    # 3. Stack into NumPy arrays
    X = np.array(X_data)
    y = np.array(y_data)

    print(f"\nTotal samples processed: {X.shape[0]}")

    # 4. Bail out before writing empty arrays
    if X.shape[0] == 0:
        # Report the configured directory instead of a hard-coded copy of it.
        print(f"Error: No data was processed. Check your paths in '{PROCESSED_DATA_PATH}'.")
        return

    # 5. Save the features
    print("Saving features...")
    np.save(os.path.join(FEATURES_PATH, "X_data.npy"), X)
    np.save(os.path.join(FEATURES_PATH, "y_labels.npy"), y)

    print(f"Features saved to {FEATURES_PATH}")
    print(f"Shape of X (features): {X.shape}")
    print(f"Shape of y (labels):   {y.shape}")
    print("\nFeature extraction complete! You are ready to run '03_train_model.py'.")

# --- MAIN EXECUTION ---
# Run the whole extraction pipeline only when this code executes as the main
# module; the guard keeps a plain import of this code from starting the run.
if __name__ == "__main__":
    process_data()

Processing anomaly sounds...
  Processing anomaly file 1/577: 1-18810-A-49.wav
  Processing anomaly file 2/577: 1-20133-A-39.wav
  Processing anomaly file 3/577: 1-23706-A-49.wav
  Processing anomaly file 4/577: 1-46353-A-49.wav
  Processing anomaly file 5/577: 1-7974-A-49.wav
  Processing anomaly file 6/577: 1-84536-A-39.wav
  Processing anomaly file 7/577: 1-84704-A-39.wav
  Processing anomaly file 8/577: 1-84705-A-39.wav
  Processing anomaly file 9/577: 1-85168-A-39.wav
  Processing anomaly file 10/577: 1-85184-A-39.wav
  Processing anomaly file 11/577: 1-88807-A-39.wav
  Processing anomaly file 12/577: 1-977-A-39.wav
  Processing anomaly file 13/577: 1-9886-A-49.wav
  Processing anomaly file 14/577: 1-9887-A-49.wav
  Processing anomaly file 15/577: 100032-3-0-0.wav
  Processing anomaly file 16/577: 102547-3-0-2.wav
  Processing anomaly file 17/577: 102547-3-0-7.wav
  Processing anomaly file 18/577: 102547-3-0-8.wav
  Processing anomaly file 19/577: 104998-7-0-5.wav
  Processing ano