In [1]:
import soundfile as sf
import numpy as np
import pandas as pd
import os
from pathlib import Path
import tensorflow as tf
import tensorflow_hub as hub
import resampy

# Load YAMNet model
print("Loading YAMNet model...")
yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')
yamnet_classes = yamnet_model.graph.get_output_by_name('class_scores').name

# Define your custom class labels and mapping to YAMNet classes (approximate)
custom_labels = [
    "bicycle", "Motorcycle", "car", "pickup", "SUV", "PSV", "Buses",
    "Light Trucks", "Medium trucks", "Heavy trucks", "car_horn",
    "drilling", "engine_idling", "siren"
]
class_id_map = {i: label for i, label in enumerate(custom_labels)}  # 0 to 13
yamnet_to_custom = {
    247: "car_horn",    # Car horn
    295: "siren",       # Siren
    149: "engine_idling",  # Engine (approximate)
    256: "Motorcycle",  # Motorcycle
    # Add more mappings if available, or use defaults for others
}
default_class_id = 0  # Default to "bicycle" if no match

def predict_class(audio, sr):
    """Predict class label using YAMNet and map to custom labels."""
    # Resample to 16000 Hz (YAMNet's expected sample rate)
    if sr != 16000:
        audio = resampy.resample(audio, sr, 16000)
        sr = 16000
    
    # Ensure audio is in the correct shape [samples, channels]
    if len(audio.shape) == 1:
        audio = np.expand_dims(audio, axis=1)
    audio = audio.astype(np.float32)

    # Get YAMNet scores
    scores, embeddings, spectrogram = yamnet_model(audio)
    predicted_class = np.argmax(scores, axis=1)[0]  # Top prediction
    class_name = yamnet_to_custom.get(predicted_class, custom_labels[default_class_id])
    class_id = custom_labels.index(class_name)
    return class_name, class_id

def split_audio_and_create_metadata(audio_dir, output_dir, class_labels, chunk_length=6, overlap=0):
    """
    Split all WAV files into chunks, predict class labels with YAMNet, and create a single metadata file.
    
    Parameters:
    - audio_dir: Directory containing WAV audio files.
    - output_dir: Directory to save road folders, audio chunks, and metadata.
    - class_labels: Dictionary mapping audio filenames to (class, classID) tuples, or path to a CSV.
    - chunk_length: Length of each chunk in seconds (default: 6).
    - overlap: Overlap between chunks in seconds (default: 0).
    """
    
    # Verify audio_dir exists
    if not os.path.exists(audio_dir):
        print(f"Error: audio_dir '{audio_dir}' does not exist.")
        return
    
    # Create output directory if it doesn't exist
    try:
        os.makedirs(output_dir, exist_ok=True)
        print(f"Output directory created or exists: {output_dir}")
    except Exception as e:
        print(f"Error creating output_dir '{output_dir}': {e}")
        return
    
    # Initialize metadata list
    metadata = []
    
    # Handle class labels (optional override)
    if isinstance(class_labels, str):
        try:
            label_df = pd.read_csv(class_labels)
            class_labels = {row['filename']: (row['class'], row['classID']) for _, row in label_df.iterrows()}
            print(f"Loaded class labels from CSV: {class_labels}")
        except Exception as e:
            print(f"Error loading class_labels CSV '{class_labels}': {e}")
            return
    
    # Find all audio files in audio_dir
    audio_files = [str(p) for p in Path(audio_dir).glob("*.wav")]
    print(f"Found {len(audio_files)} WAV files in {audio_dir}: {audio_files}")
    
    if not audio_files:
        print("No WAV files found. Exiting.")
        return
    
    # Process each audio file
    for audio_path in audio_files:
        audio_filename = os.path.basename(audio_path)
        print(f"Processing file: {audio_filename}")
        
        # Get class label and class id from dictionary (overridden by prediction)
        class_label, class_id = class_labels.get(audio_filename, ("unknown", -1))
        print(f"Initial class_label: {class_label}, class_id: {class_id}")
        
        # Generate fsID
        fsID = audio_filename.split('-')[0] if '-' in audio_filename else audio_filename.split('.')[0]
        print(f"fsID: {fsID}")
        
        # Define road name as the filename without extension
        road_name = os.path.splitext(audio_filename)[0]
        road_dir = os.path.join(output_dir, road_name)
        try:
            os.makedirs(road_dir, exist_ok=True)
            print(f"Created road directory: {road_dir}")
        except Exception as e:
            print(f"Error creating road_dir '{road_dir}': {e}")
            continue
        
        # Load audio file with soundfile
        try:
            audio, sr = sf.read(audio_path)
            audio_duration = len(audio) / sr
            print(f"Loaded audio with soundfile: {audio_path}, duration: {audio_duration}s, sample rate: {sr}Hz")
        except Exception as e:
            print(f"Error loading audio file '{audio_path}' with soundfile: {e}")
            continue
        
        # Calculate number of samples per chunk
        samples_per_chunk = int(chunk_length * sr)
        step_size = int((chunk_length - overlap) * sr)
        print(f"Samples per chunk: {samples_per_chunk}, step size: {step_size}")
        
        # Split audio into chunks
        for i in range(0, len(audio) - samples_per_chunk + 1, step_size):
            start_sample = i
            end_sample = i + samples_per_chunk
            
            # Extract chunk
            chunk = audio[start_sample:end_sample]
            
            # Define chunk filename
            chunk_index = i // step_size
            slice_file_name = f"{fsID}-{class_id}-{chunk_index}.wav"
            chunk_path = os.path.join(road_dir, slice_file_name)
            
            # Save chunk as WAV file
            try:
                sf.write(chunk_path, chunk, sr)
                print(f"Saved chunk: {chunk_path}")
            except Exception as e:
                print(f"Error saving chunk '{chunk_path}': {e}")
                continue
            
            # Predict class for this chunk
            try:
                predicted_class, predicted_id = predict_class(chunk, sr)
                print(f"Predicted class for {slice_file_name}: {predicted_class}, ID: {predicted_id}")
            except Exception as e:
                print(f"Error predicting class for '{chunk_path}': {e}")
                predicted_class, predicted_id = "unknown", -1
            
            # Calculate start and end times
            start_time = start_sample / sr
            end_time = end_sample / sr
            
            # Append to metadata with predicted values
            metadata.append({
                "slice_file_name": slice_file_name,
                "fsID": fsID,
                "start": start_time,
                "end": end_time,
                "road": road_name,
                "classID": predicted_id,
                "class": predicted_class
            })
            print(f"Added metadata for chunk: {slice_file_name}")
        
        # Handle the last chunk if audio length is not perfectly divisible
        if len(audio) % step_size > 0:
            start_sample = len(audio) - samples_per_chunk
            if start_sample >= 0:
                chunk = audio[start_sample:]
                chunk_index = len(metadata)
                slice_file_name = f"{fsID}-{class_id}-{chunk_index}.wav"
                chunk_path = os.path.join(road_dir, slice_file_name)
                
                try:
                    sf.write(chunk_path, chunk, sr)
                    print(f"Saved last chunk: {chunk_path}")
                except Exception as e:
                    print(f"Error saving last chunk '{chunk_path}': {e}")
                    continue
                
                # Predict class for this chunk
                try:
                    predicted_class, predicted_id = predict_class(chunk, sr)
                    print(f"Predicted class for {slice_file_name}: {predicted_class}, ID: {predicted_id}")
                except Exception as e:
                    print(f"Error predicting class for '{chunk_path}': {e}")
                    predicted_class, predicted_id = "unknown", -1
                
                start_time = start_sample / sr
                end_time = len(audio) / sr
                metadata.append({
                    "slice_file_name": slice_file_name,
                    "fsID": fsID,
                    "start": start_time,
                    "end": end_time,
                    "road": road_name,
                    "classID": predicted_id,
                    "class": predicted_class
                })
                print(f"Added metadata for last chunk: {slice_file_name}")
    
    # Create metadata DataFrame and save to CSV
    if metadata:
        metadata_df = pd.DataFrame(metadata)
        metadata_path = os.path.join(output_dir, "metadata.csv")
        try:
            metadata_df.to_csv(metadata_path, index=False)
            print(f"Metadata saved to {metadata_path}")
            print(f"Created {len(metadata)} chunks from {len(audio_files)} files.")
        except Exception as e:
            print(f"Error saving metadata to '{metadata_path}': {e}")
    else:
        print("No metadata to save. No chunks were created.")

# Example usage
audio_dir = "data/DATA"  # Directory with WAV files
output_dir = "data/DATA/output_chunks"  # Directory to save road folders, chunks, and metadata
class_labels = {}  # Optional, overridden by predictions
chunk_length = 6  # Chunk length in seconds
overlap = 0       # Overlap in seconds

split_audio_and_create_metadata(audio_dir, output_dir, class_labels, chunk_length, overlap)

  from pkg_resources import parse_version



Loading YAMNet model...












AttributeError: '_UserObject' object has no attribute 'graph'