In [6]:
import os
import librosa
import numpy as np
import soundfile as sf

def detect_taps(audio_data, sample_rate, threshold=0.04, frame_length=2048, hop_length=512):
    """
    Detects tap locations in an audio signal using RMS energy thresholding.

    Parameters:
    - audio_data (np.ndarray): Audio waveform
    - sample_rate (int): Sampling rate of the audio
    - threshold (float): RMS threshold to detect a tap
    - frame_length (int): Frame size for RMS calculation
    - hop_length (int): Hop size for RMS calculation

    Returns:
    - tap_times (np.ndarray): Array of detected tap times in seconds
    - taps (np.ndarray): Array of frame indices corresponding to detected taps
    """
    rms = librosa.feature.rms(y=audio_data, frame_length=frame_length, hop_length=hop_length)[0]
    taps = np.where(rms > threshold)[0]
    tap_times = librosa.frames_to_time(taps, sr=sample_rate, hop_length=hop_length)
    return tap_times, taps

def extract_and_save_taps(audio_path, output_dir, material_type, tap_window=0.1, threshold=0.04):
    """
    Detects tap events in an audio file and saves individual tap segments.

    Parameters:
    - audio_path (str): Path to the audio file (.wav or .mp3)
    - output_dir (str): Directory to save the extracted tap .wav files
    - material_type (str): Material type for tap name segregation
    - tap_window (float): Duration in seconds to extract around each tap
    - threshold (float): RMS energy threshold for tap detection
    """
    # Load audio
    y, sr = librosa.load(audio_path, sr=None)
    tap_times, _ = detect_taps(y, sr, threshold=threshold)

    # Create output directory if not exists
    os.makedirs(output_dir, exist_ok=True)

    for i, tap_time in enumerate(tap_times):
        start_sample = int(tap_time * sr - tap_window * sr / 2)
        end_sample = int(tap_time * sr + tap_window * sr / 2)
        start_sample = max(start_sample, 0)
        end_sample = min(end_sample, len(y))
        
        segment = y[start_sample:end_sample]
        output_path = os.path.join(output_dir, f"{material_type}_tap_{i+1}.wav")
        sf.write(output_path, segment, sr)
        print(f"[✓] Saved: {output_path} at {tap_time:.2f}s")

def compute_mel_spectrogram(segment, sr, n_mels=128, fmax=8000):
    """
    Computes the Mel-spectrogram of a tap audio segment.

    Parameters:
    - segment (np.ndarray): Audio segment
    - sr (int): Sample rate
    - n_mels (int): Number of Mel bands
    - fmax (int): Maximum frequency

    Returns:
    - np.ndarray: Flattened Mel-spectrogram in decibels
    """
    S = librosa.feature.melspectrogram(y=segment, sr=sr, n_mels=n_mels, fmax=fmax)
    S_dB = librosa.power_to_db(S, ref=np.max)
    return S_dB.flatten()


# Extract individual good audio taps from a source audio

In [7]:
resource = './resources/material/train-data'
audio_path = f'{resource}/good-material.wav'
output_dir = f'{resource}/good-material-taps'

extract_and_save_taps(audio_path, output_dir, "good")


[✓] Saved: ./resources/material/train-data/good-material-taps/good_tap_1.wav at 1.70s
[✓] Saved: ./resources/material/train-data/good-material-taps/good_tap_2.wav at 1.71s
[✓] Saved: ./resources/material/train-data/good-material-taps/good_tap_3.wav at 1.72s
[✓] Saved: ./resources/material/train-data/good-material-taps/good_tap_4.wav at 1.73s
[✓] Saved: ./resources/material/train-data/good-material-taps/good_tap_5.wav at 2.04s
[✓] Saved: ./resources/material/train-data/good-material-taps/good_tap_6.wav at 2.05s
[✓] Saved: ./resources/material/train-data/good-material-taps/good_tap_7.wav at 2.30s
[✓] Saved: ./resources/material/train-data/good-material-taps/good_tap_8.wav at 2.31s
[✓] Saved: ./resources/material/train-data/good-material-taps/good_tap_9.wav at 2.32s
[✓] Saved: ./resources/material/train-data/good-material-taps/good_tap_10.wav at 2.33s
[✓] Saved: ./resources/material/train-data/good-material-taps/good_tap_11.wav at 2.35s
[✓] Saved: ./resources/material/train-data/good-mate

# Extract individual bad audio taps from a source audio

In [8]:
resource = './resources/material/train-data'
audio_path = f'{resource}/bad-material.wav'
output_dir = f'{resource}/bad-material-taps'

extract_and_save_taps(audio_path, output_dir, "bad")


[✓] Saved: ./resources/material/train-data/bad-material-taps/bad_tap_1.wav at 0.55s
[✓] Saved: ./resources/material/train-data/bad-material-taps/bad_tap_2.wav at 0.56s
[✓] Saved: ./resources/material/train-data/bad-material-taps/bad_tap_3.wav at 0.57s
[✓] Saved: ./resources/material/train-data/bad-material-taps/bad_tap_4.wav at 0.58s
[✓] Saved: ./resources/material/train-data/bad-material-taps/bad_tap_5.wav at 0.59s
[✓] Saved: ./resources/material/train-data/bad-material-taps/bad_tap_6.wav at 0.82s
[✓] Saved: ./resources/material/train-data/bad-material-taps/bad_tap_7.wav at 0.84s
[✓] Saved: ./resources/material/train-data/bad-material-taps/bad_tap_8.wav at 0.85s
[✓] Saved: ./resources/material/train-data/bad-material-taps/bad_tap_9.wav at 0.86s
[✓] Saved: ./resources/material/train-data/bad-material-taps/bad_tap_10.wav at 0.87s
[✓] Saved: ./resources/material/train-data/bad-material-taps/bad_tap_11.wav at 1.10s
[✓] Saved: ./resources/material/train-data/bad-material-taps/bad_tap_12.wa