In [223]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
import librosa.display
from mpl_toolkits.mplot3d import Axes3D
import plotly.express as px
import plotly.graph_objects as go
import os
from sklearn.ensemble import IsolationForest
import shutil


In [202]:
# Analysis-window lengths in seconds. Each value is passed as `target_duration`
# to the feature-extraction helpers, so every clip is truncated or zero-padded
# to this length before features are computed.
CLAP_DURATION = 0.055  # window applied to clap recordings
ECHO_DURATION = 0.15  # window applied to echo recordings

In [203]:
def load_and_truncate_audio(audio_file, target_duration=3.0, sr=44100):
    """
    Read an audio file and force it to a fixed length.

    The signal is loaded at the requested sampling rate and then either cut
    after ``int(target_duration * sr)`` samples or right-padded with zeros
    until it reaches that many samples.

    Parameters:
    - audio_file: Path to the audio file.
    - target_duration: Desired length of the returned signal (in seconds).
    - sr: Sampling rate to use for loading the audio.

    Returns:
    - y: The truncated or zero-padded audio signal.
    - sr: The sampling rate returned by the loader.
    """
    signal, sr = librosa.load(audio_file, sr=sr)
    wanted = int(target_duration * sr)
    missing = wanted - len(signal)

    # Longer than requested: keep only the leading samples.
    if missing < 0:
        return signal[:wanted], sr

    # Exactly right or too short: append `missing` zeros (possibly none).
    return np.pad(signal, (0, missing), 'constant', constant_values=0), sr

In [204]:
def extract_audio_features(audio_file, target_duration=3.0):
    """
    Build one flat feature vector for an audio file.

    The file is first loaded and truncated/padded to ``target_duration``
    seconds. The returned vector is the concatenation, in this order, of the
    flattened spectral centroid, spectral bandwidth, spectral rolloff, MFCC
    and chroma matrices, followed by the RMS energy averaged over frames.

    Parameters:
    - audio_file: Path to the audio file.
    - target_duration: Duration to which the audio should be truncated (in seconds).

    Returns:
    - features: A flattened (1D) array of features extracted from the audio.
    """
    y, sr = load_and_truncate_audio(audio_file, target_duration)

    # Frame-wise descriptors, listed in the order they appear in the output.
    framewise = [
        librosa.feature.spectral_centroid(y=y, sr=sr),
        librosa.feature.spectral_bandwidth(y=y, sr=sr),
        librosa.feature.spectral_rolloff(y=y, sr=sr),
        librosa.feature.mfcc(y=y, sr=sr),
        librosa.feature.chroma_stft(y=y, sr=sr),
    ]

    # RMS energy is the only descriptor that is averaged over frames.
    mean_rms = np.mean(librosa.feature.rms(y=y), axis=1)

    pieces = [matrix.flatten() for matrix in framewise]
    pieces.append(mean_rms.flatten())
    return np.concatenate(pieces)

In [205]:
# Batch helper: run the single-file extractor over a list of files
def extract_features_from_files(audio_files, target_duration=3.0):
    """
    Extract one feature vector per audio file and stack them.

    Parameters:
    - audio_files: List of paths to audio files.
    - target_duration: Duration to which each file is truncated (in seconds).

    Returns:
    - A numpy array built from the per-file feature vectors (one row per
      file when all vectors have the same length).
    """
    return np.array([
        extract_audio_features(path, target_duration)
        for path in audio_files
    ])

In [206]:
def visualize_audio(features, name='Audio', n=6):
    """
    Project audio feature vectors to 3-D with t-SNE and show them in an
    interactive plotly scatter plot.

    Parameters:
    - features: 2D array-like with one row of extracted features per audio file.
    - name: Label used in the plot title. If it contains 'clap'
      (case-insensitive) the markers are blue, otherwise red.
    - n: Retained for backward compatibility. The number of markers is taken
      from `features` itself, so it no longer has to match the row count.

    Raises:
    - ValueError: If `features` is not 2D or has fewer than two rows
      (t-SNE cannot embed fewer than two samples).
    """
    features = np.asarray(features)
    if features.ndim != 2 or features.shape[0] < 2:
        raise ValueError(
            "visualize_audio needs a 2D feature matrix with at least two rows, "
            f"got shape {features.shape}"
        )
    n_samples = features.shape[0]

    # Normalize the feature data (important for t-SNE)
    features_scaled = StandardScaler().fit_transform(features)

    # scikit-learn requires perplexity < n_samples; keep the original value of
    # 10 whenever there are enough samples and shrink it only for small sets.
    perplexity = min(10, n_samples - 1)

    # Apply t-SNE for dimensionality reduction (fixed seed for repeatability)
    tsne = TSNE(n_components=3, random_state=42, perplexity=perplexity)
    features_tsne = tsne.fit_transform(features_scaled)

    # One colour per plotted point, chosen from the audio type (Clap vs Echo)
    colour = 'blue' if 'clap' in name.lower() else 'red'
    colors = [colour] * n_samples

    # Create a 3D scatter plot with plotly
    fig = go.Figure()
    fig.add_trace(go.Scatter3d(
        x=features_tsne[:, 0],
        y=features_tsne[:, 1],
        z=features_tsne[:, 2],
        mode='markers',
        marker=dict(size=8, color=colors, opacity=0.8)
    ))

    # Update the layout for better visualization
    fig.update_layout(
        title=f'3D Plot of {name} Features using t-SNE',
        scene=dict(
            xaxis_title='t-SNE 1',
            yaxis_title='t-SNE 2',
            zaxis_title='t-SNE 3'
        )
    )

    # Show the plot interactively
    fig.show()
In [212]:
def detect_non_anomalies(precomputed_features, new_audio_files, threshold=3.0, target_duration=3.0, batch_size=10):
    """
    Detect non-anomalous audio samples based on precomputed features.

    A file is accepted when every one of its features lies within
    `threshold` standard deviations of the per-feature mean of the reference
    set. Features whose reference standard deviation is zero are compared
    with a divisor of 1 to avoid division by zero.

    Parameters:
    - precomputed_features: A 2D array where each row represents the extracted features of a reference audio file.
    - new_audio_files: List of paths to new audio files to check for non-anomalies.
    - threshold: Z-score threshold for identifying non-anomalous files (inclusive).
    - target_duration: Duration to which the audio should be truncated (in seconds).
    - batch_size: Number of audio files whose features are held in memory at once.

    Returns:
    - non_anomalous_audio: List of non-anomalous audio files, in input order.

    Raises:
    - ValueError: If `batch_size` is smaller than 1, if `precomputed_features`
      is not a non-empty 2D array, or if the features extracted from the new
      files do not have the same length as the reference features.
    """
    # A negative step would make range() empty and silently return no files.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    features_matrix = np.asarray(precomputed_features)
    if features_matrix.ndim != 2 or features_matrix.shape[0] == 0:
        raise ValueError(
            "precomputed_features must be a non-empty 2D array (files x features), "
            f"got shape {features_matrix.shape}"
        )

    # Per-feature statistics of the reference set
    feature_means = np.mean(features_matrix, axis=0)
    feature_stds = np.std(features_matrix, axis=0)

    # Features with no variation: divide by 1 instead of 0
    feature_stds = np.where(feature_stds == 0, 1, feature_stds)

    non_anomalous_audio = []

    for start in range(0, len(new_audio_files), batch_size):
        batch_files = new_audio_files[start:start + batch_size]

        batch_features = np.array([
            extract_audio_features(file, target_duration) for file in batch_files
        ])

        # Guard against silent broadcasting when the new files were processed
        # with settings that yield a different number of features.
        if batch_features.ndim != 2 or batch_features.shape[1] != feature_means.shape[0]:
            raise ValueError(
                "features of the new files have shape "
                f"{batch_features.shape}, expected (n_files, {feature_means.shape[0]})"
            )

        z_scores_batch = (batch_features - feature_means) / feature_stds

        # One boolean per file: True when every feature is within the threshold
        within_threshold = np.all(np.abs(z_scores_batch) <= threshold, axis=1)
        non_anomalous_audio.extend(
            file for file, keep in zip(batch_files, within_threshold) if keep
        )

    return non_anomalous_audio


In [208]:
# Reference clap recordings: the six originals plus their filtered versions
original_claps = [
    '../data/claps/clap-1.wav',
    '../data/claps/clap-2.wav',
    '../data/claps/clap-3.wav',
    '../data/claps/clap-4.wav',
    '../data/claps/clap-5.wav',
    '../data/claps/clap-6.wav',
]
filtered_claps = [
    '../data/filtered_claps/clap_1.wav',
    '../data/filtered_claps/clap_2.wav',
    '../data/filtered_claps/clap_3.wav',
    '../data/filtered_claps/clap_4.wav',
    '../data/filtered_claps/clap_5.wav',
    '../data/filtered_claps/clap_6.wav',
]

claps = [*original_claps, *filtered_claps]

# One feature row per clap, computed on the clap analysis window
clap_audio_features = extract_features_from_files(claps, target_duration=CLAP_DURATION)

# 3-D t-SNE view of the reference clap features
visualize_audio(clap_audio_features, name='Clap Audio', n=len(claps))

In [209]:
# Reference echo recordings: the six originals plus their filtered versions
original_echoes = [
    '../data/echoes/echo-1.wav',
    '../data/echoes/echo-2.wav',
    '../data/echoes/echo-3.wav',
    '../data/echoes/echo-4.wav',
    '../data/echoes/echo-5.wav',
    '../data/echoes/echo-6.wav',
]
filtered_echoes = [
    '../data/filtered_echoes/echo_1.wav',
    '../data/filtered_echoes/echo_2.wav',
    '../data/filtered_echoes/echo_3.wav',
    '../data/filtered_echoes/echo_4.wav',
    '../data/filtered_echoes/echo_5.wav',
    '../data/filtered_echoes/echo_6.wav',
]

echoes = [*original_echoes, *filtered_echoes]

# One feature row per echo, computed on the echo analysis window
echo_audio_features = extract_features_from_files(echoes, target_duration=ECHO_DURATION)

# 3-D t-SNE view of the reference echo features
visualize_audio(echo_audio_features, name='Echo Audio', n=len(echoes))

In [210]:
augmented_claps_folder_path = '../data/augmentedClaps'

# os.listdir() returns entries in arbitrary order; sort so that every run
# processes (and reports) the files in the same order.
all_augmented_clap_files = sorted(os.listdir(augmented_claps_folder_path))

# Bare file names (not full paths) of the regular .wav files in the folder.
# Sub-directories and stray non-audio files (e.g. .DS_Store) are skipped so
# they never reach the audio loader.
augmented_claps_abs = [
    f for f in all_augmented_clap_files
    if f.lower().endswith('.wav')
    and os.path.isfile(os.path.join(augmented_claps_folder_path, f))
]

# Joined with '/' on purpose: the pairing cell further down builds its
# candidate paths with '/' and compares them to these strings.
augmented_claps = [f'{augmented_claps_folder_path}/{f}' for f in augmented_claps_abs]

In [213]:
# Keep only the augmented claps whose features stay within 3 standard
# deviations of the reference clap features.
non_anomalous_clap_audio = detect_non_anomalies(
    precomputed_features=clap_audio_features,
    new_audio_files=augmented_claps,
    threshold=3.0,
    target_duration=CLAP_DURATION,
    batch_size=100,
)

In [214]:
# Re-extract features for the accepted claps using the same analysis window
# (CLAP_DURATION) that was used to screen them, rather than a hard-coded 0.15.
non_anomalous_clap_audio_features = extract_features_from_files(non_anomalous_clap_audio, CLAP_DURATION)
print(len(non_anomalous_clap_audio))

visualize_audio(non_anomalous_clap_audio_features, 'New Clap Audio', len(non_anomalous_clap_audio))

33


In [215]:
augmented_echoes_folder_path = '../data/augmentedEchoes'

# os.listdir() returns entries in arbitrary order; sort so that every run
# processes (and reports) the files in the same order.
all_augmented_echo_files = sorted(os.listdir(augmented_echoes_folder_path))

# Bare file names (not full paths) of the regular .wav files in the folder.
# Sub-directories and stray non-audio files (e.g. .DS_Store) are skipped so
# they never reach the audio loader.
augmented_echoes_abs = [
    f for f in all_augmented_echo_files
    if f.lower().endswith('.wav')
    and os.path.isfile(os.path.join(augmented_echoes_folder_path, f))
]

# Joined with '/' on purpose: the pairing cell further down builds its
# candidate paths with '/' and compares them to these strings.
augmented_echoes = [f'{augmented_echoes_folder_path}/{f}' for f in augmented_echoes_abs]

In [216]:
# Keep only the augmented echoes whose features stay within 3 standard
# deviations of the reference echo features.
non_anomalous_echo_audio = detect_non_anomalies(
    precomputed_features=echo_audio_features,
    new_audio_files=augmented_echoes,
    threshold=3,
    target_duration=ECHO_DURATION,
    batch_size=100,
)

In [217]:
# Re-extract features for the accepted echoes with the shared ECHO_DURATION
# constant instead of repeating its value as a literal.
non_anomalous_echo_audio_features = extract_features_from_files(non_anomalous_echo_audio, ECHO_DURATION)
print(len(non_anomalous_echo_audio))
visualize_audio(non_anomalous_echo_audio_features, 'new echo audio', len(non_anomalous_echo_audio))

42


In [218]:
# Display the augmented clap files that passed the z-score screening.
non_anomalous_clap_audio

['../data/augmentedClaps/clap_1_scaled_0.5.wav',
 '../data/augmentedClaps/clap_1_scaled_0.7.wav',
 '../data/augmentedClaps/clap_1_scaled_0.9.wav',
 '../data/augmentedClaps/clap_1_scaled_1.2.wav',
 '../data/augmentedClaps/clap_2_scaled_0.5.wav',
 '../data/augmentedClaps/clap_2_scaled_0.7.wav',
 '../data/augmentedClaps/clap_2_scaled_0.9.wav',
 '../data/augmentedClaps/clap_2_scaled_1.2.wav',
 '../data/augmentedClaps/clap_3_scaled_0.2.wav',
 '../data/augmentedClaps/clap_3_scaled_0.5.wav',
 '../data/augmentedClaps/clap_3_scaled_0.7.wav',
 '../data/augmentedClaps/clap_3_scaled_0.9.wav',
 '../data/augmentedClaps/clap_3_scaled_1.2.wav',
 '../data/augmentedClaps/clap_3_scaled_1.5.wav',
 '../data/augmentedClaps/clap_3_scaled_1.7.wav',
 '../data/augmentedClaps/clap_3_scaled_1.9.wav',
 '../data/augmentedClaps/clap_4_scaled_0.2.wav',
 '../data/augmentedClaps/clap_4_scaled_0.5.wav',
 '../data/augmentedClaps/clap_4_scaled_0.7.wav',
 '../data/augmentedClaps/clap_4_scaled_0.9.wav',
 '../data/augmentedC

In [219]:
# Display the augmented echo files that passed the z-score screening.
non_anomalous_echo_audio

['../data/augmentedEchoes/echo_1_scaled_0.2.wav',
 '../data/augmentedEchoes/echo_1_scaled_0.5.wav',
 '../data/augmentedEchoes/echo_1_scaled_0.7.wav',
 '../data/augmentedEchoes/echo_1_scaled_0.9.wav',
 '../data/augmentedEchoes/echo_1_scaled_1.2.wav',
 '../data/augmentedEchoes/echo_1_scaled_1.5.wav',
 '../data/augmentedEchoes/echo_1_scaled_1.7.wav',
 '../data/augmentedEchoes/echo_1_scaled_1.9.wav',
 '../data/augmentedEchoes/echo_2_scaled_0.5.wav',
 '../data/augmentedEchoes/echo_2_scaled_0.7.wav',
 '../data/augmentedEchoes/echo_2_scaled_0.9.wav',
 '../data/augmentedEchoes/echo_2_scaled_1.2.wav',
 '../data/augmentedEchoes/echo_2_scaled_1.5.wav',
 '../data/augmentedEchoes/echo_2_scaled_1.7.wav',
 '../data/augmentedEchoes/echo_2_scaled_1.9.wav',
 '../data/augmentedEchoes/echo_3_scaled_0.5.wav',
 '../data/augmentedEchoes/echo_3_scaled_0.7.wav',
 '../data/augmentedEchoes/echo_3_scaled_0.9.wav',
 '../data/augmentedEchoes/echo_3_scaled_1.2.wav',
 '../data/augmentedEchoes/echo_3_scaled_1.5.wav',


New Claps: ['../data/augmentedClaps/clap_3_scaled_0.2.wav', '../data/augmentedClaps/clap_4_scaled_0.2.wav', '../data/augmentedClaps/clap_1_scaled_0.5.wav', '../data/augmentedClaps/clap_2_scaled_0.5.wav', '../data/augmentedClaps/clap_3_scaled_0.5.wav', '../data/augmentedClaps/clap_4_scaled_0.5.wav', '../data/augmentedClaps/clap_5_scaled_0.5.wav', '../data/augmentedClaps/clap_1_scaled_0.7.wav', '../data/augmentedClaps/clap_2_scaled_0.7.wav', '../data/augmentedClaps/clap_3_scaled_0.7.wav', '../data/augmentedClaps/clap_4_scaled_0.7.wav', '../data/augmentedClaps/clap_5_scaled_0.7.wav', '../data/augmentedClaps/clap_6_scaled_0.7.wav', '../data/augmentedClaps/clap_1_scaled_0.9.wav', '../data/augmentedClaps/clap_2_scaled_0.9.wav', '../data/augmentedClaps/clap_3_scaled_0.9.wav', '../data/augmentedClaps/clap_4_scaled_0.9.wav', '../data/augmentedClaps/clap_5_scaled_0.9.wav', '../data/augmentedClaps/clap_6_scaled_0.9.wav', '../data/augmentedClaps/clap_1_scaled_1.2.wav', '../data/augmentedClaps/clap

In [221]:
# Pair up the augmented clips: a clap and the echo with the same index and
# scale factor are kept only when BOTH passed the anomaly screening.
new_claps = []
new_echoes = []

# Sets give constant-time membership tests; the lists were previously
# scanned from the start for every candidate pair.
accepted_claps = set(non_anomalous_clap_audio)
accepted_echoes = set(non_anomalous_echo_audio)

clip_indices = range(1, 7)  # Assuming we have 6 different clap/echo categories
scale_factors = [0.2, 0.5, 0.7, 0.9, 1.2, 1.5, 1.7, 1.9]  # Scale factors

for i in clip_indices:
    for scale_factor in scale_factors:
        # Generate corresponding file paths for clap and echo based on the pattern
        clap_file = f'../data/augmentedClaps/clap_{i}_scaled_{scale_factor}.wav'
        echo_file = f'../data/augmentedEchoes/echo_{i}_scaled_{scale_factor}.wav'

        # If both clap and echo are in the non-anomalous sets, keep the pair
        if clap_file in accepted_claps and echo_file in accepted_echoes:
            new_claps.append(clap_file)
            new_echoes.append(echo_file)

# Check the results
print("Matched Claps:", new_claps)
print("Matched Echoes:", new_echoes)


Matched Claps: ['../data/augmentedClaps/clap_1_scaled_0.5.wav', '../data/augmentedClaps/clap_1_scaled_0.7.wav', '../data/augmentedClaps/clap_1_scaled_0.9.wav', '../data/augmentedClaps/clap_1_scaled_1.2.wav', '../data/augmentedClaps/clap_2_scaled_0.5.wav', '../data/augmentedClaps/clap_2_scaled_0.7.wav', '../data/augmentedClaps/clap_2_scaled_0.9.wav', '../data/augmentedClaps/clap_2_scaled_1.2.wav', '../data/augmentedClaps/clap_3_scaled_0.5.wav', '../data/augmentedClaps/clap_3_scaled_0.7.wav', '../data/augmentedClaps/clap_3_scaled_0.9.wav', '../data/augmentedClaps/clap_3_scaled_1.2.wav', '../data/augmentedClaps/clap_3_scaled_1.5.wav', '../data/augmentedClaps/clap_3_scaled_1.7.wav', '../data/augmentedClaps/clap_3_scaled_1.9.wav', '../data/augmentedClaps/clap_4_scaled_0.2.wav', '../data/augmentedClaps/clap_4_scaled_0.5.wav', '../data/augmentedClaps/clap_4_scaled_0.7.wav', '../data/augmentedClaps/clap_4_scaled_0.9.wav', '../data/augmentedClaps/clap_4_scaled_1.2.wav', '../data/augmentedClaps/

In [224]:
# Destination folders for the matched clap/echo pairs
claps_dir = '../data/new_claps'
echoes_dir = '../data/new_echoes'

# Make sure both folders exist (no error if they are already there)
for new_dir in (claps_dir, echoes_dir):
    os.makedirs(new_dir, exist_ok=True)

# Function to copy files to the new directories
def copy_files(file_list, destination_dir):
    """
    Copy every file in `file_list` into `destination_dir`, keeping its base name.

    The destination directory is created first if it does not exist. Files
    that share a base name overwrite one another in the destination.

    Parameters:
    - file_list: Iterable of paths to the files to copy.
    - destination_dir: Directory that receives the copies.

    Returns:
    - List of the destination paths, in the same order as `file_list`.
    """
    # Creating the folder here keeps the helper usable on its own.
    os.makedirs(destination_dir, exist_ok=True)

    copied = []
    for file in file_list:
        target = os.path.join(destination_dir, os.path.basename(file))
        shutil.copy(file, target)
        copied.append(target)
    return copied

# Copy each matched list into its own destination folder
for matched_files, target_dir in ((new_claps, claps_dir), (new_echoes, echoes_dir)):
    copy_files(matched_files, target_dir)

print(f"Claps and Echoes have been copied to '{claps_dir}' and '{echoes_dir}' respectively.")


Claps and Echoes have been copied to '../data/new_claps' and '../data/new_echoes' respectively.
