In [47]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
import librosa.display
from mpl_toolkits.mplot3d import Axes3D
import plotly.express as px
import plotly.graph_objects as go
import os

In [48]:
def load_and_truncate_audio(audio_file, target_duration=3.0, sr=44100):
    """
    Load an audio file and force it to a fixed duration.

    The signal is loaded with ``librosa.load`` and is then either cut off
    after ``target_duration`` seconds or right-padded with zeros up to that
    length, so the returned signal has ``int(target_duration * sr)`` samples.

    Parameters:
    - audio_file: Path to the audio file.
    - target_duration: Length of the returned signal (in seconds). Must not be negative.
    - sr: Sampling rate passed to ``librosa.load``.

    Returns:
    - y: The truncated or zero-padded audio signal.
    - sr: The sampling rate returned by ``librosa.load``.

    Raises:
    - ValueError: If ``target_duration`` is negative.
    """
    # A negative duration would turn into a negative slice bound below and
    # silently drop samples from the END of the signal, so reject it up front.
    if target_duration < 0:
        raise ValueError(
            f"target_duration must be non-negative, got {target_duration!r}"
        )

    y, sr = librosa.load(audio_file, sr=sr)
    target_samples = int(target_duration * sr)
    missing = target_samples - len(y)

    if missing < 0:
        # Signal is too long: keep only the leading target_samples samples.
        y = y[:target_samples]
    elif missing > 0:
        # Signal is too short: append zeros (silence) up to the target length.
        y = np.pad(y, (0, missing), 'constant', constant_values=0)

    return y, sr

In [49]:
def extract_audio_features(audio_file, target_duration=3.0):
    """
    Build one flat feature vector for a single audio file.

    The file is first loaded and fixed to ``target_duration`` seconds via
    ``load_and_truncate_audio``; the librosa features computed from it are
    then concatenated in this order: spectral centroid, spectral bandwidth,
    spectral rolloff, MFCCs, chroma, and the RMS energy averaged over axis 1.

    Parameters:
    - audio_file: Path to the audio file.
    - target_duration: Duration to which the audio is truncated/padded (in seconds).

    Returns:
    - features: A 1D numpy array holding all extracted features.
    """
    signal, rate = load_and_truncate_audio(audio_file, target_duration)
    feature_api = librosa.feature

    # Features kept at full resolution (every value librosa returns is used).
    full_resolution = [
        feature_api.spectral_centroid(y=signal, sr=rate),
        feature_api.spectral_bandwidth(y=signal, sr=rate),
        feature_api.spectral_rolloff(y=signal, sr=rate),
        feature_api.mfcc(y=signal, sr=rate),          # Mel-frequency cepstral coefficients
        feature_api.chroma_stft(y=signal, sr=rate),   # chroma features
    ]

    # RMS energy is the only feature that is reduced to its mean along axis 1.
    mean_rms = feature_api.rms(y=signal).mean(axis=1)

    pieces = [np.ravel(block) for block in full_resolution]
    pieces.append(np.ravel(mean_rms))
    return np.concatenate(pieces)

In [50]:
# Function to extract features from a list of audio files
def extract_features_from_files(audio_files, target_duration=3.0):
    """
    Run ``extract_audio_features`` on every file and stack the results.

    Parameters:
    - audio_files: List of paths to audio files.
    - target_duration: Duration to which each audio file is truncated (in seconds).

    Returns:
    - features_list: A numpy array with one entry (feature vector) per input file,
      in the same order as ``audio_files``.
    """
    per_file_vectors = [
        extract_audio_features(path, target_duration) for path in audio_files
    ]
    return np.array(per_file_vectors)

In [51]:
def visualize_audio(features, name='Audio', n=6):
    """
    Show an interactive 3D t-SNE scatter plot of a set of feature vectors.

    The features are standardized, embedded into three dimensions with t-SNE
    (fixed random_state=42), and drawn with plotly. All points share one
    colour: blue when ``name`` contains "clap" (case-insensitive), red otherwise.

    Parameters:
    - features: 2D array-like, one row per audio file and one column per feature.
    - name: Label used in the plot title and to pick the marker colour.
    - n: Unused; kept only so existing calls that pass a point count keep working.
      The number of points is taken from ``features`` itself.

    Raises:
    - ValueError: If ``features`` is not 2D or has fewer than two rows.
    """
    features = np.asarray(features)
    if features.ndim != 2:
        raise ValueError(
            f"features must be a 2D array (samples x features), got ndim={features.ndim}"
        )
    n_samples = features.shape[0]
    if n_samples < 2:
        raise ValueError(
            f"t-SNE needs at least 2 samples, got {n_samples}"
        )

    # Normalize the feature data (important for t-SNE)
    scaler = StandardScaler()
    features_scaled = scaler.fit_transform(features)

    # scikit-learn requires perplexity < n_samples, so cap the preferred
    # value of 10 for small inputs instead of letting TSNE raise.
    perplexity = min(10, n_samples - 1)
    tsne = TSNE(n_components=3, random_state=42, perplexity=perplexity)
    features_tsne = tsne.fit_transform(features_scaled)

    # One colour per plotted point, sized from the data rather than from `n`
    # so the colour list can never disagree with the number of points.
    point_color = 'blue' if 'clap' in name.lower() else 'red'
    colors = [point_color] * n_samples

    # Create a 3D scatter plot with plotly
    fig = go.Figure()

    # Add scatter3d for the points with color based on audio type
    fig.add_trace(go.Scatter3d(
        x=features_tsne[:, 0],
        y=features_tsne[:, 1],
        z=features_tsne[:, 2],
        mode='markers',
        marker=dict(size=8, color=colors, opacity=0.8)
    ))

    # Disabled: optional surface (shading) over the points. While this stays
    # commented out, no shading is drawn even though the title mentions it.
    # fig.add_trace(go.Mesh3d(
    #     x=features_tsne[:, 0],
    #     y=features_tsne[:, 1],
    #     z=features_tsne[:, 2],
    #     color='green',
    #     opacity=0.5,
    #     alphahull=5  # Control the convex hull of the surface
    # ))

    # Update the layout for better visualization
    fig.update_layout(
        title=f'3D Plot of {name} Features using t-SNE with Shading',
        scene=dict(
            xaxis_title='t-SNE 1',
            yaxis_title='t-SNE 2',
            zaxis_title='t-SNE 3'
        )
    )

    # Show the plot interactively
    fig.show()

In [52]:
# Audio files for the "Clap" category: originals, filtered versions, and
# every regular file found in the augmented-claps folder.

original_claps = [f'../data/claps/clap-{i}.wav' for i in range(1, 7)]
filtered_claps = [f'../data/filtered_claps/clap_{i}.wav' for i in range(1, 7)]

augmented_claps_folder_path = '../data/augmentedClaps'
# os.listdir returns entries in arbitrary order; sort them so the row order
# fed to the seeded t-SNE is the same on every run and machine.
all_augmented_clap_files = sorted(os.listdir(augmented_claps_folder_path))
# Bare file names (not absolute paths, despite the variable name); sub-directories are skipped.
augmented_claps_abs = [
    f for f in all_augmented_clap_files
    if os.path.isfile(os.path.join(augmented_claps_folder_path, f))
]
augmented_claps = [augmented_claps_folder_path + '/' + f for f in augmented_claps_abs]

claps = original_claps + filtered_claps + augmented_claps

# Clip length in seconds that every clap file is truncated/padded to.
CLAP_CLIP_SECONDS = 0.055

# Extract features from the audio files
clap_audio_features = extract_features_from_files(claps, CLAP_CLIP_SECONDS)

# Visualize the audio features for "Clap"
visualize_audio(clap_audio_features, 'Clap Audio', len(claps))

In [53]:
# Audio files for the "Echo" category: originals, filtered versions, and
# every regular file found in the augmented-echoes folder.

original_echoes = [f'../data/echoes/echo-{i}.wav' for i in range(1, 7)]
filtered_echoes = [f'../data/filtered_echoes/echo_{i}.wav' for i in range(1, 7)]

augmented_echoes_folder_path = '../data/augmentedEchoes'
# os.listdir returns entries in arbitrary order; sort them so the row order
# fed to the seeded t-SNE is the same on every run and machine.
all_augmented_echo_files = sorted(os.listdir(augmented_echoes_folder_path))
# Bare file names (not absolute paths, despite the variable name); sub-directories are skipped.
augmented_echoes_abs = [
    f for f in all_augmented_echo_files
    if os.path.isfile(os.path.join(augmented_echoes_folder_path, f))
]
augmented_echoes = [augmented_echoes_folder_path + '/' + f for f in augmented_echoes_abs]

echoes = original_echoes + filtered_echoes + augmented_echoes

# Clip length in seconds that every echo file is truncated/padded to.
ECHO_CLIP_SECONDS = 0.15

# Extract features from the audio files
echo_audio_features = extract_features_from_files(echoes, ECHO_CLIP_SECONDS)

# Visualize the audio features for "Echo"
visualize_audio(echo_audio_features, 'Echo Audio', len(echoes))