In [4]:
import numpy as np
import librosa
import os

# Load ESC-50 dataset
def load_audio_files(directory, sample_rate=22050):
    audio_data = []
    for filename in os.listdir(directory):
        if filename.endswith(".wav"):
            file_path = os.path.join(directory, filename)
            y, sr = librosa.load(file_path, sr=sample_rate)
            audio_data.append(y)
    return audio_data

# Directory where ESC-50 is located
directory = "C:\\Users\\prana\\Downloads\\ESC-50-master\\ESC-50-master\\audio"
audio_data = load_audio_files(directory)

# Determine the max length of audio data for padding/truncating
max_length = max(len(y) for y in audio_data)

# Pad or truncate audio data to the same length
audio_data_reshaped = np.array([librosa.util.fix_length(y, size=max_length) for y in audio_data])

# Standardize data by centering (mean subtraction)
audio_data_centered = audio_data_reshaped - np.mean(audio_data_reshaped, axis=0)

# Compute covariance matrix (Note: transpose data to get correct shape)
cov_matrix = np.cov(audio_data_centered, rowvar=False)

# Compute eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Sort eigenvectors by descending eigenvalues
sorted_index = np.argsort(eigenvalues)[::-1]
sorted_eigenvectors = eigenvectors[:, sorted_index]

# Project data onto the principal components
pca_data = np.dot(audio_data_centered, sorted_eigenvectors)

# `pca_data` now contains the audio data projected onto the principal components


MemoryError: Unable to allocate 90.6 GiB for an array with shape (110250, 110250) and data type float64

In [5]:
import numpy as np
import librosa
import os
from sklearn.decomposition import IncrementalPCA

# Load ESC-50 dataset
def load_audio_files(directory, sample_rate=22050):
    audio_data = []
    for filename in os.listdir(directory):
        if filename.endswith(".wav"):
            file_path = os.path.join(directory, filename)
            y, sr = librosa.load(file_path, sr=sample_rate)
            mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
            mfccs = np.mean(mfccs.T, axis=0)  # Take mean of MFCCs over time to reduce dimensionality
            audio_data.append(mfccs)
    return np.array(audio_data)

# Directory where ESC-50 is located
directory = "C:\\Users\\prana\\Downloads\\ESC-50-master\\ESC-50-master\\audio"
audio_data = load_audio_files(directory)

# Standardize data by centering (mean subtraction)
audio_data_centered = audio_data - np.mean(audio_data, axis=0)

# Use IncrementalPCA for memory efficiency
ipca = IncrementalPCA(n_components=10, batch_size=10)
pca_data = ipca.fit_transform(audio_data_centered)

# `pca_data` now contains the audio data projected onto the principal components


In [6]:
pca_data

array([[-3.11911637e+02, -3.79127620e+01, -1.12852323e+01, ...,
         9.97082918e-01,  1.63951316e-01,  4.27344377e-01],
       [ 9.63644676e+01, -9.63085399e+01, -4.95671285e+01, ...,
         8.01260437e+00,  1.01772349e+01,  9.98286954e+00],
       [ 3.06245959e+02, -5.12519979e+01,  3.16131445e+01, ...,
        -2.66891943e+00, -4.15354203e-01, -1.52995705e+01],
       ...,
       [ 6.23260994e+00,  1.10800488e+01,  2.59713889e+01, ...,
        -3.07266740e+00, -1.57698479e-02,  7.59055178e-01],
       [ 1.50038199e+02, -1.79925097e+00, -8.70522719e+00, ...,
        -1.84037532e+01, -1.74354390e+01, -1.61836249e+01],
       [-2.01744764e+02, -5.04401629e+01, -1.47589261e+01, ...,
         9.47110003e+00, -7.85521019e+00,  5.39906558e-01]])

In [1]:
import numpy as np
import librosa
import os
import matplotlib.pyplot as plt
from scipy.io import wavfile
import sounddevice as sd

# Load ESC-50 dataset and extract MFCC features
def load_audio_files(directory, sample_rate=22050, n_mfcc=13):
    audio_data = []
    for filename in os.listdir(directory):
        if filename.endswith(".wav"):
            file_path = os.path.join(directory, filename)
            y, sr = librosa.load(file_path, sr=sample_rate)

            # Extract MFCC features
            mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
            mfccs_mean = np.mean(mfccs.T, axis=0)  # Average MFCC over time
            audio_data.append(mfccs_mean)
    return np.array(audio_data), sr

# Directory where ESC-50 is located
directory = "C:\\Users\\prana\\Downloads\\ESC-50-master\\ESC-50-master\\audio"
audio_data, sr = load_audio_files(directory)

# Standardize data by centering (mean subtraction)
mean_audio = np.mean(audio_data, axis=0)
audio_data_centered = audio_data - mean_audio

# Compute covariance matrix
cov_matrix = np.dot(audio_data_centered.T, audio_data_centered) / (audio_data_centered.shape[0] - 1)

# Compute eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Sort eigenvectors by eigenvalues in descending order
sorted_index = np.argsort(eigenvalues)[::-1]
sorted_eigenvalues = eigenvalues[sorted_index]
sorted_eigenvectors = eigenvectors[:, sorted_index]

# Project data onto the principal components
pca_data = np.dot(audio_data_centered, sorted_eigenvectors)

# Play the first audio after PCA (using first principal component)
sd.play(pca_data[:, 0], sr)
sd.wait()  # Wait until the audio finishes playing


KeyboardInterrupt: 