In [10]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm

def extract_features(file_name):
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')

        # MFCC (Mel-frequency cepstral coefficients)
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        mfccs_processed = np.mean(mfccs.T, axis=0)

        # Chroma feature
        chroma_stft = np.mean(librosa.feature.chroma_stft(y=audio, sr=sample_rate).T, axis=0)

        # Spectral contrast
        spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=audio, sr=sample_rate).T, axis=0)

        # Spectral centroid
        spectral_centroids = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sample_rate).T, axis=0)

        # Zero-crossing rate
        zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=audio).T, axis=0)

        # Spectral rolloff
        spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=audio, sr=sample_rate).T, axis=0)

        # Combine all features into a 1D array
        features = np.hstack([mfccs_processed, chroma_stft, spectral_contrast, spectral_centroids, zero_crossing_rate, spectral_rolloff])

        return features
    except Exception as e:
        print(f"Error encountered while parsing file: {file_name}")
        return None

# Specify the directory containing the .mp3 files
directory = 'something'

# Create an empty DataFrame to store the features
features_df = pd.DataFrame()

print(f"Processing files in {directory} directory")
for filename in tqdm(os.listdir(directory)):
    if filename.endswith('.wav'):
        file_path = os.path.join(directory, filename)
        try:
            features = extract_features(file_path)
            # Append the features to the DataFrame
            if features is not None:
                features_df = features_df.concat(pd.Series(features), ignore_index=True)
        except Exception as e:
            print(f"Error encountered while processing file: {file_path}")
            continue

# Rename the DataFrame columns as needed
# features_df.columns = [list_of_feature_names]

# Now, you have the features in the 'features_df' DataFrame.


Processing files in something directory


100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  3.22it/s]

Error encountered while processing file: something/commercialsman.wav





In [11]:
features_df.head()