In [6]:
import librosa
import pandas as pd
import numpy as np
from tqdm import tqdm
import os

In [2]:
# Locations of the UrbanSound8K audio folds and metadata CSV.
# NOTE: this is a machine-specific absolute Windows path; adjust the root
# when running the notebook elsewhere.
dataset_root = r"E:\Me\coding\jupyter\Environmental_Sound_Classification\dataset\UrbanSound8K"
audio_path = dataset_root + r"\audio"
metadata_path = dataset_root + r"\metadata\UrbanSound8K.csv"

In [3]:
# Read only the columns needed to locate each clip and label it; the fold
# number and class ID are stored as unsigned 8-bit integers.
metadata_columns = ["slice_file_name", "fold", "classID"]
metadata_df = pd.read_csv(
    metadata_path,
    usecols=metadata_columns,
    dtype={"fold": "uint8", "classID": "uint8"},
)
metadata_df

Unnamed: 0,slice_file_name,fold,classID
0,100032-3-0-0.wav,5,3
1,100263-2-0-117.wav,5,2
2,100263-2-0-121.wav,5,2
3,100263-2-0-126.wav,5,2
4,100263-2-0-137.wav,5,2
...,...,...,...
8727,99812-1-2-0.wav,7,1
8728,99812-1-3-0.wav,7,1
8729,99812-1-4-0.wav,7,1
8730,99812-1-5-0.wav,7,1


In [4]:
def extract_mfcc(file_path, sr=22050, n_mfcc=120, max_len=173):
    """Load an audio file and return its standardized MFCCs as a flat vector.

    The clip is loaded with ``librosa.load`` at sample rate ``sr`` and turned
    into an MFCC matrix with ``librosa.feature.mfcc``. The matrix is
    standardized with its own global mean and standard deviation, then
    zero-padded or truncated along axis 1 (time steps) to exactly ``max_len``
    columns, and finally flattened.

    Args:
        file_path: Path of the audio file to load.
        sr: Sample rate passed to ``librosa.load`` and ``librosa.feature.mfcc``.
        n_mfcc: Number of MFCC coefficients requested from librosa.
        max_len: Number of time steps kept in the output (pad or truncate).

    Returns:
        A 1-D NumPy array holding the flattened, fixed-length MFCC matrix, or
        ``None`` if any step raised an exception (the error is printed).
    """
    try:
        y, sr = librosa.load(file_path, sr=sr)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

        # Standardization. A constant matrix (e.g. digital silence) has zero
        # standard deviation; dividing by it would silently fill the feature
        # vector with NaN, so fall back to a unit scale and only centre.
        std = np.std(mfcc)
        scale = std if std > 0 else 1.0
        mfcc = (mfcc - np.mean(mfcc)) / scale

        # Fix length (pad or truncate to max_len time steps)
        n_frames = mfcc.shape[1]
        if n_frames < max_len:
            # Zero-pad on the right of the time axis only.
            mfcc = np.pad(
                mfcc,
                pad_width=((0, 0), (0, max_len - n_frames)),
                mode='constant',
            )
        else:
            mfcc = mfcc[:, :max_len]

        # Flatten to 1D vector (important for ANN)
        return mfcc.flatten()
    except Exception as e:
        # Best-effort: report the failing file and let the caller skip it.
        print("Error processing", file_path, e)
        return None


In [7]:
# Feature vectors and their class labels, collected in lockstep.
X, y = [], []

for _, clip in tqdm(metadata_df.iterrows(), total=len(metadata_df)):
    # Each clip is read from <audio_path>/fold<N>/<slice_file_name>.
    clip_path = os.path.join(audio_path, f"fold{clip['fold']}", clip['slice_file_name'])
    features = extract_mfcc(clip_path)

    # extract_mfcc returns None for clips it could not process; skipping
    # them here keeps X and y the same length.
    if features is None:
        continue

    X.append(features)
    y.append(clip['classID'])

# Convert the collected lists into NumPy arrays.
X = np.array(X)
y = np.array(y)

100%|██████████| 8732/8732 [02:40<00:00, 54.33it/s]


In [11]:
# Pin explicit dtypes before persisting: float32 features, int64 labels.
X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.int64)

# Save to disk (written to the current working directory).
for npy_name, array in (("X_data.npy", X), ("y_data.npy", y)):
    np.save(npy_name, array)

print("✅ Data saved: X_data.npy and y_data.npy")

✅ Data saved: X_data.npy and y_data.npy
