In [1]:
import librosa
import pandas as pd
import numpy as np
from tqdm import tqdm
import os

In [2]:
# Root of the extracted UrbanSound8K dataset. The default is the original local
# location; set the URBANSOUND8K_ROOT environment variable to run the notebook
# on another machine without editing this cell.
DATASET_ROOT = os.environ.get(
    "URBANSOUND8K_ROOT",
    r"E:\Me\coding\jupyter\Environmental_Sound_Classification\dataset\UrbanSound8K",
)
# Directory containing the per-fold sub-folders ("fold1", "fold2", ...) of clips.
audio_path = os.path.join(DATASET_ROOT, "audio")
# CSV listing each clip's file name, fold and class id.
metadata_path = os.path.join(DATASET_ROOT, "metadata", "UrbanSound8K.csv")

In [3]:
# Folder (relative to the working directory) that receives the per-fold
# feature and label arrays; created up front, and left alone if it already exists.
OUTPUT_PATH = "fold_features"
os.makedirs(name=OUTPUT_PATH, exist_ok=True)

In [4]:
# Read only the three columns used for feature extraction; the fold number and
# class id are requested as uint8 to keep the frame compact.
metadata_df = pd.read_csv(
    metadata_path,
    dtype={"fold": "uint8", "classID": "uint8"},
    usecols=["slice_file_name", "fold", "classID"],
)
# Bare expression so the notebook renders a preview of the table.
metadata_df

Unnamed: 0,slice_file_name,fold,classID
0,100032-3-0-0.wav,5,3
1,100263-2-0-117.wav,5,2
2,100263-2-0-121.wav,5,2
3,100263-2-0-126.wav,5,2
4,100263-2-0-137.wav,5,2
...,...,...,...
8727,99812-1-2-0.wav,7,1
8728,99812-1-3-0.wav,7,1
8729,99812-1-4-0.wav,7,1
8730,99812-1-5-0.wav,7,1


In [5]:
def extract_mfcc(file_path, sr=22050, n_mfcc=120, max_len=173):
    """Load an audio file and return a fixed-length, flattened MFCC vector.

    The clip is read with ``librosa.load`` (``sr`` is passed through as the
    sampling rate), turned into an MFCC matrix of ``n_mfcc`` rows, standardized
    with the mean and standard deviation of the whole matrix, and then
    zero-padded or truncated along the second axis to exactly ``max_len``
    columns before being flattened.

    Args:
        file_path: Path of the audio file to process.
        sr: Sampling rate handed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients requested per frame.
        max_len: Number of frames (columns) kept in the output.

    Returns:
        A 1-D array of length ``n_mfcc * max_len``, or ``None`` when loading
        or feature extraction raised an exception (the error is printed).
    """
    try:
        y, sr = librosa.load(file_path, sr=sr)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

        # Standardization over the whole matrix. A constant matrix (e.g. a
        # digitally silent clip) has zero standard deviation; dividing by it
        # only emits a NumPy warning and yields NaN, which the except clause
        # below would never catch. In that case the features are only centered.
        mean = np.mean(mfcc)
        std = np.std(mfcc)
        mfcc = mfcc - mean
        if std > 0:
            mfcc = mfcc / std

        # Fix length (pad or truncate to max_len time steps). Padding happens
        # after standardization, so padded frames sit at the standardized mean (0).
        if mfcc.shape[1] < max_len:
            pad_width = max_len - mfcc.shape[1]
            mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
        else:
            mfcc = mfcc[:, :max_len]

        # Flatten to 1D vector (important for ANN)
        return mfcc.flatten()
    except Exception as e:
        # Best-effort: report the failing file and let the caller skip it.
        print("Error processing", file_path, e)
        return None


In [8]:
# Read the complete metadata table (one row per audio clip)
metadata = pd.read_csv(metadata_path)

# Extract features fold by fold and write one X/y pair of .npy files per fold
for fold in range(1, 11):
    fold_df = metadata.loc[metadata["fold"] == fold]
    fold_dir = os.path.join(audio_path, f"fold{fold}")
    X, y = [], []

    print(f"Processing Fold {fold}...")

    # Walk the file names and class ids in lockstep; zip has no length, so the
    # progress bar gets its total from the fold's row count.
    clips = zip(fold_df["slice_file_name"], fold_df["classID"])
    for file_name, label in tqdm(clips, total=len(fold_df)):
        features = extract_mfcc(os.path.join(fold_dir, file_name))
        if features is None:
            # extract_mfcc returned no features for this clip; leave it out.
            continue
        X.append(features)
        y.append(label)

    X = np.array(X)
    y = np.array(y)

    np.save(os.path.join(OUTPUT_PATH, f"X_fold{fold}.npy"), X)
    np.save(os.path.join(OUTPUT_PATH, f"y_fold{fold}.npy"), y)

print("✅ Preprocessing completed and saved fold-wise.")

Processing Fold 1...


100%|██████████| 873/873 [00:10<00:00, 85.08it/s] 


Processing Fold 2...


100%|██████████| 888/888 [00:07<00:00, 119.91it/s]


Processing Fold 3...


100%|██████████| 925/925 [00:07<00:00, 120.61it/s]


Processing Fold 4...


100%|██████████| 990/990 [00:09<00:00, 108.19it/s]


Processing Fold 5...


100%|██████████| 936/936 [00:08<00:00, 114.05it/s]


Processing Fold 6...


100%|██████████| 823/823 [00:07<00:00, 116.99it/s]


Processing Fold 7...


100%|██████████| 838/838 [00:07<00:00, 110.86it/s]


Processing Fold 8...


100%|██████████| 806/806 [00:07<00:00, 111.63it/s]


Processing Fold 9...


100%|██████████| 816/816 [00:07<00:00, 113.44it/s]


Processing Fold 10...


100%|██████████| 837/837 [00:07<00:00, 114.51it/s]

✅ Preprocessing completed and saved fold-wise.



