In [None]:
# Mount Google Drive at /content/drive so paths under that directory
# (used later for the dataset location) can be read from this runtime.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
import pandas as pd
import librosa
import os
import numpy as np
from google.colab import files

In [None]:
def extract_mfcc(file_path, sr=22050, n_mfcc=40, min_n_fft=1024):
    """Compute MFCCs for one audio file and return them as a flat vector.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        sr: Sampling rate requested from ``librosa.load``.
        n_mfcc: Number of MFCC coefficients requested from librosa.
        min_n_fft: Minimum number of samples a clip must contain; shorter
            clips are reported and skipped.

    Returns:
        The MFCC matrix flattened to one dimension, or ``None`` when the
        clip has fewer than ``min_n_fft`` samples. The matrix is flattened
        as-is, so the vector length follows the matrix size librosa returns
        (presumably longer for longer clips).
    """
    signal, sample_rate = librosa.load(file_path, sr=sr)
    n_samples = len(signal)

    # Guard clause: refuse clips that are too short to analyse.
    if n_samples < min_n_fft:
        print(f"‚ùå Skipping {file_path}: Audio too short ({n_samples} samples)")
        return None

    # Use a 2048-sample FFT window unless the clip itself is shorter than that.
    window = n_samples if n_samples < 2048 else 2048
    coefficients = librosa.feature.mfcc(
        y=signal,
        sr=sample_rate,
        n_mfcc=n_mfcc,
        n_fft=window,
    )
    return coefficients.flatten()

In [None]:
# Build the MFCC feature table: one row per usable .wav file, one column per
# flattened MFCC value, plus the class label (the name of the sub-folder the
# file was found in). The result is saved to extracted_features.csv.
base_path = "/content/drive/MyDrive/Miniproject_ML/sound_dataset(final)"

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the resulting table reproducible between runs.
labels = sorted(os.listdir(base_path))

data = []

for label in labels:
    class_path = os.path.join(base_path, label)
    if not os.path.isdir(class_path):
        continue  # only sub-directories are treated as classes
    print(f"üìÇ Processing class: {label}")
    for filename in sorted(os.listdir(class_path)):
        # Compare the extension case-insensitively so "*.WAV" files are not
        # silently dropped from the dataset.
        if not filename.lower().endswith(".wav"):
            continue
        file_path = os.path.join(class_path, filename)
        mfcc = extract_mfcc(file_path)
        if mfcc is not None:  # None means extract_mfcc skipped a too-short clip
            data.append([mfcc, label])

df = pd.DataFrame(data, columns=["mfcc", "label"])

# Spread each flattened MFCC vector over its own columns. Vectors of unequal
# length leave NaN in the trailing columns of the shorter rows.
mfcc_features = pd.DataFrame(df["mfcc"].tolist())
mfcc_features.columns = [f"mfcc_{i}" for i in range(mfcc_features.shape[1])]

# Re-attach the labels as the last column.
df_final = pd.concat([mfcc_features, df["label"]], axis=1)

print(df_final.head())

df_final.to_csv("extracted_features.csv", index=False)
print("‚úÖ ‡∏ö‡∏±‡∏ô‡∏ó‡∏∂‡∏Å‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÄ‡∏£‡∏µ‡∏¢‡∏ö‡∏£‡πâ‡∏≠‡∏¢!")

In [None]:
# Load the extracted feature table from disk into a DataFrame.
# NOTE(review): presumably this is the extracted_features.csv saved earlier
# with a relative path — that only matches if the working directory is
# /content; confirm.
file_path = '/content/extracted_features.csv'
mfcc_df = pd.read_csv(file_path)

In [None]:
# Count the NaN cells in every row with .isnull(), then keep only the rows
# that contain at least one missing value.
nan_per_row = mfcc_df.isnull().sum(axis="columns")
nan_rows = nan_per_row.loc[nan_per_row >= 1]

# Report how many rows are affected and show the first ten of them.
print(f"‚úÖ ‡∏û‡∏ö‡πÅ‡∏ñ‡∏ß‡∏ó‡∏µ‡πà‡∏°‡∏µ NaN ‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î {nan_rows.shape[0]} ‡πÅ‡∏ñ‡∏ß")
print(nan_rows.iloc[:10])

In [None]:
# Replace every NaN in the table with the sentinel value -100 (in place),
# then write the cleaned table to a new CSV without the index column.
mfcc_df.fillna(value=-100, inplace=True)
mfcc_df.to_csv(path_or_buf="mfcc_features_fixed.csv", index=False)

print("‚úÖ ‡πÅ‡∏ó‡∏ô‡∏Ñ‡πà‡∏≤ NaN ‡∏î‡πâ‡∏ß‡∏¢ -100 ‡πÅ‡∏•‡∏∞‡∏ö‡∏±‡∏ô‡∏ó‡∏∂‡∏Å‡πÑ‡∏ü‡∏•‡πå‡πÄ‡∏£‡∏µ‡∏¢‡∏ö‡∏£‡πâ‡∏≠‡∏¢!")

In [None]:
# Load the NaN-filled feature table from disk, rebinding file_path and mfcc_df.
# NOTE(review): presumably this is the mfcc_features_fixed.csv saved earlier
# with a relative path — that only matches if the working directory is
# /content; confirm.
file_path = '/content/mfcc_features_fixed.csv'
mfcc_df = pd.read_csv(file_path)

In [None]:
# Hand the cleaned feature CSV to google.colab's files.download helper so it
# can be saved outside the Colab runtime.
files.download("mfcc_features_fixed.csv")