In [3]:

import os
import numpy as np
import pandas as pd
import librosa
import tensorflow as tf
import resampy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.utils import to_categorical

# --- Configuration ---
# Presumably the root folder of the audio dataset; it is not referenced in the
# cells visible here — verify against the loading code.
DATA_DIR = './Dataset'
# Target clip length in seconds. extractFeature crops recordings longer than
# this to a window of this length taken from the middle of the clip.
TARGET_DURATION = 5.0
# Spectrogram length (number of frames) for a 5-second clip at 22050 Hz.
# NOTE(review): 216 presumably corresponds to 1 + floor(5 * 22050 / 512), i.e. a
# hop length of 512 samples — confirm against the spectrogram settings in use.
MAX_PAD_LEN = 216

In [None]:
def extractFeature(file_path):
    """Load an audio file and return its time-averaged MFCC feature vector.

    The file is read with ``librosa.load`` using the fast ``kaiser_fast``
    resampler. Recordings longer than ``TARGET_DURATION`` seconds are cropped
    to a window of that length centred on the middle of the clip; shorter
    recordings are used as-is. Thirteen MFCCs are then computed and averaged
    over the time axis.

    Args:
        file_path: Path to an audio file that librosa can read.

    Returns:
        A 1-D NumPy array with 13 mean MFCC values, or ``None`` if loading or
        feature extraction failed. Failures are printed rather than raised so
        that a single bad file does not abort a batch run.
    """
    print(f"Processing file: {file_path}")
    try:
        # librosa.load returns floating-point samples resampled to its default
        # target rate; it does not peak-normalize the signal.
        audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
        audio_duration = librosa.get_duration(y=audio, sr=sample_rate)

        if audio_duration > TARGET_DURATION:
            # Keep only the central TARGET_DURATION seconds of a long clip.
            start = int((audio_duration - TARGET_DURATION) / 2 * sample_rate)
            end = start + int(TARGET_DURATION * sample_rate)
            audio = audio[start:end]
            print(f"  - Original duration: {audio_duration:.2f}s. Truncated to middle {TARGET_DURATION}s.")

        # Pass the rate returned by librosa.load. The previous code referenced
        # an undefined name `sr`, so every call raised NameError, which the
        # handler below swallowed, making the function always return None.
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13)
        # mfccs is (n_mfcc, frames); average over frames to get one vector.
        mfccs = np.mean(mfccs.T, axis=0)
        return mfccs
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller skip
        # this file by checking for None.
        print(f"Error processing {file_path}: {e}")
        return None