In [None]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
import joblib
from tqdm.notebook import tqdm  # Progress bar

In [None]:
# --- Configuration ---
DATASET_PATH = "Animals"   # Folder containing class subfolders
MODEL_SAVE_PATH = "svm_audio_model_test.pkl"
SCALER_PATH = "scaler.pkl"
ENCODER_PATH = "encoder.pkl"
N_MFCC = 40

def extract_mfcc(file_path):
    try:
        # Load audio (kaiser_fast is faster)
        audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
        
        # Extract MFCCs
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=N_MFCC)
        
        # Average across time to get a fixed-size vector for the SVM
        mfccs_scaled = np.mean(mfccs.T, axis=0)
        return mfccs_scaled
        
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

In [3]:
features = []
labels = []

# Get list of class folders
classes = [d for d in os.listdir(DATASET_PATH) if os.path.isdir(os.path.join(DATASET_PATH, d))]
print(f"Classes detected: {classes}")

for label in classes:
    label_path = os.path.join(DATASET_PATH, label)
    files = [f for f in os.listdir(label_path) if f.endswith(('.wav', '.mp3', '.flac'))]
    
    # Iterate with progress bar
    for file_name in tqdm(files, desc=f"Processing {label}"):
        file_path = os.path.join(label_path, file_name)
        data = extract_mfcc(file_path)
        
        if data is not None:
            features.append(data)
            labels.append(label)

X = np.array(features)
y = np.array(labels)

print(f"Data loading complete. Total samples: {len(X)}")

Classes detected: ['bird', 'cat', 'dog']


ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html