In [7]:
import librosa
import numpy as np
import pandas as pd
import glob
import os
import re
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

def load_and_process_data():
    """Load audio files and extract MFCC features"""
    audio_files = glob.glob("/219s*.mp3")
    if not audio_files:
        raise FileNotFoundError("No matching audio files found")

    mfcc_features_dict = {}
    n_mfcc = 13

    for file in audio_files:
        try:
            y, sr = librosa.load(file, sr=None)
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
            mfcc_mean = np.mean(mfcc, axis=1)

            prefix = re.match(r"(2\d{2})s\d+\.mp3", os.path.basename(file)).group(1)
            file_id = os.path.basename(file).split(".")[0]

            if prefix not in mfcc_features_dict:
                mfcc_features_dict[prefix] = []
            mfcc_features_dict[prefix].append([file_id] + list(mfcc_mean))
        except Exception as e:
            print(f"Error processing {file}: {e}")

    return mfcc_features_dict

def stacking_pipeline():
    """Implement Stacking Classifier with multiple base models"""
    # Load MFCC features
    mfcc_data = load_and_process_data()

    # Initialize results storage
    results = {'y_true': [], 'y_pred': []}

    # Define base models
    base_models = [
        ('svm', SVC(probability=True, random_state=42)),
        ('xgb', XGBClassifier(use_label_encoder=False, eval_metric="logloss", random_state=42)),
        ('rf', RandomForestClassifier(random_state=42)),
        ('mlp', MLPClassifier(max_iter=500, random_state=42))
    ]

    # Define meta-model
    meta_model = LogisticRegression()

    for prefix, features in mfcc_data.items():
        # Process features
        mfcc_df = pd.DataFrame(features, columns=["File Name"] + [f"MFCC_{i}" for i in range(13)])

        # Load labels
        labels_file = f"output_s{prefix}.xlsx"
        if not os.path.exists(labels_file):
            continue

        labels = pd.read_excel(labels_file)
        merged_data = mfcc_df.merge(labels, on="File Name", how="inner")

        # Prepare data
        X = merged_data.drop(columns=["File Name", "Fluency"])
        X = X.apply(pd.to_numeric, errors='coerce').fillna(0)
        y = merged_data["Fluency"].astype(int)

        # Train-test split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Create and train stacking classifier
        stack_model = StackingClassifier(estimators=base_models, final_estimator=meta_model)
        stack_model.fit(X_train, y_train)

        # Store predictions
        y_pred = stack_model.predict(X_test)
        results['y_true'].extend(y_test)
        results['y_pred'].extend(y_pred)

    return results

# Main program
if __name__ == "__main__":
    # Execute stacking pipeline
    results = stacking_pipeline()

    # Calculate metrics
    metrics = {
        "Accuracy": accuracy_score(results['y_true'], results['y_pred']),
        "F1 Score": f1_score(results['y_true'], results['y_pred'], average='weighted'),
        "Recall": recall_score(results['y_true'], results['y_pred'], average='weighted'),
        "Precision": precision_score(results['y_true'], results['y_pred'], average='weighted')
    }

    # Print results
    print("\n📊 Stacking Classifier Performance:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.4f}")


NameError: name 'XGBClassifier' is not defined