In [9]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from lime.lime_tabular import LimeTabularExplainer

# 1. Load and preprocess data

# Directory containing the audio files
data_dir = 'c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/'

# Function to extract features from audio files
def extract_features(file_path):
    # Load audio file
    y, sr = librosa.load(file_path, sr=None)

    # Extract MFCCs (Mel-frequency cepstral coefficients)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).mean(axis=1)

    # Extract Chroma features
    chroma = librosa.feature.chroma_stft(y=y, sr=sr).mean(axis=1)

    # Extract Spectral Contrast
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr).mean(axis=1)

    # Extract Mel spectrogram
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr).mean(axis=1)

    # Extract Zero Crossing Rate (ZCR)
    zcr = librosa.feature.zero_crossing_rate(y=y).mean(axis=1)

    # Combine all the features into a single array
    return np.hstack([mfccs, chroma, spectral_contrast, mel_spectrogram, zcr])

# Function to load all audio files from a directory and their corresponding labels
def load_data(data_dir):
    features = []
    labels = []

    # Walk through the directories
    for label in ['Fake', 'Real']:
        folder_path = os.path.join(data_dir, label)
        for filename in os.listdir(folder_path):
            if filename.endswith('.wav'):
                file_path = os.path.join(folder_path, filename)
                feature_vector = extract_features(file_path)
                features.append(feature_vector)
                labels.append(label)

    return np.array(features), np.array(labels)

# Load the data
X, y = load_data(data_dir)

# Encode labels (Fake -> 0, Real -> 1)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# 2. Train a classifier (Random Forest in this case)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 3. Initialize LimeTabularExplainer
feature_names = [
    'mfcc1', 'mfcc2', 'mfcc3', 'mfcc4', 'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc12', 'mfcc13',
    'chroma1', 'chroma2', 'chroma3', 'chroma4', 'chroma5', 'chroma6', 'chroma7', 'chroma8', 'chroma9', 'chroma10', 'chroma11', 'chroma12',
    'spectral_contrast1', 'spectral_contrast2', 'spectral_contrast3', 'spectral_contrast4', 'spectral_contrast5', 'spectral_contrast6', 'spectral_contrast7',
    'mel1', 'mel2', 'mel3', 'mel4', 'mel5', 'mel6', 'mel7', 'mel8', 'mel9', 'mel10', 'mel11', 'mel12', 'mel13',
    'zcr'
]

explainer = LimeTabularExplainer(
    X_train,
    training_labels=y_train,
    mode='classification',
    feature_names=feature_names
)

# 4. Explain a prediction for a test instance (first sample in the test set)
exp = explainer.explain_instance(X_test[0], model.predict_proba, num_features=10)

# Show the explanation in a notebook (if running in Jupyter)
exp.show_in_notebook()

# 5. Evaluate the model on the test set
accuracy = model.score(X_test, y_test)
print(f"Test accuracy: {accuracy:.4f}")


IndexError: list index out of range

In [18]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from lime.lime_tabular import LimeTabularExplainer

data_dir = 'c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/'

def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).mean(axis=1)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr).mean(axis=1)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr).mean(axis=1)
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr).mean(axis=1)
    zcr = librosa.feature.zero_crossing_rate(y=y).mean(axis=1)

    feature_vector = np.hstack([mfccs, chroma, spectral_contrast, mel_spectrogram, zcr])
    print(f"Extracted {len(feature_vector)} features for file: {file_path}")  # Debugging line

    return feature_vector

def load_data(data_dir):
    features = []
    labels = []

    for label in ['Fake', 'Real']:
        folder_path = os.path.join(data_dir, label)
        for filename in os.listdir(folder_path):
            if filename.endswith('.wav'):
                file_path = os.path.join(folder_path, filename)
                feature_vector = extract_features(file_path)
                features.append(feature_vector)
                labels.append(label)

    return np.array(features), np.array(labels)

# Load the data
X, y = load_data(data_dir)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Print the shape of X_train to verify feature size
print(f"Shape of X_train: {X_train.shape}")

# Train Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Initialize LimeTabularExplainer
feature_names = [
    'mfcc1', 'mfcc2', 'mfcc3', 'mfcc4', 'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc12', 'mfcc13',
    'chroma1', 'chroma2', 'chroma3', 'chroma4', 'chroma5', 'chroma6', 'chroma7', 'chroma8', 'chroma9', 'chroma10', 'chroma11', 'chroma12',
    'spectral_contrast1', 'spectral_contrast2', 'spectral_contrast3', 'spectral_contrast4', 'spectral_contrast5', 'spectral_contrast6', 'spectral_contrast7',
    'mel1', 'mel2', 'mel3', 'mel4', 'mel5', 'mel6', 'mel7', 'mel8', 'mel9', 'mel10', 'mel11', 'mel12', 'mel13',
    'zcr'
]

explainer = LimeTabularExplainer(
    X_train,
    training_labels=y_train,
    mode='classification',
    feature_names=feature_names
)

# Explain a prediction for a test instance
if len(X_test) > 0:
    exp = explainer.explain_instance(X_test[0], model.predict_proba, num_features=10)
    exp.show_in_notebook()
else:
    print("X_test is empty!")

# Evaluate model on test set
accuracy = model.score(X_test, y_test)
print(f"Test accuracy: {accuracy:.4f}")


Extracted 161 features for file: c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/Fake\fake_1.wav
Extracted 161 features for file: c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/Fake\fake_10.wav
Extracted 161 features for file: c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/Fake\fake_100.wav
Extracted 161 features for file: c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/Fake\fake_1000.wav
Extracted 161 features for file: c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/Fake\fake_1001.wav
Extracted 161 features for file: c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/Fake\fake_1002.wav
Extracted 161 features for file: c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/Fake\fake_1003.wav
Extracted 161 features for file: c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/Fake\fake_1004.wav
Extracted 161 features for file: c:/Users/adity/Downloads/X_AI_for_fak

IndexError: list index out of range

In [31]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from lime.lime_tabular import LimeTabularExplainer

# Directory containing the audio files
data_dir = 'c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/'

# Function to extract features from audio files
def extract_features(file_path):
    # Load audio file
    y, sr = librosa.load(file_path, sr=None)

    # Extract MFCCs (Mel-frequency cepstral coefficients)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).mean(axis=1)

    # Extract Chroma features
    chroma = librosa.feature.chroma_stft(y=y, sr=sr).mean(axis=1)

    # Extract Spectral Contrast
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr).mean(axis=1)

    # Extract Mel spectrogram
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr).mean(axis=1)

    # Extract Zero Crossing Rate (ZCR)
    zcr = librosa.feature.zero_crossing_rate(y=y).mean(axis=1)

    # Combine all the features into a single array
    return np.hstack([mfccs, chroma, spectral_contrast, mel_spectrogram, zcr])

# Function to load all audio files from a directory and their corresponding labels
def load_data(data_dir):
    features = []
    labels = []

    # Walk through the directories
    for label in ['Fake', 'Real']:
        folder_path = os.path.join(data_dir, label)
        for filename in os.listdir(folder_path):
            if filename.endswith('.wav'):
                file_path = os.path.join(folder_path, filename)
                feature_vector = extract_features(file_path)
                features.append(feature_vector)
                labels.append(label)

    return np.array(features), np.array(labels)

# Load the data
X, y = load_data(data_dir)

# Check if data is loaded correctly
if len(X) == 0:
    raise ValueError("No data loaded. Check the data directory path and file structure.")

# Encode labels (Fake -> 0, Real -> 1)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train a classifier (Random Forest in this case)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Generate feature names dynamically
mfcc_features = [f'mfcc{i+1}' for i in range(13)]
chroma_features = [f'chroma{i+1}' for i in range(12)]
spectral_contrast_features = [f'spectral_contrast{i+1}' for i in range(7)]
mel_features = [f'mel{i+1}' for i in range(128)]  # Default n_mels=128 in librosa
zcr_features = ['zcr']

feature_names = mfcc_features + chroma_features + spectral_contrast_features + mel_features + zcr_features

# Initialize LimeTabularExplainer
explainer = LimeTabularExplainer(
    X_train,
    training_labels=y_train,
    mode='classification',
    feature_names=feature_names
)

# Explain a prediction for a test instance (first sample in the test set)
if len(X_test) > 0:
    exp = explainer.explain_instance(X_test[0], model.predict_proba, num_features=10)
    # For Jupyter notebook, use exp.show_in_notebook()
    # To save as HTML:
    exp.save_to_file('lime_explanation.html')
else:
    print("X_test is empty!")

# Evaluate the model on the test set
accuracy = model.score(X_test, y_test)
print(f"Test accuracy: {accuracy:.4f}")

KeyboardInterrupt: 

In [5]:
joblib.dump(X_train, "background_data.pkl")

NameError: name 'X_train' is not defined

In [8]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from lime.lime_tabular import LimeTabularExplainer
import shap
import matplotlib.pyplot as plt  # Added missing import


# Directory containing the audio files
data_dir = 'c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/'

# Feature extraction function
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).mean(axis=1)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr).mean(axis=1)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr).mean(axis=1)
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr).mean(axis=1)
    zcr = librosa.feature.zero_crossing_rate(y).mean(axis=1)
    return np.hstack([mfccs, chroma, spectral_contrast, mel_spectrogram, zcr])

# Data loading function
def load_data(data_dir):
    features = []
    labels = []
    for label in ['Fake', 'Real']:
        folder_path = os.path.join(data_dir, label)
        for filename in os.listdir(folder_path):
            if filename.endswith('.wav'):
                file_path = os.path.join(folder_path, filename)
                features.append(extract_features(file_path))
                labels.append(label)
    return np.array(features), np.array(labels)

# Main workflow
X, y = load_data(data_dir)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)  # Fake: 0, Real: 1
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Generate feature names
feature_names = (
    [f'mfcc{i+1}' for i in range(13)] +
    [f'chroma{i+1}' for i in range(12)] +
    [f'spectral_contrast{i+1}' for i in range(7)] +
    [f'mel{i+1}' for i in range(128)] +
    ['zcr']
)

# SHAP Explanation
shap_background = X_train[:100]  # Reference background distribution
shap_explainer = shap.TreeExplainer(
    model,
    data=shap_background,
    feature_perturbation="interventional"
)

if len(X_test) > 0:
    # Explain first 5 test samples with SHAP
    shap_sample = X_test[:5]
    shap_values = shap_explainer.shap_values(shap_sample)

    # Process SHAP values
    print(f"\nSHAP Analysis:")
    print(f"Feature vector size: {X_train.shape[1]}")  # Should be 161
    print(f"SHAP values shape: {np.array(shap_values).shape}")  # Should be (2, 5, 161)

    # Extract Real class explanations
    shap_values_real = np.array(shap_values)[1].T  # Shape (5, 161)

    # Global feature importance plot
    shap.summary_plot(
        shap_values_real,
        features=shap_sample,
        feature_names=feature_names,
        plot_type="bar",
        show=False
    )
    plt.tight_layout()
    plt.savefig('shap_global_importance.png')
    plt.close()

    # Individual explanation for first sample
    shap.force_plot(
        shap_explainer.expected_value[1],
        shap_values_real[0],
        shap_sample[0],
        feature_names=feature_names,
        matplotlib=True,
        show=False
    )
    plt.tight_layout()
    plt.savefig('shap_individual_explanation.png')
    plt.close()

# LIME Explanation
lime_explainer = LimeTabularExplainer(
    X_train,
    training_labels=y_train,
    mode='classification',
    feature_names=feature_names
)

if len(X_test) > 0:
    # Explain first test sample with LIME
    exp = lime_explainer.explain_instance(X_test[0], model.predict_proba, num_features=10)
    exp.save_to_file('lime_explanation.html')
    print("\nLIME explanation saved to lime_explanation.html")

# Final evaluation
accuracy = model.score(X_test, y_test)
print(f"\nModel Test Accuracy: {accuracy:.4f}")


SHAP Analysis:
Feature vector size: 161
SHAP values shape: (5, 161, 2)

LIME explanation saved to lime_explanation.html

Model Test Accuracy: 1.0000


In [9]:
import joblib

In [10]:
joblib.dump(X_train, "background_data.pkl")


['background_data.pkl']

In [12]:
import shap
import numpy as np

# Make sure the input sample is reshaped to 2D
sample = X_test[0].reshape(1, -1)

# Initialize the SHAP explainer
explainer = shap.Explainer(model, X_train)

# Get SHAP values for the sample
shap_values = explainer(sample)

# Plot SHAP values
shap.plots.waterfall(shap_values[0])


ValueError: The waterfall plot can currently only plot a single explanation, but a matrix of explanations (shape (161, 2)) was passed! Perhaps try `shap.plots.waterfall(shap_values[0])` or for multi-output models, try `shap.plots.waterfall(shap_values[0, 0])`.


SHAP Analysis:
Feature vector size: 161
SHAP values shape: (5, 161, 2)


AssertionError: The shape of the shap_values matrix does not match the shape of the provided data matrix.

<Figure size 640x480 with 0 Axes>

In [20]:
# Add these imports at the top
import shap
import matplotlib.pyplot as plt

# ... [Keep all previous code until after model training] ...

# SHAP Explanation

# 1. Initialize SHAP explainer
explainer_shap = shap.TreeExplainer(model)

# 2. Calculate SHAP values (use a subset of data for faster computation)
shap_sample = X_test[:100]  # Use first 100 samples from test set
shap_values = explainer_shap.shap_values(shap_sample)

# 3. Global feature importance summary plot
shap.summary_plot(shap_values, shap_sample, feature_names=feature_names, plot_type="bar", show=False)
plt.savefig('shap_global_importance.png', bbox_inches='tight')
plt.close()

# 4. Detailed summary plot
shap.summary_plot(shap_values, shap_sample, feature_names=feature_names, show=False)
plt.savefig('shap_detailed_summary.png', bbox_inches='tight')
plt.close()

# 5. Force plot for individual prediction (first test sample)
shap.force_plot(explainer_shap.expected_value[0],
                shap_values[0][0,:],
                shap_sample[0,:],
                feature_names=feature_names,
                matplotlib=True,
                show=False)
plt.savefig('shap_individual_force_plot.png', bbox_inches='tight')
plt.close()

# 6. Dependence plot for most important feature
shap.dependence_plot(np.argmax(np.abs(shap_values[0]).mean(0)),
                     shap_values[0],
                     shap_sample,
                     feature_names=feature_names,
                     interaction_index=None,
                     show=False)
plt.savefig('shap_dependence_plot.png', bbox_inches='tight')
plt.close()

  from .autonotebook import tqdm as notebook_tqdm


DimensionError: Length of features is not equal to the length of shap_values!

<Figure size 640x480 with 0 Axes>

In [21]:
print("Feature vector shape:", X_train.shape[1])  # Should be 161
print("SHAP values shape:", shap_values[0].shape)  # Should be (100, 161)

Feature vector shape: 161
SHAP values shape: (161, 2)


In [13]:
# If X_train is a pandas DataFrame
print(X_train.columns)


AttributeError: 'numpy.ndarray' object has no attribute 'columns'

In [22]:
# ... [previous code until SHAP section] ...

# 1. Get SHAP values properly
shap_sample = X_test[:5]  # Use smaller sample for debugging
shap_values = explainer_shap.shap_values(shap_sample)

# 2. Verify shapes
print(f"SHAP values structure: {type(shap_values)}")
print(f"Class 0 SHAP shape: {shap_values[0].shape}")  # Should be (5, 161)
print(f"Class 1 SHAP shape: {shap_values[1].shape}")  # Should be (5, 161)

# 3. Correct force plot for first sample
class_idx = 1  # Index for "Real" class
shap.force_plot(
    base_value=explainer_shap.expected_value[class_idx],
    shap_values=shap_values[class_idx][0, :],  # First sample, Real class
    features=shap_sample[0, :],
    feature_names=feature_names,
    matplotlib=True
)
plt.savefig('shap_force_plot.png', bbox_inches='tight')

SHAP values structure: <class 'numpy.ndarray'>
Class 0 SHAP shape: (161, 2)
Class 1 SHAP shape: (161, 2)


DimensionError: Length of features is not equal to the length of shap_values!

In [3]:
import joblib

In [4]:
joblib.dump(X_train, "background_data.pkl")

NameError: name 'X_train' is not defined

In [25]:
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import shap

# Directory containing the audio files
data_dir = 'c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/'

# Feature extraction function
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).mean(axis=1)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr).mean(axis=1)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr).mean(axis=1)
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr).mean(axis=1)
    zcr = librosa.feature.zero_crossing_rate(y).mean(axis=1)
    return np.hstack([mfccs, chroma, spectral_contrast, mel_spectrogram, zcr])

# Data loading function
def load_data(data_dir):
    features = []
    labels = []
    for label in ['Fake', 'Real']:
        folder_path = os.path.join(data_dir, label)
        for filename in os.listdir(folder_path):
            if filename.endswith('.wav'):
                file_path = os.path.join(folder_path, filename)
                features.append(extract_features(file_path))
                labels.append(label)
    return np.array(features), np.array(labels)

# Main workflow
X, y = load_data(data_dir)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)  # Fake: 0, Real: 1
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Generate feature names
feature_names = (
    [f'mfcc{i+1}' for i in range(13)] +
    [f'chroma{i+1}' for i in range(12)] +
    [f'spectral_contrast{i+1}' for i in range(7)] +
    [f'mel{i+1}' for i in range(128)] +
    ['zcr']
)

# SHAP explanation setup
background = X_train[:100]  # Reference background distribution
explainer = shap.TreeExplainer(
    model,
    data=background,
    feature_perturbation="interventional"
)

# Explain first 5 test samples
shap_sample = X_test[:5]
shap_values = explainer.shap_values(shap_sample)

# Verify dimensions
print(f"Feature vector size: {X_train.shape[1]}")  # Should be 161
print(f"SHAP values shape: {np.array(shap_values).shape}")  # Should be (2, 5, 161)

# Extract Real class explanations (index 1) and transpose to (samples, features)
shap_values_real = np.array(shap_values)[1].T  # Now shape (5, 161)

# Global feature importance plot
shap.summary_plot(
    shap_values_real,
    features=shap_sample,
    feature_names=feature_names,
    plot_type="bar",
    show=False
)
plt.tight_layout()
plt.savefig('shap_global_importance.png')
plt.close()

# Individual explanation for first sample
shap.force_plot(
    explainer.expected_value[1],
    shap_values_real[0],  # First sample's SHAP values
    shap_sample[0],
    feature_names=feature_names,
    matplotlib=True,
    show=False
)
plt.tight_layout()
plt.savefig('shap_individual_explanation.png')
plt.close()

print(f"\nModel Test Accuracy: {model.score(X_test, y_test):.2%}")

Feature vector size: 161
SHAP values shape: (5, 161, 2)

Model Test Accuracy: 100.00%


In [27]:
!pip install joblib



In [29]:
import joblib

In [30]:
joblib.dump(model, "model.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")

['label_encoder.pkl']

In [14]:
# Assuming 'feature_names' is the list of feature names
print(feature_names)


['mfcc1', 'mfcc2', 'mfcc3', 'mfcc4', 'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11', 'mfcc12', 'mfcc13', 'chroma1', 'chroma2', 'chroma3', 'chroma4', 'chroma5', 'chroma6', 'chroma7', 'chroma8', 'chroma9', 'chroma10', 'chroma11', 'chroma12', 'spectral_contrast1', 'spectral_contrast2', 'spectral_contrast3', 'spectral_contrast4', 'spectral_contrast5', 'spectral_contrast6', 'spectral_contrast7', 'mel1', 'mel2', 'mel3', 'mel4', 'mel5', 'mel6', 'mel7', 'mel8', 'mel9', 'mel10', 'mel11', 'mel12', 'mel13', 'zcr']


In [15]:
print(X_train.shape)  # This will show (number of samples, number of features)


(2880, 161)


In [17]:
print(np.array(features).shape)


(0,)


In [6]:
print("Number of samples in X_test:", len(X_test))


Number of samples in X_test: 720


In [7]:
print("Shape of X_test:", X_test.shape)


Shape of X_test: (720, 161)


In [11]:
print(f"Number of features in X_train: {X_train.shape[1]}")
print(f"Number of feature names: {len(feature_names)}")


Number of features in X_train: 161
Number of feature names: 46


In [8]:
# Check the number of samples in the test set
if len(X_test) > 0:
    # Example: Choose a random sample from the test set for explanation
    i = 0  # Or you can choose any valid index between 0 and len(X_test)-1
    exp = explainer.explain_instance(X_test[i], model.predict_proba, num_features=10)
    exp.show_in_notebook()
else:
    print("X_test is empty!")


NameError: name 'explainer' is not defined