In [None]:
import os
import numpy as np
import librosa
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
from tensorflow.keras.models import Sequential, save_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
import shap

# Configuration
# Hard-coded absolute path to the processed-audio folder on the author's machine.
PROCESSED_DIR = "c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/processed_audio/"
RESULTS_DIR = "model_results_large_with_shap"  # Output folder for the model, scaler, SHAP files and reports
SR = 22050  # Audio sample rate; not referenced by the code visible in this cell
FEATURE_COUNT = 76  # Number of audio features
N_SPLITS = 10  # Number of folds for cross-validation
N_REPEATS = 5  # Reduced repeats due to larger model
BACKGROUND_SAMPLES = 100  # Number of samples for SHAP background
os.makedirs(RESULTS_DIR, exist_ok=True)  # Create the output folder up front; no error if it already exists

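# NOTE: extract_features() and load_processed_dataset() are not repeated in this cell; they are
# carried over unchanged from an earlier version and must be defined before main() is run.
# main() also calls save_evaluation_metrics(), which likewise has to be defined elsewhere.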

def create_large_model():
    """Build and compile the dense classifier used for cross-validation.

    The network stacks four hidden stages (512, 256, 128 and 64 units). Each
    stage is an L2-regularised ReLU ``Dense`` layer followed by batch
    normalisation and dropout, and the stack ends in a 2-way softmax layer.

    Returns:
        A compiled Keras ``Sequential`` model expecting ``FEATURE_COUNT``
        input features, optimised with Adam (learning rate 0.0005) on sparse
        categorical cross-entropy and tracking accuracy.
    """
    # (units, dropout rate) for every hidden stage, widest first.
    hidden_stages = [(512, 0.5), (256, 0.4), (128, 0.3), (64, 0.2)]

    net = Sequential()
    for position, (units, drop_rate) in enumerate(hidden_stages):
        dense_kwargs = {'activation': 'relu', 'kernel_regularizer': l2(0.001)}
        if position == 0:
            # Only the first layer declares the input width.
            dense_kwargs['input_shape'] = (FEATURE_COUNT,)
        net.add(Dense(units, **dense_kwargs))
        net.add(BatchNormalization())
        net.add(Dropout(drop_rate))

    net.add(Dense(2, activation='softmax'))

    net.compile(
        optimizer=Adam(learning_rate=0.0005),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

def generate_shap_explanations(model, background_data, X_test, feature_names=None):
    """Generate and save SHAP explanations for model predictions.

    Writes two files into ``RESULTS_DIR``: ``shap_summary.png`` (summary plot
    of the SHAP values for output class 1) and ``shap_values.joblib`` (the raw
    SHAP values exactly as returned by the explainer).

    Args:
        model: Trained Keras model to explain.
        background_data: Reference samples handed to ``shap.DeepExplainer``.
        X_test: Samples to explain; also used as the feature values in the plot.
        feature_names: Optional list of feature labels for the plot axis.

    Returns:
        None. Any failure is reported on stdout and swallowed so that a SHAP
        problem does not abort the surrounding training run (best-effort).
    """
    try:
        print("\nGenerating SHAP explanations...")

        # Create explainer
        explainer = shap.DeepExplainer(model, background_data)

        # Calculate SHAP values
        shap_values = explainer.shap_values(X_test)

        # Older SHAP releases return a list with one (samples, features) array per
        # output class; newer ones return a single (samples, features, outputs)
        # array. Indexing the new layout with [1] would select sample 1 rather than
        # class 1, so both layouts are handled explicitly.
        if isinstance(shap_values, list):
            class1_values = shap_values[1]
        else:
            stacked = np.asarray(shap_values)
            class1_values = stacked[:, :, 1] if stacked.ndim == 3 else stacked

        # Plot summary plot; the figure is closed even if plotting or saving fails.
        fig = plt.figure(figsize=(10, 8))
        try:
            shap.summary_plot(class1_values, X_test, feature_names=feature_names, show=False)
            plt.tight_layout()
            plt.savefig(f'{RESULTS_DIR}/shap_summary.png')
        finally:
            plt.close(fig)

        # Save SHAP values
        joblib.dump(shap_values, f'{RESULTS_DIR}/shap_values.joblib')
        print("SHAP analysis completed and saved.")

    except Exception as e:
        print(f"Error in SHAP explanation: {str(e)}")

def save_background_data(X_scaled, y, n_samples=BACKGROUND_SAMPLES):
    """Persist the SHAP background set and the feature-name list.

    When more than ``2 * n_samples`` rows are available, a stratified subset
    of ``n_samples`` rows is drawn (fixed ``random_state=42``); otherwise the
    whole of ``X_scaled`` is used as background. The background array is
    written to ``background_data.joblib`` and the feature names to
    ``feature_names.joblib`` inside ``RESULTS_DIR``.

    Args:
        X_scaled: Scaled feature matrix.
        y: Class labels used for stratification.
        n_samples: Desired number of background rows.

    Returns:
        ``(background, feature_names)`` on success, ``(None, None)`` if any
        step raised (the error is printed).
    """
    def _indexed(stem, count):
        # e.g. ("MFCC_mean", 2) -> ["MFCC_mean_0", "MFCC_mean_1"]
        return [f"{stem}_{i}" for i in range(count)]

    # Features that contribute exactly one mean and one std value each.
    scalar_stems = (
        "Chroma",
        "SpectralCentroid",
        "SpectralBandwidth",
        "SpectralRolloff",
        "ZCR",
        "RMS",
    )

    try:
        has_enough_rows = len(X_scaled) > n_samples * 2
        if not has_enough_rows:
            # Small dataset: every row becomes background.
            background = X_scaled
        else:
            background = train_test_split(
                X_scaled,
                train_size=n_samples,
                stratify=y,
                random_state=42,
            )[0]

        joblib.dump(background, f'{RESULTS_DIR}/background_data.joblib')
        print(f"\nSaved background data ({background.shape[0]} samples)")

        # Names follow the column order of the feature vector.
        feature_names = _indexed("MFCC_mean", 20) + _indexed("MFCC_std", 20)
        for stem in scalar_stems:
            feature_names.append(f"{stem}_mean")
            feature_names.append(f"{stem}_std")
        feature_names += _indexed("SpectralContrast_mean", 6)
        feature_names += _indexed("SpectralContrast_std", 6)
        feature_names += _indexed("Tonnetz_mean", 6)
        feature_names += _indexed("Tonnetz_std", 6)

        joblib.dump(feature_names, f'{RESULTS_DIR}/feature_names.joblib')

        return background, feature_names

    except Exception as e:
        print(f"Error saving background data: {str(e)}")
        return None, None

def main():
    """Train with repeated stratified k-fold CV, keep the best model, explain it with SHAP.

    Steps:
      1. Load the dataset via ``load_processed_dataset()`` and standardise it.
      2. Save the scaler, the SHAP background data and the feature names.
      3. Run ``N_REPEATS`` rounds of ``N_SPLITS``-fold stratified CV. In each
         fold a fresh model is trained; early stopping and LR scheduling watch
         a validation split carved out of the *training* fold, so the test
         fold is only touched for the final evaluation.
      4. Save the best-scoring model, SHAP explanations, example predictions,
         the list of accuracies and a text report into ``RESULTS_DIR``.

    ``load_processed_dataset`` and ``save_evaluation_metrics`` are not defined
    in this cell and must already exist when this function is called.
    """
    # Share of each training fold held back for early stopping / LR scheduling.
    validation_fraction = 0.1

    # Load and prepare data
    X, y = load_processed_dataset()
    if len(X) == 0:
        print("No valid samples found. Exiting.")
        return

    # NOTE(review): the scaler is fitted on the complete dataset before the folds
    # are split, so test-fold statistics influence the scaling. It is kept this
    # way because the same fitted scaler is saved for later inference.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Save background data and scaler
    background_data, feature_names = save_background_data(X_scaled, y)
    joblib.dump(scaler, f'{RESULTS_DIR}/scaler.joblib')

    # Cross-validation setup
    all_accuracies = []
    best_accuracy = 0
    best_model = None
    best_X_test = None
    best_y_test = None

    print(f"\nStarting {N_REPEATS}×{N_SPLITS}-fold cross-validation with larger model...")

    for repeat in range(N_REPEATS):
        # A different seed per repeat yields a different fold assignment each time.
        kfold = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=repeat)
        repeat_accuracies = []

        for fold, (train_idx, test_idx) in enumerate(kfold.split(X_scaled, y)):
            X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
            y_train, y_test = y[train_idx], y[test_idx]

            # Hold out part of the training fold for the callbacks. Monitoring the
            # test fold instead would leak it into model selection and inflate
            # the reported accuracy.
            X_fit, X_val, y_fit, y_val = train_test_split(
                X_train, y_train,
                test_size=validation_fraction,
                stratify=y_train,
                random_state=repeat
            )

            # Train larger model
            model = create_large_model()

            callbacks = [
                EarlyStopping(patience=10, restore_best_weights=True, monitor='val_loss'),
                ReduceLROnPlateau(factor=0.5, patience=5, min_lr=1e-6)
            ]

            model.fit(
                X_fit, y_fit,
                epochs=100,
                batch_size=64,
                validation_data=(X_val, y_val),
                callbacks=callbacks,
                verbose=0
            )

            # Evaluate on the untouched test fold
            _, accuracy = model.evaluate(X_test, y_test, verbose=0)
            repeat_accuracies.append(accuracy)

            # Save metrics; run inference once and derive labels from the same output
            y_probs = model.predict(X_test)
            y_pred = y_probs.argmax(axis=1)
            save_evaluation_metrics(y_test, y_pred, y_probs, repeat, fold)

            # Track best model and corresponding test data.
            # NOTE(review): choosing the model by test-fold accuracy means
            # "Best Accuracy" is an optimistic figure, not an unbiased estimate.
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model = model
                best_X_test = X_test
                best_y_test = y_test
                save_model(model, f'{RESULTS_DIR}/best_model.h5')

            print(f"Repeat {repeat+1}, Fold {fold+1}: Accuracy = {accuracy:.4f}")

        # Repeat statistics
        mean_acc = np.mean(repeat_accuracies)
        all_accuracies.extend(repeat_accuracies)
        print(f"Repeat {repeat+1} complete. Mean accuracy: {mean_acc:.4f}")

    # Generate SHAP explanations for best model
    if best_model is not None and background_data is not None:
        print("\nGenerating SHAP explanations for best model...")
        # Only the first 100 test rows are explained.
        generate_shap_explanations(best_model, background_data, best_X_test[:100], feature_names)

        # Save example predictions with explanations
        example_idx = np.random.choice(len(best_X_test), size=min(10, len(best_X_test)), replace=False)
        example_data = {
            'X': best_X_test[example_idx],
            'y_true': best_y_test[example_idx],
            'y_pred': best_model.predict(best_X_test[example_idx]).argmax(axis=1),
            'feature_names': feature_names
        }
        joblib.dump(example_data, f'{RESULTS_DIR}/example_predictions.joblib')

    # Final results
    joblib.dump(all_accuracies, f"{RESULTS_DIR}/all_accuracies.joblib")

    print("\nFinal Results:")
    print(f"Mean Accuracy: {np.mean(all_accuracies):.4f} (±{np.std(all_accuracies):.4f})")
    print(f"Best Accuracy: {best_accuracy:.4f}")
    print(f"Worst Accuracy: {np.min(all_accuracies):.4f}")

    # Save comprehensive report. The header is built from the configured constants
    # so it cannot drift from the run, and UTF-8 is explicit because the text
    # contains "×" and "±".
    with open(f"{RESULTS_DIR}/final_report.txt", "w", encoding="utf-8") as f:
        f.write(f"{N_REPEATS}×{N_SPLITS}-Fold Cross Validation Results\n")
        f.write(f"Mean Accuracy: {np.mean(all_accuracies):.4f} (±{np.std(all_accuracies):.4f})\n")
        f.write(f"Best Accuracy: {best_accuracy:.4f}\n")
        f.write(f"Worst Accuracy: {np.min(all_accuracies):.4f}\n")

    print(f"\nAll results saved in '{RESULTS_DIR}' directory")

# Entry point: the guard previously had no body, which is a syntax error.
if __name__ == "__main__":
    main()

In [3]:
import os
import numpy as np
import librosa
import joblib
import random
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import load_model

# Configuration
# Hard-coded absolute path to the dataset root; load_test_data() reads its "Fake" and "Real" subfolders.
DATA_DIR = "c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/"
RESULTS_DIR = "test_results"  # Output folder for the evaluation artifacts
SR = 22050  # Sample rate
N_FFT = 2048  # FFT size; also passed as frame_length to the ZCR and RMS extractors
HOP_LENGTH = 512  # Hop length passed to the frame-based feature extractors
FEATURE_COUNT = 76  # Must match your training setup
TEST_SIZE = 0.1  # 10% of data for testing
os.makedirs(RESULTS_DIR, exist_ok=True)  # Create the output folder up front; no error if it already exists

def extract_features(file_path):
    """Compute the 76-value feature vector for one audio file.

    Layout, in order: MFCC mean/std (20 + 20), chroma mean/std (2), spectral
    centroid / bandwidth / rolloff mean/std (6), zero-crossing-rate mean/std
    (2), RMS mean/std (2), spectral-contrast mean/std over the first six rows
    (6 + 6), tonnetz mean/std (6 + 6).

    Args:
        file_path: Path of the audio file, loaded with librosa at ``SR``.

    Returns:
        Array of shape ``(1, FEATURE_COUNT)``, or ``None`` if loading or
        extraction failed or the vector had an unexpected length (the error
        is printed).
    """
    try:
        signal, _ = librosa.load(file_path, sr=SR)
        stft_args = {'n_fft': N_FFT, 'hop_length': HOP_LENGTH}
        frame_args = {'frame_length': N_FFT, 'hop_length': HOP_LENGTH}
        collected = []

        def add_per_row(matrix):
            # One mean and one std per row of the feature matrix.
            collected.extend(np.mean(matrix, axis=1))
            collected.extend(np.std(matrix, axis=1))

        def add_overall(matrix):
            # A single mean and std over the whole feature matrix.
            collected.extend([np.mean(matrix), np.std(matrix)])

        # 1. MFCCs (40 features)
        add_per_row(librosa.feature.mfcc(y=signal, sr=SR, n_mfcc=20, **stft_args))

        # 2. Chroma (2 features)
        add_overall(librosa.feature.chroma_stft(y=signal, sr=SR, **stft_args))

        # 3. Spectral centroid, bandwidth and rolloff (6 features)
        centroid = librosa.feature.spectral_centroid(y=signal, sr=SR)
        bandwidth = librosa.feature.spectral_bandwidth(y=signal, sr=SR)
        rolloff = librosa.feature.spectral_rolloff(y=signal, sr=SR)
        for spectral_stat in (centroid, bandwidth, rolloff):
            add_overall(spectral_stat)

        # 4. Zero crossing rate (2 features)
        add_overall(librosa.feature.zero_crossing_rate(signal, **frame_args))

        # 5. RMS energy (2 features)
        add_overall(librosa.feature.rms(y=signal, **frame_args))

        # 6. Spectral contrast (12 features); only the first six rows are kept
        contrast = librosa.feature.spectral_contrast(y=signal, sr=SR, n_bands=6, **stft_args)
        add_per_row(contrast[:6])

        # 7. Tonnetz (12 features)
        add_per_row(librosa.feature.tonnetz(y=signal, sr=SR))

        vector = np.array(collected)
        if len(vector) != FEATURE_COUNT:
            raise ValueError(f"Expected {FEATURE_COUNT} features, got {len(vector)}")
        return vector.reshape(1, -1)

    except Exception as e:
        print(f"Error processing {file_path}: {str(e)}")
        return None

def load_test_data(seed=None):
    """Load a random ``TEST_SIZE`` fraction of each class as test data.

    For each of the ``Fake`` (label 1) and ``Real`` (label 0) subfolders of
    ``DATA_DIR`` that exists, the ``.wav`` files are listed in sorted order, a
    random sample of them is drawn, and features are extracted from every
    sampled file. Files whose extraction fails are skipped.

    Args:
        seed: Optional seed for a private random generator, making the drawn
            sample reproducible. With the default ``None`` the module-level
            ``random`` state is used, as before.

    Returns:
        ``(X_test, y_test)`` as NumPy arrays; both are empty if nothing loaded.
    """
    sampler = random if seed is None else random.Random(seed)
    X_test, y_test = [], []

    for class_name, label in (("Fake", 1), ("Real", 0)):
        class_dir = os.path.join(DATA_DIR, class_name)
        if not os.path.exists(class_dir):
            continue

        # Sorted so that a given seed always selects the same files.
        wav_files = sorted(f for f in os.listdir(class_dir) if f.endswith(".wav"))

        # At least one file when any exist, but never more than are available:
        # sampling 1 item from an empty folder would raise ValueError.
        sample_size = min(len(wav_files), max(1, int(len(wav_files) * TEST_SIZE)))
        chosen = sampler.sample(wav_files, sample_size)
        print(f"Found {len(wav_files)} {class_name.lower()} samples, using {len(chosen)} for testing ({TEST_SIZE*100}%)")

        for file in chosen:
            features = extract_features(os.path.join(class_dir, file))
            if features is not None:
                X_test.append(features[0])
                y_test.append(label)

    labels = np.array(y_test)
    print(f"\nTotal test samples loaded: {len(X_test)}")
    print(f"Class distribution: Real={int(np.sum(labels == 0))}, Fake={int(np.sum(labels == 1))}")
    return np.array(X_test), labels

def evaluate_model(model, scaler, X_test, y_test):
    """Evaluate the model on the test set and save the artifacts.

    Prints the accuracy and classification report, and writes
    ``confusion_matrix.png``, ``classification_report.txt`` and
    ``test_predictions.npy`` into ``RESULTS_DIR``.

    Args:
        model: Trained classifier whose ``predict`` returns per-class scores.
        scaler: Fitted scaler applied to ``X_test`` before prediction.
        X_test: Unscaled feature matrix.
        y_test: True labels (0 = Real, 1 = Fake).
    """
    # Pinning the label set keeps the matrix 2x2 and the report aligned with
    # the class names even if one class is absent from y_test and y_pred.
    class_ids = [0, 1]
    class_names = ['Real', 'Fake']

    # Scale test data
    X_test_scaled = scaler.transform(X_test)

    # Run inference once; hard labels come from the same probabilities.
    y_probs = model.predict(X_test_scaled)
    y_pred = y_probs.argmax(axis=1)

    # Calculate and print accuracy
    accuracy = np.mean(y_pred == y_test)
    print(f"\nTest Accuracy: {accuracy:.4f}")

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred, labels=class_ids)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.savefig(f'{RESULTS_DIR}/confusion_matrix.png')
    plt.close()

    # Classification Report
    report = classification_report(y_test, y_pred, labels=class_ids, target_names=class_names)
    print("\nClassification Report:")
    print(report)
    with open(f'{RESULTS_DIR}/classification_report.txt', 'w') as f:
        f.write(report)

    # Save predictions for analysis. A dict is stored as a pickled object array,
    # so reading it back needs np.load(..., allow_pickle=True).item().
    np.save(f'{RESULTS_DIR}/test_predictions.npy', {
        'true_labels': y_test,
        'pred_labels': y_pred,
        'pred_probs': y_probs
    })

def main():
    """Sample test data, load the saved model and scaler, and evaluate.

    Returns early with a message when no samples could be loaded or when
    either artifact fails to load.
    """
    artifact_dir = 'model_results_large_with_shap'  # Update path if needed

    # Load test data (10% of available data)
    X_test, y_test = load_test_data()
    if not len(X_test):
        print("No valid test samples found. Exiting.")
        return

    # Load model and scaler; evaluation only runs when both loaded.
    try:
        model = load_model(f'{artifact_dir}/best_model.h5')
        scaler = joblib.load(f'{artifact_dir}/scaler.joblib')
    except Exception as load_error:
        print(f"Error loading model or scaler: {str(load_error)}")
    else:
        evaluate_model(model, scaler, X_test, y_test)
        print(f"\nTest results saved in '{RESULTS_DIR}' directory")

# Run the evaluation only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

Found 1800 fake samples, using 180 for testing (10.0%)
Found 1800 real samples, using 180 for testing (10.0%)





Total test samples loaded: 360
Class distribution: Real=180, Fake=180
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 

Test Accuracy: 0.8028

Classification Report:
              precision    recall  f1-score   support

        Real       0.77      0.87      0.82       180
        Fake       0.85      0.73      0.79       180

    accuracy                           0.80       360
   macro avg       0.81      0.80      0.80       360
weighted avg       0.81      0.80      0.80       360


Test results saved in 'test_results' directory


In [4]:
import os
import numpy as np
import librosa
import joblib
import random
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (classification_report, confusion_matrix,
                            roc_curve, auc, precision_recall_curve, average_precision_score)
from tensorflow.keras.models import load_model
from sklearn.preprocessing import label_binarize

# Configuration
# Hard-coded absolute path to the dataset root; load_test_data() reads its "Fake" and "Real" subfolders.
DATA_DIR = "c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/Data/"
RESULTS_DIR = "test_results"  # Output folder for the report, plots and predictions
SR = 22050  # Sample rate passed to librosa
N_FFT = 2048  # FFT size; also passed as frame_length to the ZCR and RMS extractors
HOP_LENGTH = 512  # Hop length passed to the frame-based feature extractors
FEATURE_COUNT = 76  # Expected length of the extracted feature vector
TEST_SIZE = 0.1  # 10% of data for testing
os.makedirs(RESULTS_DIR, exist_ok=True)  # Create the output folder up front; no error if it already exists

def extract_features(file_path):
    """Turn one audio file into a ``(1, FEATURE_COUNT)`` feature row.

    Every librosa feature matrix is reduced to a mean and a standard
    deviation, either per row (MFCC, spectral contrast, tonnetz) or over the
    whole matrix (chroma, spectral centroid / bandwidth / rolloff,
    zero-crossing rate, RMS). The summaries are concatenated in that fixed
    order.

    Args:
        file_path: Path of the audio file to load at sample rate ``SR``.

    Returns:
        The feature row, or ``None`` when anything fails, including a
        feature-count mismatch; the error is printed.
    """
    try:
        waveform, _ = librosa.load(file_path, sr=SR)
        spectral = dict(n_fft=N_FFT, hop_length=HOP_LENGTH)
        framing = dict(frame_length=N_FFT, hop_length=HOP_LENGTH)

        # (feature matrix, reduction axis) in feature-vector order.
        # axis=1 -> one statistic per row; axis=None -> one over the whole matrix.
        stages = [
            # MFCCs
            (librosa.feature.mfcc(y=waveform, sr=SR, n_mfcc=20, **spectral), 1),
            # Chroma
            (librosa.feature.chroma_stft(y=waveform, sr=SR, **spectral), None),
            # Spectral Features
            (librosa.feature.spectral_centroid(y=waveform, sr=SR), None),
            (librosa.feature.spectral_bandwidth(y=waveform, sr=SR), None),
            (librosa.feature.spectral_rolloff(y=waveform, sr=SR), None),
            # Zero Crossing Rate
            (librosa.feature.zero_crossing_rate(waveform, **framing), None),
            # RMS Energy
            (librosa.feature.rms(y=waveform, **framing), None),
            # Spectral Contrast (first six rows only)
            (librosa.feature.spectral_contrast(y=waveform, sr=SR, n_bands=6, **spectral)[:6], 1),
            # Tonnetz
            (librosa.feature.tonnetz(y=waveform, sr=SR), 1),
        ]

        values = []
        for matrix, axis in stages:
            # atleast_1d lets scalar and per-row results share one code path.
            values.extend(np.atleast_1d(np.mean(matrix, axis=axis)))
            values.extend(np.atleast_1d(np.std(matrix, axis=axis)))

        row = np.array(values)
        if len(row) != FEATURE_COUNT:
            raise ValueError(f"Expected {FEATURE_COUNT} features, got {len(row)}")
        return row.reshape(1, -1)

    except Exception as e:
        print(f"Error processing {file_path}: {str(e)}")
        return None

def load_test_data(seed=None):
    """Load a random ``TEST_SIZE`` fraction of each class as test data.

    The sample is drawn independently per class folder (``Fake`` -> label 1,
    ``Real`` -> label 0) under ``DATA_DIR``; this is plain per-class random
    sampling, not a stratified split of a single pool. Files whose feature
    extraction fails are skipped.

    Args:
        seed: Optional seed for a private random generator, making the drawn
            sample reproducible. With the default ``None`` the module-level
            ``random`` state is used, as before.

    Returns:
        ``(X_test, y_test)`` as NumPy arrays; both are empty if nothing loaded.
    """
    sampler = random if seed is None else random.Random(seed)
    X_test, y_test = [], []

    for class_name, label in (("Fake", 1), ("Real", 0)):
        class_dir = os.path.join(DATA_DIR, class_name)
        if not os.path.exists(class_dir):
            continue

        # Sorted so that a given seed always selects the same files.
        wav_files = sorted(f for f in os.listdir(class_dir) if f.endswith(".wav"))

        # At least one file when any exist, but never more than are available:
        # sampling 1 item from an empty folder would raise ValueError.
        sample_size = min(len(wav_files), max(1, int(len(wav_files) * TEST_SIZE)))
        chosen = sampler.sample(wav_files, sample_size)
        print(f"Using {len(chosen)}/{len(wav_files)} {class_name.lower()} samples")

        for file in chosen:
            features = extract_features(os.path.join(class_dir, file))
            if features is not None:
                X_test.append(features[0])
                y_test.append(label)

    print(f"\nTotal test samples: {len(X_test)} (Real={y_test.count(0)}, Fake={y_test.count(1)})")
    return np.array(X_test), np.array(y_test)

def plot_confusion_matrix(y_true, y_pred):
    """Generate and save an annotated confusion-matrix heatmap.

    The plot is written to ``confusion_matrix.png`` in ``RESULTS_DIR``.

    Args:
        y_true: True labels (0 = Real, 1 = Fake).
        y_pred: Predicted labels, same encoding.
    """
    class_names = ['Real', 'Fake']

    # Pin the label set so the matrix is always 2x2 and lines up with the tick
    # labels, even when one class is absent from both y_true and y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names,
                annot_kws={"size": 16})
    plt.title('Confusion Matrix', fontsize=14)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.ylabel('True Label', fontsize=12)
    plt.savefig(f'{RESULTS_DIR}/confusion_matrix.png', dpi=300, bbox_inches='tight')
    plt.close()

def plot_roc_curve(y_true, y_scores):
    """Draw the ROC curve for class 1 and save it as ``roc_curve.png``.

    Args:
        y_true: True binary labels.
        y_scores: Per-class score matrix; column 1 is used as the score of
            the positive class.
    """
    positive_scores = y_scores[:, 1]
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, positive_scores)
    area = auc(false_pos_rate, true_pos_rate)

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot(false_pos_rate, true_pos_rate, color='darkorange', lw=2,
            label=f'ROC curve (AUC = {area:.2f})')
    # Diagonal reference line from (0, 0) to (1, 1).
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate', fontsize=12)
    ax.set_ylabel('True Positive Rate', fontsize=12)
    ax.set_title('Receiver Operating Characteristic', fontsize=14)
    ax.legend(loc="lower right")
    fig.savefig(f'{RESULTS_DIR}/roc_curve.png', dpi=300, bbox_inches='tight')
    plt.close(fig)

def plot_precision_recall(y_true, y_scores):
    """Draw the precision-recall curve for class 1 and save it as ``precision_recall.png``.

    Args:
        y_true: True binary labels.
        y_scores: Per-class score matrix; column 1 is used as the score of
            the positive class.
    """
    positive_scores = y_scores[:, 1]
    precision_values, recall_values, _ = precision_recall_curve(y_true, positive_scores)
    mean_precision = average_precision_score(y_true, positive_scores)

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot(recall_values, precision_values, color='blue', lw=2,
            label=f'Precision-Recall (AP = {mean_precision:.2f})')
    ax.set_xlabel('Recall', fontsize=12)
    ax.set_ylabel('Precision', fontsize=12)
    ax.set_title('Precision-Recall Curve', fontsize=14)
    ax.legend(loc="upper right")
    fig.savefig(f'{RESULTS_DIR}/precision_recall.png', dpi=300, bbox_inches='tight')
    plt.close(fig)

def evaluate_model(model, scaler, X_test, y_test):
    """Evaluate the model and save the report, plots and raw predictions.

    Prints the accuracy and classification report, writes
    ``classification_report.txt`` and ``predictions.npz`` into
    ``RESULTS_DIR``, and produces the confusion-matrix, ROC and
    precision-recall plots via the plotting helpers.

    Args:
        model: Trained classifier whose ``predict`` returns per-class scores.
        scaler: Fitted scaler applied to ``X_test`` before prediction.
        X_test: Unscaled feature matrix.
        y_test: True labels (0 = Real, 1 = Fake).
    """
    # Scale, then run inference once; hard labels come from the same scores.
    X_test_scaled = scaler.transform(X_test)
    y_scores = model.predict(X_test_scaled)
    y_pred = y_scores.argmax(axis=1)

    # Calculate metrics
    accuracy = np.mean(y_pred == y_test)
    print(f"\nTest Accuracy: {accuracy:.4f}")

    # Classification Report. The label set is pinned so the class names stay
    # aligned even if one class is absent from y_test and y_pred.
    report = classification_report(y_test, y_pred, labels=[0, 1],
                                   target_names=['Real', 'Fake'])
    print("\nClassification Report:")
    print(report)
    with open(f'{RESULTS_DIR}/classification_report.txt', 'w') as f:
        f.write(report)

    # Generate plots
    plot_confusion_matrix(y_test, y_pred)
    plot_roc_curve(y_test, y_scores)
    plot_precision_recall(y_test, y_scores)

    # Save raw predictions
    np.savez(f'{RESULTS_DIR}/predictions.npz',
             y_true=y_test,
             y_pred=y_pred,
             y_scores=y_scores)

def main():
    """Sample test data, load the saved model and scaler, and run the full evaluation.

    Returns early with a message when no samples could be loaded or when
    either artifact fails to load.
    """
    artifact_dir = 'model_results_large_with_shap'

    # Load data
    X_test, y_test = load_test_data()
    if not len(X_test):
        print("No test samples found!")
        return

    # Load model and scaler; evaluation only runs when both loaded.
    try:
        model = load_model(f'{artifact_dir}/best_model.h5')
        scaler = joblib.load(f'{artifact_dir}/scaler.joblib')
    except Exception as load_error:
        print(f"Error loading model: {str(load_error)}")
    else:
        evaluate_model(model, scaler, X_test, y_test)
        print(f"\nAll results saved to {RESULTS_DIR}")

# Run the evaluation only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

Using 180/1800 fake samples
Using 180/1800 real samples

Total test samples: 360 (Real=180, Fake=180)




[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 

Test Accuracy: 0.8278

Classification Report:
              precision    recall  f1-score   support

        Real       0.78      0.91      0.84       180
        Fake       0.89      0.75      0.81       180

    accuracy                           0.83       360
   macro avg       0.84      0.83      0.83       360
weighted avg       0.84      0.83      0.83       360


All results saved to test_results


In [None]:
import os
import numpy as np
import librosa
import joblib
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import load_model

# Configuration
# Hard-coded absolute path to the processed-audio root; load_test_data() reads its "Fake" and "Real" subfolders.
PROCESSED_DIR = "c:/Users/adity/Downloads/X_AI_for_fake_real_audio_detection/processed_audio/"
SR = 22050  # Sample rate passed to librosa
N_FFT = 2048  # FFT size; also passed as frame_length to the ZCR and RMS extractors
HOP_LENGTH = 512  # Hop length passed to the frame-based feature extractors
FEATURE_COUNT = 76  # Expected length of the extracted feature vector
RESULTS_DIR = "processed_audio_test_results"  # Output folder for the confusion matrix and report
os.makedirs(RESULTS_DIR, exist_ok=True)  # Create the output folder up front; no error if it already exists

def extract_features(file_path):
    """Extract the fixed-length feature vector for a single audio file.

    The vector is assembled in this order: MFCC means and stds (40), chroma
    mean/std (2), spectral centroid, bandwidth and rolloff mean/std (6),
    zero-crossing-rate mean/std (2), RMS mean/std (2), spectral-contrast
    means and stds for the first six rows (12), tonnetz means and stds (12).

    Args:
        file_path: Path of the audio file, loaded with librosa at ``SR``.

    Returns:
        Array of shape ``(1, FEATURE_COUNT)``; ``None`` if any step fails or
        the vector length differs from ``FEATURE_COUNT`` (the error is
        printed).
    """
    try:
        samples, _ = librosa.load(file_path, sr=SR)

        # 1. MFCCs (40 features: 20 means + 20 std)
        mfcc_matrix = librosa.feature.mfcc(
            y=samples, sr=SR, n_mfcc=20, n_fft=N_FFT, hop_length=HOP_LENGTH
        )
        feature_list = list(np.mean(mfcc_matrix, axis=1))
        feature_list += list(np.std(mfcc_matrix, axis=1))

        # 2. Chroma (2 features)
        chroma_matrix = librosa.feature.chroma_stft(
            y=samples, sr=SR, n_fft=N_FFT, hop_length=HOP_LENGTH
        )
        feature_list += [np.mean(chroma_matrix), np.std(chroma_matrix)]

        # 3. Spectral Features (6 features)
        centroid = librosa.feature.spectral_centroid(y=samples, sr=SR)
        bandwidth = librosa.feature.spectral_bandwidth(y=samples, sr=SR)
        rolloff = librosa.feature.spectral_rolloff(y=samples, sr=SR)
        feature_list += [np.mean(centroid), np.std(centroid)]
        feature_list += [np.mean(bandwidth), np.std(bandwidth)]
        feature_list += [np.mean(rolloff), np.std(rolloff)]

        # 4. Zero Crossing Rate (2 features)
        zcr_frames = librosa.feature.zero_crossing_rate(
            samples, frame_length=N_FFT, hop_length=HOP_LENGTH
        )
        feature_list += [np.mean(zcr_frames), np.std(zcr_frames)]

        # 5. RMS Energy (2 features)
        rms_frames = librosa.feature.rms(
            y=samples, frame_length=N_FFT, hop_length=HOP_LENGTH
        )
        feature_list += [np.mean(rms_frames), np.std(rms_frames)]

        # 6. Spectral Contrast (12 features); only the first six rows are summarised
        contrast_rows = librosa.feature.spectral_contrast(
            y=samples, sr=SR, n_bands=6, n_fft=N_FFT, hop_length=HOP_LENGTH
        )[:6]
        feature_list += list(np.mean(contrast_rows, axis=1))
        feature_list += list(np.std(contrast_rows, axis=1))

        # 7. Tonnetz (12 features: 6 means + 6 std)
        tonnetz_matrix = librosa.feature.tonnetz(y=samples, sr=SR)
        feature_list += list(np.mean(tonnetz_matrix, axis=1))
        feature_list += list(np.std(tonnetz_matrix, axis=1))

        feature_vector = np.array(feature_list)
        if len(feature_vector) != FEATURE_COUNT:
            raise ValueError(f"Expected {FEATURE_COUNT} features, got {len(feature_vector)}")
        return feature_vector.reshape(1, -1)

    except Exception as e:
        print(f"Error processing {file_path}: {str(e)}")
        return None

def load_test_data():
    """Extract features for every ``.wav`` file under ``PROCESSED_DIR``.

    The ``Fake`` subfolder is processed first (label 1), then ``Real``
    (label 0). A missing subfolder is skipped silently, and files whose
    feature extraction fails are left out.

    Returns:
        ``(X_test, y_test)`` as NumPy arrays.
    """
    feature_rows = []
    labels = []

    # (subfolder, label, word used in the log message)
    class_specs = (("Fake", 1, "fake"), ("Real", 0, "real"))

    for folder, label, tag in class_specs:
        folder_path = os.path.join(PROCESSED_DIR, folder)
        if not os.path.exists(folder_path):
            continue

        wav_names = [name for name in os.listdir(folder_path) if name.endswith(".wav")]
        print(f"Found {len(wav_names)} {tag} test samples")

        for name in wav_names:
            extracted = extract_features(os.path.join(folder_path, name))
            if extracted is None:
                continue
            feature_rows.append(extracted[0])
            labels.append(label)

    label_array = np.array(labels)
    print(f"\nTotal test samples loaded: {len(feature_rows)}")
    print(f"Class distribution: Real={sum(label_array==0)}, Fake={sum(label_array==1)}")
    return np.array(feature_rows), label_array

def evaluate_model(model, scaler, X_test, y_test):
    """Evaluate the model and save the confusion matrix and report.

    Prints the accuracy and classification report, and writes
    ``confusion_matrix.png`` and ``classification_report.txt`` into
    ``RESULTS_DIR``.

    Args:
        model: Trained classifier whose ``predict`` returns per-class scores.
        scaler: Fitted scaler applied to ``X_test`` before prediction.
        X_test: Unscaled feature matrix.
        y_test: True labels (0 = Real, 1 = Fake).
    """
    # Pinning the label set keeps the matrix 2x2 and the report aligned with
    # the class names even if one class is absent from y_test and y_pred.
    class_ids = [0, 1]
    class_names = ['Real', 'Fake']

    # Scale test data
    X_test_scaled = scaler.transform(X_test)

    # Make predictions. A single inference pass is enough: the probabilities
    # from the former second predict() call were never used.
    y_pred = model.predict(X_test_scaled).argmax(axis=1)

    # Calculate and print accuracy
    accuracy = np.mean(y_pred == y_test)
    print(f"\nTest Accuracy: {accuracy:.4f}")

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred, labels=class_ids)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.savefig(f'{RESULTS_DIR}/confusion_matrix.png')
    plt.close()

    # Classification Report
    report = classification_report(y_test, y_pred, labels=class_ids, target_names=class_names)
    print("\nClassification Report:")
    print(report)
    with open(f'{RESULTS_DIR}/classification_report.txt', 'w') as f:
        f.write(report)

def main():
    """Load the processed-audio test set, the saved model and scaler, and evaluate.

    Returns early with a message when no samples could be loaded or when
    either artifact fails to load.
    """
    # Load test data
    X_test, y_test = load_test_data()
    if not len(X_test):
        print("No valid test samples found. Exiting.")
        return

    # Load model and scaler; evaluation only runs when both loaded.
    try:
        model = load_model('audio_model_nn_32.h5')
        scaler = joblib.load('scaler_nn_32.joblib')
    except Exception as load_error:
        print(f"Error loading model or scaler: {str(load_error)}")
    else:
        evaluate_model(model, scaler, X_test, y_test)
        print(f"\nTest results saved in '{RESULTS_DIR}' directory")

# Run the evaluation only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()