In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import glob
import numpy as np
import librosa
import joblib
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [3]:
import soundfile as sf
import librosa

def extract_mfcc_features(file_path, n_mfcc=40):
    """
    Extract MFCC features from an audio file using soundfile first, fallback to librosa.audioread.
    """
    try:
        # First try with soundfile
        y, sr = sf.read(file_path, dtype='float32')

        if len(y.shape) > 1:
            y = y.mean(axis=1)  # Convert stereo to mono

    except Exception:
        # Fallback to librosa's audioread loader
        try:
            y, sr = librosa.load(file_path, sr=None)
        except Exception as e:
            print(f"Error loading {file_path}: {e}")
            return None

    # Skip very short files
    if len(y) < sr * 0.5:
        print(f"Skipping short file: {file_path}")
        return None

    try:
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        if mfcc.shape[1] == 0:
            print(f"Empty MFCC for {file_path}")
            return None

        mfcc_mean = np.mean(mfcc.T, axis=0)
        return mfcc_mean

    except Exception as e:
        print(f"Error extracting MFCC from {file_path}: {e}")
        return None

In [4]:
data_path = "/content/drive/MyDrive/processed_audio_files"

In [5]:
real_files = glob.glob(os.path.join(data_path, "*", "Real", "*.wav"))
fake_files = glob.glob(os.path.join(data_path, "*", "Fake", "*.wav"))

# Combine all audio paths
all_files = real_files + fake_files

# Create labels: 0 for real, 1 for fake
labels = [0] * len(real_files) + [1] * len(fake_files)

print(f"Total real: {len(real_files)}, fake: {len(fake_files)}, total: {len(all_files)}")

Total real: 10600, fake: 10589, total: 21189


In [None]:
features = []
valid_labels = []

for file_path, label in zip(all_files, labels):
    mfcc_feat = extract_mfcc_features(file_path)

    if mfcc_feat is None or mfcc_feat.shape[0] == 0:
        print(f"Skipping {file_path} due to invalid or empty features.")
        continue

    features.append(mfcc_feat)
    valid_labels.append(label)

X = np.array(features)
y = np.array(valid_labels)

print(f"Final dataset shape: {X.shape}, labels: {y.shape}")


  y, sr = librosa.load(file_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(file_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(file_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(file_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(file_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(file_pa

In [None]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features (important for SVM and Neural Network)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save features and scaler for later use
joblib.dump((X, y), "/content/drive/MyDrive/mfcc_features.pkl")
joblib.dump(scaler, "/content/drive/MyDrive/scaler.pkl")

['/content/drive/MyDrive/scaler.pkl']

In [None]:
# ======================
# Model 1: Random Forest
# ======================
print("\n" + "="*30)
print("=== Random Forest Model ===")
print("="*30)

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate
y_pred_rf = rf_model.predict(X_test)
print("\nTest Set Performance:")
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
print("\nClassification Report:")
print(classification_report(y_test, y_pred_rf, target_names=["Real", "Fake"]))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred_rf))

# Cross-validation
print("\nCross-Validation Results:")
rf_scores = cross_val_score(rf_model, X, y, cv=5, scoring='accuracy')
print("Fold Accuracies:", rf_scores)
print("Mean Accuracy:", rf_scores.mean())
print("Std Deviation:", rf_scores.std())

# Save model
joblib.dump(rf_model, "/content/drive/MyDrive/voice_rf_model.pkl")


=== Random Forest Model ===

Test Set Performance:
Accuracy: 1.0

Classification Report:
              precision    recall  f1-score   support

        Real       1.00      1.00      1.00        96
        Fake       1.00      1.00      1.00       104

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200


Confusion Matrix:
[[ 96   0]
 [  0 104]]

Cross-Validation Results:
Fold Accuracies: [1.    1.    0.995 1.    1.   ]
Mean Accuracy: 0.999
Std Deviation: 0.0020000000000000018


['/content/drive/MyDrive/voice_rf_model.pkl']

In [None]:
# ======================
# Model 2: SVM
# ======================
print("\n" + "="*30)
print("=== Support Vector Machine ===")
print("="*30)

svm_model = make_pipeline(
    StandardScaler(),
    SVC(kernel='rbf', C=10, gamma='scale', random_state=42, probability=True)
)
svm_model.fit(X_train, y_train)

# Evaluate
y_pred_svm = svm_model.predict(X_test)
print("\nTest Set Performance:")
print("Accuracy:", accuracy_score(y_test, y_pred_svm))
print("\nClassification Report:")
print(classification_report(y_test, y_pred_svm, target_names=["Real", "Fake"]))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred_svm))

# Cross-validation
print("\nCross-Validation Results:")
svm_scores = cross_val_score(svm_model, X, y, cv=5, scoring='accuracy')
print("Fold Accuracies:", svm_scores)
print("Mean Accuracy:", svm_scores.mean())
print("Std Deviation:", svm_scores.std())

# Save model
joblib.dump(svm_model, "/content/drive/MyDrive/voice_svm_model.pkl")


=== Support Vector Machine ===

Test Set Performance:
Accuracy: 1.0

Classification Report:
              precision    recall  f1-score   support

        Real       1.00      1.00      1.00        96
        Fake       1.00      1.00      1.00       104

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200


Confusion Matrix:
[[ 96   0]
 [  0 104]]

Cross-Validation Results:
Fold Accuracies: [1. 1. 1. 1. 1.]
Mean Accuracy: 1.0
Std Deviation: 0.0


['/content/drive/MyDrive/voice_svm_model.pkl']

In [None]:
# ======================
# Model 3: XGBoost
# ======================
print("\n" + "="*30)
print("=== XGBoost Model ===")
print("="*30)

xgb_model = XGBClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)
xgb_model.fit(X_train, y_train)

# Evaluate
y_pred_xgb = xgb_model.predict(X_test)
print("\nTest Set Performance:")
print("Accuracy:", accuracy_score(y_test, y_pred_xgb))
print("\nClassification Report:")
print(classification_report(y_test, y_pred_xgb, target_names=["Real", "Fake"]))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred_xgb))

# Cross-validation
print("\nCross-Validation Results:")
xgb_scores = cross_val_score(xgb_model, X, y, cv=5, scoring='accuracy')
print("Fold Accuracies:", xgb_scores)
print("Mean Accuracy:", xgb_scores.mean())
print("Std Deviation:", xgb_scores.std())

# Save model
joblib.dump(xgb_model, "/content/drive/MyDrive/voice_xgb_model.pkl")


=== XGBoost Model ===

Test Set Performance:
Accuracy: 1.0

Classification Report:
              precision    recall  f1-score   support

        Real       1.00      1.00      1.00        96
        Fake       1.00      1.00      1.00       104

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200


Confusion Matrix:
[[ 96   0]
 [  0 104]]

Cross-Validation Results:
Fold Accuracies: [1.    0.995 0.995 1.    1.   ]
Mean Accuracy: 0.998
Std Deviation: 0.0024494897427831805


['/content/drive/MyDrive/voice_xgb_model.pkl']

In [None]:
# ======================
# Model 4: Neural Network
# ======================
print("\n" + "="*30)
print("=== Neural Network (MLP) ===")
print("="*30)

mlp_model = make_pipeline(
    StandardScaler(),
    MLPClassifier(
        hidden_layer_sizes=(100, 50),
        activation='relu',
        solver='adam',
        alpha=0.0001,
        batch_size='auto',
        learning_rate='constant',
        learning_rate_init=0.001,
        max_iter=500,
        random_state=42,
        early_stopping=True
    )
)
mlp_model.fit(X_train, y_train)

# Evaluate
y_pred_mlp = mlp_model.predict(X_test)
print("\nTest Set Performance:")
print("Accuracy:", accuracy_score(y_test, y_pred_mlp))
print("\nClassification Report:")
print(classification_report(y_test, y_pred_mlp, target_names=["Real", "Fake"]))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred_mlp))

# Cross-validation
print("\nCross-Validation Results:")
mlp_scores = cross_val_score(mlp_model, X, y, cv=5, scoring='accuracy')
print("Fold Accuracies:", mlp_scores)
print("Mean Accuracy:", mlp_scores.mean())
print("Std Deviation:", mlp_scores.std())

# Save model
joblib.dump(mlp_model, "/content/drive/MyDrive/voice_mlp_model.pkl")



=== Neural Network (MLP) ===

Test Set Performance:
Accuracy: 1.0

Classification Report:
              precision    recall  f1-score   support

        Real       1.00      1.00      1.00        96
        Fake       1.00      1.00      1.00       104

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200


Confusion Matrix:
[[ 96   0]
 [  0 104]]

Cross-Validation Results:
Fold Accuracies: [0.995 0.985 1.    1.    1.   ]
Mean Accuracy: 0.9960000000000001
Std Deviation: 0.0058309518948453055


['/content/drive/MyDrive/voice_mlp_model.pkl']

In [None]:
# =========================================
# Function to test new audio with all models
# =========================================
def test_new_audio(file_path):
    # Load models
    rf_model = joblib.load("/content/drive/MyDrive/voice_rf_model.pkl")
    svm_model = joblib.load("/content/drive/MyDrive/voice_svm_model.pkl")
    xgb_model = joblib.load("/content/drive/MyDrive/voice_xgb_model.pkl")
    mlp_model = joblib.load("/content/drive/MyDrive/voice_mlp_model.pkl")
    scaler = joblib.load("/content/drive/MyDrive/scaler.pkl")

    # Extract features
    new_feat = extract_mfcc_features(file_path)
    if new_feat is None:
        print("Error processing audio file")
        return

    new_feat_scaled = scaler.transform([new_feat])

    # Get predictions
    rf_pred = rf_model.predict([new_feat])[0]
    svm_pred = svm_model.predict(new_feat_scaled)[0]
    xgb_pred = xgb_model.predict([new_feat])[0]
    mlp_pred = mlp_model.predict(new_feat_scaled)[0]

    # Get probabilities
    rf_prob = rf_model.predict_proba([new_feat])[0]
    svm_prob = svm_model.predict_proba(new_feat_scaled)[0]
    xgb_prob = xgb_model.predict_proba([new_feat])[0]
    mlp_prob = mlp_model.predict_proba(new_feat_scaled)[0]

    print("\n" + "="*50)
    print(f"Predictions for: {file_path}")
    print("="*50)
    print(f"Random Forest: {'Real' if rf_pred == 0 else 'Fake'} (Confidence: {max(rf_prob)*100:.2f}%)")
    print(f"SVM: {'Real' if svm_pred == 0 else 'Fake'} (Confidence: {max(svm_prob)*100:.2f}%)")
    print(f"XGBoost: {'Real' if xgb_pred == 0 else 'Fake'} (Confidence: {max(xgb_prob)*100:.2f}%)")
    print(f"Neural Network: {'Real' if mlp_pred == 0 else 'Fake'} (Confidence: {max(mlp_prob)*100:.2f}%)")

# Example usage:
# test_new_audio("/content/drive/MyDrive/processed_audio_files/sample/Real/real_sample.wav")
# test_new_audio("/content/drive/MyDrive/processed_audio_files/sample/Fake/fake_sample.wav")

# =========================================
# Compare model performances
# =========================================
print("\n" + "="*50)
print("=== Model Performance Summary ===")
print("="*50)
print(f"{'Model':<20} {'Test Accuracy':<15} {'CV Mean Accuracy':<18} {'CV Std Dev':<10}")
print("-"*60)
print(f"{'Random Forest':<20} {accuracy_score(y_test, y_pred_rf):<15.4f} {rf_scores.mean():<18.4f} {rf_scores.std():<10.4f}")
print(f"{'SVM':<20} {accuracy_score(y_test, y_pred_svm):<15.4f} {svm_scores.mean():<18.4f} {svm_scores.std():<10.4f}")
print(f"{'XGBoost':<20} {accuracy_score(y_test, y_pred_xgb):<15.4f} {xgb_scores.mean():<18.4f} {xgb_scores.std():<10.4f}")
print(f"{'Neural Network':<20} {accuracy_score(y_test, y_pred_mlp):<15.4f} {mlp_scores.mean():<18.4f} {mlp_scores.std():<10.4f}")


=== Model Performance Summary ===
Model                Test Accuracy   CV Mean Accuracy   CV Std Dev
------------------------------------------------------------
Random Forest        1.0000          0.9990             0.0020    
SVM                  1.0000          1.0000             0.0000    
XGBoost              1.0000          0.9980             0.0024    
Neural Network       1.0000          0.9960             0.0058    


In [None]:
# First mount your Google Drive (if not already mounted)
from google.colab import drive
drive.mount('/content/drive')

# Now test your specific audio file
test_new_audio("/content/drive/MyDrive/processed_audio_files/Irtisum/Real/111.wav")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Predictions for: /content/drive/MyDrive/processed_audio_files/Irtisum/Real/111.wav
Random Forest: Real (Confidence: 100.00%)
SVM: Real (Confidence: 98.79%)
XGBoost: Real (Confidence: 99.66%)
Neural Network: Real (Confidence: 52.93%)
