In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import time

# Configuration
# Root folder of the dataset; create_dataset() reads its "fake" and "real" subfolders.
DATASET_PATH = "for-norm"
# Destination CSV for the extracted feature table (feature columns plus a "label" column).
CSV_FEATURES_PATH = "../features/audio_features.csv"
# Sample rate (Hz) passed to librosa.load as `sr`.
SAMPLE_RATE = 22050
# Upper bound, in seconds, on how much of each file is read (librosa.load `duration`).
DURATION = 10.0

def extract_features(file_path):
    """Extract summary audio features for synthetic voice detection.

    Loads at most ``DURATION`` seconds of ``file_path`` at ``SAMPLE_RATE`` and
    reduces several signals to scalar statistics.

    Args:
        file_path: Path to an audio file that ``librosa.load`` can read.

    Returns:
        A dict of 15 scalars, or ``None`` if loading or any computation fails
        (the error is printed rather than raised, so a batch run keeps going).
        Keys are inserted in a fixed order, which downstream code relies on
        for column order: ``<name>_mean`` / ``<name>_std`` for
        ``spectral_centroid``, ``spectral_bandwidth``, ``spectral_rolloff``,
        ``zcr``, ``harmonic``, ``percussive`` and ``rms``, followed by
        ``rms_dynamic_range``.
    """
    try:
        y, sr = librosa.load(file_path, duration=DURATION, sr=SAMPLE_RATE)

        # Signals to summarise, listed in the exact order their statistics
        # must appear in the resulting feature dict.
        signals = [
            # 1. Spectral shape descriptors.
            ('spectral_centroid', librosa.feature.spectral_centroid(y=y, sr=sr)),
            ('spectral_bandwidth', librosa.feature.spectral_bandwidth(y=y, sr=sr)),
            ('spectral_rolloff', librosa.feature.spectral_rolloff(y=y, sr=sr)),
            # 2. Zero crossing rate.
            ('zcr', librosa.feature.zero_crossing_rate(y)),
        ]

        # 3. Harmonic and percussive components. hpss() returns both from a
        # single decomposition; calling effects.harmonic() and
        # effects.percussive() separately repeats that work for every file.
        y_harmonic, y_percussive = librosa.effects.hpss(y)
        signals.append(('harmonic', y_harmonic))
        signals.append(('percussive', y_percussive))

        # 4. RMS energy dynamics.
        rms = librosa.feature.rms(y=y)
        signals.append(('rms', rms))

        features = {}
        for name, values in signals:
            features[f'{name}_mean'] = np.mean(values)
            features[f'{name}_std'] = np.std(values)
        features['rms_dynamic_range'] = np.max(rms) - np.min(rms)

        return features

    except Exception as e:
        # Deliberate best-effort: one unreadable file must not abort a batch.
        print(f"Error processing {file_path}: {str(e)}")
        return None

def create_dataset():
    """Extract features from every WAV file in the dataset and save them as CSV.

    Reads ``DATASET_PATH/fake`` and ``DATASET_PATH/real``, calls
    ``extract_features`` on each ``.wav`` file (extension matched
    case-insensitively), adds a ``label`` column (1 for fake, 0 for real) and
    writes the table to ``CSV_FEATURES_PATH``. Files for which
    ``extract_features`` returns nothing are skipped.

    Returns:
        None. The result is the CSV file written to ``CSV_FEATURES_PATH``.

    Raises:
        OSError: if a label folder cannot be listed or the output location
            cannot be created or written.
    """
    # Create the destination folder up front so a missing directory surfaces
    # before the long extraction loop rather than at the final to_csv call.
    output_dir = os.path.dirname(CSV_FEATURES_PATH)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    data = []

    for label in ['fake', 'real']:
        folder_path = os.path.join(DATASET_PATH, label)
        print(f"Processing {label} files...")

        # os.listdir order is arbitrary; sorting makes the CSV row order (and
        # therefore any seeded train/test split built on it) reproducible.
        wav_files = sorted(
            name for name in os.listdir(folder_path)
            if name.lower().endswith('.wav')
        )

        # Count only audio files so the progress message reflects work done.
        for count, filename in enumerate(wav_files, start=1):
            features = extract_features(os.path.join(folder_path, filename))

            if features:
                features['label'] = 1 if label == 'fake' else 0
                data.append(features)

            if count % 100 == 0:
                print(f"Processed {count} {label} files")

    # Build the frame once from the collected records, then persist it.
    df = pd.DataFrame(data)
    df.to_csv(CSV_FEATURES_PATH, index=False)
    print(f"Dataset created with {len(df)} samples. Saved to {CSV_FEATURES_PATH}")

if __name__ == "__main__":
    # Run the full extraction and report how long it took (wall-clock).
    start_time = time.time()
    create_dataset()
    elapsed = time.time() - start_time
    print(f"Feature extraction completed in {elapsed:.2f} seconds")

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib
import time

# Configuration
# Feature table read by load_data().
CSV_FEATURES_PATH = "../features/audio_features.csv"
# Where train_and_evaluate() writes the fitted classifier and scaler.
# NOTE(review): the recorded output of this cell reports saving to
# "deepfake_audio_detector.pkl" / "feature_scaler.pkl", and the inference cell
# loads those names, so that output looks stale relative to these paths.
# Confirm which artifact locations are intended.
MODEL_PATH = "../models/10sec.pkl"
SCALER_PATH = "../models/10sec_scaler.pkl"

def load_data():
    """Read the feature table stored at ``CSV_FEATURES_PATH``.

    Prints the number of rows and the number of columns minus one (the
    ``label`` column is not counted as a feature).

    Returns:
        The table as a pandas DataFrame.
    """
    print("Loading features from CSV...")
    feature_table = pd.read_csv(CSV_FEATURES_PATH)

    n_samples = len(feature_table)
    n_features = feature_table.shape[1] - 1
    print(f"Loaded {n_samples} samples with {n_features} features each")

    return feature_table

def train_and_evaluate(df):
    """Train a Random Forest on the feature table, report metrics and save it.

    Steps: stratified 80/20 split (seed 42), standardise the features with a
    scaler fitted on the training part only, fit the classifier, print
    accuracy and a per-class report for the held-out part, then write the
    model to ``MODEL_PATH`` and the scaler to ``SCALER_PATH``.

    Args:
        df: DataFrame with a ``label`` column (0 = real, 1 = fake) and one
            column per feature.

    Returns:
        None. Metrics are printed; artifacts are written with ``joblib.dump``.
    """
    import os  # local import: this cell's own import list does not include os

    # Prepare data
    X = df.drop('label', axis=1)
    y = df['label']

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y)

    # Scale features; fit on the training split only so that no test-set
    # statistics leak into the transform.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train Random Forest classifier
    print("\nTraining Random Forest classifier...")
    start_time = time.time()
    rf = RandomForestClassifier(
        n_estimators=100,
        random_state=42,
        n_jobs=-1,
        max_depth=10,
        min_samples_split=5
    )
    rf.fit(X_train_scaled, y_train)
    training_time = time.time() - start_time

    # Evaluate. labels=[0, 1] pins which class each target name refers to
    # instead of relying on the order in which labels happen to be discovered.
    y_pred = rf.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(
        y_test, y_pred, labels=[0, 1], target_names=['real', 'fake'])

    print(f"\nTraining completed in {training_time:.2f} seconds")
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(report)

    # Save model and scaler, creating the destination folders if needed so a
    # fresh checkout does not fail at the very last step.
    for artifact, path in ((rf, MODEL_PATH), (scaler, SCALER_PATH)):
        directory = os.path.dirname(path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        joblib.dump(artifact, path)
    print(f"\nModel saved to {MODEL_PATH}")
    print(f"Scaler saved to {SCALER_PATH}")

# Entry point for this cell: load the cached feature table, then train,
# evaluate and save the classifier. `df` stays bound in the kernel afterwards.
if __name__ == "__main__":
    df = load_data()
    train_and_evaluate(df)

Loading features from CSV...
Loaded 10798 samples with 15 features each

Training Random Forest classifier...

Training completed in 0.47 seconds
Accuracy: 0.9162
Classification Report:
              precision    recall  f1-score   support

        real       0.93      0.90      0.92      1080
        fake       0.91      0.93      0.92      1080

    accuracy                           0.92      2160
   macro avg       0.92      0.92      0.92      2160
weighted avg       0.92      0.92      0.92      2160


Model saved to deepfake_audio_detector.pkl
Scaler saved to feature_scaler.pkl


In [8]:
import librosa
import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
import os

# Configuration
# Artifacts loaded by main() with joblib.load.
# NOTE(review): the training cell's configuration saves to
# "../models/10sec.pkl" / "../models/10sec_scaler.pkl", not to these names.
# After a fresh Run All this cell would load whatever older files exist at
# these paths. Confirm which artifact locations are intended.
MODEL_PATH = "deepfake_audio_detector.pkl"
SCALER_PATH = "feature_scaler.pkl"
# Sample rate (Hz) passed to librosa.load as `sr`.
SAMPLE_RATE = 22050
DURATION = 10.0  # Should match what you used during training

def extract_features(file_path):
    """Extract the same features used during training.

    Loads at most ``DURATION`` seconds of ``file_path`` at ``SAMPLE_RATE`` and
    reduces several signals to scalar statistics. The feature names and their
    order must match the table the model and scaler were fitted on.

    Args:
        file_path: Path to an audio file that ``librosa.load`` can read.

    Returns:
        A dict of 15 scalars, or ``None`` if loading or any computation fails
        (the error is printed rather than raised). Keys are inserted in a
        fixed order: ``<name>_mean`` / ``<name>_std`` for
        ``spectral_centroid``, ``spectral_bandwidth``, ``spectral_rolloff``,
        ``zcr``, ``harmonic``, ``percussive`` and ``rms``, followed by
        ``rms_dynamic_range``.
    """
    try:
        y, sr = librosa.load(file_path, duration=DURATION, sr=SAMPLE_RATE)

        # Signals to summarise, listed in the exact order their statistics
        # must appear in the resulting feature dict.
        signals = [
            # 1. Spectral features.
            ('spectral_centroid', librosa.feature.spectral_centroid(y=y, sr=sr)),
            ('spectral_bandwidth', librosa.feature.spectral_bandwidth(y=y, sr=sr)),
            ('spectral_rolloff', librosa.feature.spectral_rolloff(y=y, sr=sr)),
            # 2. Zero crossing rate.
            ('zcr', librosa.feature.zero_crossing_rate(y)),
        ]

        # 3. Harmonic and percussive components. hpss() returns both from a
        # single decomposition; calling effects.harmonic() and
        # effects.percussive() separately repeats that work.
        y_harmonic, y_percussive = librosa.effects.hpss(y)
        signals.append(('harmonic', y_harmonic))
        signals.append(('percussive', y_percussive))

        # 4. RMS energy dynamics.
        rms = librosa.feature.rms(y=y)
        signals.append(('rms', rms))

        features = {}
        for name, values in signals:
            features[f'{name}_mean'] = np.mean(values)
            features[f'{name}_std'] = np.std(values)
        features['rms_dynamic_range'] = np.max(rms) - np.min(rms)

        return features

    except Exception as e:
        # Best-effort: report the failure and let the caller handle None.
        print(f"Error processing {file_path}: {str(e)}")
        return None

def predict_audio(file_path, model, scaler):
    """Predict whether an audio file is real or fake.

    Args:
        file_path: Path to the audio file to classify.
        model: Fitted classifier exposing ``predict`` and ``predict_proba``
            (label 1 = fake, 0 = real).
        scaler: Fitted transformer exposing ``transform``.

    Returns:
        ``None`` if feature extraction failed, otherwise a dict with keys
        ``file`` (base name), ``prediction`` (``'fake'`` or ``'real'``),
        ``confidence`` (largest class probability), ``fake_probability`` and
        ``real_probability``.

    Raises:
        KeyError: if the scaler records fitted feature names and an expected
            feature is missing from the extracted ones.
    """
    # Extract features
    features = extract_features(file_path)
    if features is None:
        return None

    # Convert to DataFrame (single row)
    features_df = pd.DataFrame([features])

    # If the scaler remembers the column names it was fitted on, present the
    # columns in exactly that order so a reordered feature dict cannot be
    # silently mis-scaled or rejected.
    expected_columns = getattr(scaler, "feature_names_in_", None)
    if expected_columns is not None:
        features_df = features_df[list(expected_columns)]

    # Scale features using the saved scaler
    features_scaled = scaler.transform(features_df)

    # Make prediction
    prediction = model.predict(features_scaled)
    proba = model.predict_proba(features_scaled)[0]

    # Look probabilities up by class label rather than by position: the
    # columns of predict_proba follow model.classes_, and a model fitted on a
    # single class has only one column.
    classes = list(getattr(model, "classes_", [0, 1]))
    fake_probability = float(proba[classes.index(1)]) if 1 in classes else 0.0
    real_probability = float(proba[classes.index(0)]) if 0 in classes else 0.0

    # Return results
    return {
        'file': os.path.basename(file_path),
        'prediction': 'fake' if prediction[0] == 1 else 'real',
        'confidence': float(np.max(proba)),
        'fake_probability': fake_probability,
        'real_probability': real_probability
    }

def main():
    """Interactive entry point: load the artifacts, ask for a file, print the verdict.

    Loads the classifier from ``MODEL_PATH`` and the scaler from
    ``SCALER_PATH``, prompts for an audio path on standard input, and prints
    either the prediction summary or an error message. Returns ``None``.
    """
    print("Loading model and scaler...")
    model, scaler = joblib.load(MODEL_PATH), joblib.load(SCALER_PATH)
    print("Model loaded successfully!")

    # Ask the user which file to classify.
    file_path = input("Enter path to audio file (.wav): ").strip()

    # Bail out early when the path does not exist.
    if not os.path.exists(file_path):
        print("Error: File not found!")
        return

    result = predict_audio(file_path, model, scaler)

    # An empty/None result means the audio could not be processed.
    if not result:
        print("Failed to process the audio file.")
        return

    print("\nPrediction Results:")
    print(f"File: {result['file']}")
    print(f"Prediction: {result['prediction']}")
    print(f"Confidence: {result['confidence']:.2%}")
    print(f"Fake Probability: {result['fake_probability']:.2%}")
    print(f"Real Probability: {result['real_probability']:.2%}")

# Run the interactive predictor when this cell (or the exported script) is executed directly.
if __name__ == "__main__":
    main()

Loading model and scaler...
Model loaded successfully!

Prediction Results:
File: mle.wav
Prediction: real
Confidence: 79.31%
Fake Probability: 20.69%
Real Probability: 79.31%
