In [2]:
! python3 -m venv venv
! source venv/bin/activate
! pip install librosa scikit-learn numpy


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import librosa
import numpy as np

def extract_features(audio_file):
    # Load the audio file with a sample rate of 16 kHz
    y, sr = librosa.load(audio_file, sr=16000)
    
    # Extract MFCCs (Mel Frequency Cepstral Coefficients)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    
    # Extract Spectral Centroid
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    
    # Return the mean of MFCCs and Spectral Centroid as features
    return np.mean(mfccs, axis=1), np.mean(spectral_centroid)


In [4]:
import os
from sklearn.model_selection import train_test_split

def prepare_dataset(real_folder, ai_folder):
    features = []
    labels = []
    
    # Process real voice files
    for filename in os.listdir(real_folder):
        if filename.endswith(".wav"):
            audio_file = os.path.join(real_folder, filename)
            mfccs, spectral_centroid = extract_features(audio_file)
            features.append(np.hstack([mfccs, spectral_centroid]))
            labels.append(0)  # Label for real voice
    
    # Process AI voice files
    for filename in os.listdir(ai_folder):
        if filename.endswith(".wav"):
            audio_file = os.path.join(ai_folder, filename)
            mfccs, spectral_centroid = extract_features(audio_file)
            features.append(np.hstack([mfccs, spectral_centroid]))
            labels.append(1)  # Label for AI-generated voice
    
    # Convert features and labels to numpy arrays
    return np.array(features), np.array(labels)

# Example usage
real_folder = 'path_to_real_voices'
ai_folder = 'path_to_ai_voices'
X, y = prepare_dataset('/Users/rahulphoolbhati/Downloads/KAGGLE/AUDIO/REAL', '/Users/rahulphoolbhati/Downloads/KAGGLE/AUDIO/FAKE')


In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Random Forest Classifier``
model = RandomForestClassifier(n_estimators=100)

# Train the model
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)
print(y_pred)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Detection Accuracy:", accuracy)


[1 1 1 1 1 1 1 1 1 1 1 1 1]
Detection Accuracy: 0.8461538461538461


In [None]:
mfccs, spectral_centroid = extract_features("./output.wav")
ftr = []
ftr.append(np.hstack([mfccs, spectral_centroid]))
ans = model.predict(ftr)
print(ans)


ValueError: Expected 2D array, got 1D array instead:
array=[-2.6433655e+02  1.1013890e+02  6.3031120e+00  3.7417469e+01
 -1.2978225e+01  1.6938864e+01 -5.2382469e+00 -4.0512395e+00
 -6.1013478e-01 -2.9767964e+00 -8.3992851e-01  2.6437075e+00
 -4.4463367e+00  2.6518536e+00 -1.1537710e+01  5.2956858e+00
 -7.2914233e+00 -2.1405914e+00 -4.3751705e-01 -2.9228559e+00
  1.4718126e+03].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.