In [1]:
# Root folder of the dataset; each class label is read from a sub-folder of this path.
data_path = "DeepFake Audio"

In [2]:
import os

In [3]:
def extract_features(file_path, mfcc=True, chroma=True, mel=True, n_mfcc=13):
    """Build one flat feature vector for a single audio file.

    The file is loaded with ``librosa.load`` (``res_type='kaiser_fast'``). Each
    enabled feature matrix is averaged along axis 1 and the resulting values are
    appended to the output in a fixed order.

    Args:
        file_path: Path of the audio file to load.
        mfcc: Include the MFCC means when truthy.
        chroma: Include the chroma (STFT) means when truthy.
        mel: Include the mel-spectrogram means when truthy.
        n_mfcc: Number of MFCC coefficients requested from librosa. Defaults to
            13, the value that used to be hard-coded.

    Returns:
        list: The per-row means in the order MFCC, chroma, mel. Empty when all
        three flags are falsy.
    """
    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
    features = []
    # Each result gets its own local name so the boolean flag parameters are
    # never overwritten by arrays.
    if mfcc:
        mfcc_means = np.mean(
            librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc), axis=1
        )
        features.extend(mfcc_means)
    if chroma:
        chroma_means = np.mean(
            librosa.feature.chroma_stft(y=audio, sr=sample_rate), axis=1
        )
        features.extend(chroma_means)
    if mel:
        mel_means = np.mean(
            librosa.feature.melspectrogram(y=audio, sr=sample_rate), axis=1
        )
        features.extend(mel_means)
    return features

In [4]:
import librosa
import numpy as np

In [5]:
def preprocess_dataset(data_path, labels):
    """Extract features for every audio file under the given class folders.

    Args:
        data_path: Root directory that contains one sub-folder per class.
        labels: Sequence of sub-folder names. The position of a name in this
            sequence is used as the integer class label of its files.

    Returns:
        tuple: ``(X, y)`` as NumPy arrays, where ``X`` holds one feature vector
        per file and ``y`` the matching integer labels.
    """
    X = []
    y = []
    for label, folder in enumerate(labels):
        folder_path = os.path.join(data_path, folder)
        # os.listdir returns entries in arbitrary order; sorting keeps the row
        # order (and any seeded split built on top of it) reproducible.
        for filename in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, filename)
            # Sub-directories cannot be decoded as audio, so skip them instead
            # of letting the loader fail halfway through the dataset.
            if not os.path.isfile(file_path):
                continue
            X.append(extract_features(file_path))
            y.append(label)
    return np.array(X), np.array(y)

In [6]:
# Class folder names; the position in this list is the integer class id.
labels = [
    "fake",
    "real",
]

In [7]:
# Walk every class folder and build the feature matrix and label vector.
X, y = preprocess_dataset(data_path=data_path, labels=labels)

In [8]:
# Display the label vector returned by preprocess_dataset.
y

array([0, 0, 0, ..., 1, 1, 1])

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
# Hold out 20% of the samples for testing; the fixed seed makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [11]:
from tensorflow.keras.models import Model,Sequential
from tensorflow.keras.layers import Input, Dense,Conv1D,MaxPool1D

In [12]:
import tensorflow as tf

In [13]:
# Number of scalar features per sample (columns of the feature matrix).
n_features = X_train.shape[1]

model1 = tf.keras.Sequential([
    # The feature arrays fed to fit/predict are 2-D (samples, features), while
    # the LSTM consumes 3-D input. Add the trailing axis explicitly so the
    # model declares the input it actually receives instead of relying on
    # Keras to expand the rank implicitly.
    tf.keras.layers.Reshape((n_features, 1), input_shape=(n_features,)),
    tf.keras.layers.LSTM(64),
    # Single sigmoid unit: the output is the probability of class 1.
    tf.keras.layers.Dense(1, activation='sigmoid', kernel_regularizer='l1'),
])

In [14]:
# Binary cross-entropy matches the single sigmoid output; accuracy is reported alongside the loss.
model1.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [15]:
# First training pass: 10 epochs in mini-batches of 32.
model1.fit(X_train, y_train, batch_size=32, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1c77ab9dea0>

In [16]:
# Second training pass on the same model1 object: 20 more epochs.
model1.fit(X_train, y_train, batch_size=32, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1c76a7a7430>

In [17]:
# Third training pass on the same model1 object: 2 more epochs.
model1.fit(X_train, y_train, batch_size=32, epochs=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x1c76a7b75e0>

In [18]:
# Score the trained model on the held-out split (loss, then the compiled accuracy metric).
model1.evaluate(x=X_test, y=y_test)



[0.09205028414726257, 0.992790937423706]

In [19]:
# Persist the trained network to an .h5 file in the working directory.
model1.save(filepath='Deepfakeaudio99%ffit.h5')

In [103]:
# Clip to classify; it goes through the same feature extraction as the training files.
file_path = 'voice5.mp3'
audio_features = extract_features(file_path=file_path)

In [104]:
# Turn the Python list of feature values into a NumPy vector.
X_array = np.asarray(audio_features)

In [105]:
# Add a leading batch axis: one row holding all features of the clip.
X_reshaped = np.reshape(X_array, (1, -1))

In [106]:
# Run the trained model on the single-row batch.
y_pred = model1.predict(x=X_reshaped)



In [107]:
# Display the raw sigmoid output produced by model1.predict for the clip.
y_pred

array([[0.02246037]], dtype=float32)

In [108]:
# predict() returns a 2-D array even for one clip. Compare an explicit scalar
# rather than relying on the truthiness of an array, which raises as soon as
# more than one row is present.
probability_real = float(np.ravel(y_pred)[0])

# Label 1 corresponds to the "real" folder, so a high probability means a human voice.
if probability_real > 0.5:
    predicted_class = 'human voice'
else:
    predicted_class = 'machine generated voice'

# Print the predicted class
print('Predicted class:', predicted_class,y_pred)

Predicted class: machine generated voice [[0.02246037]]


In [87]:
from sklearn.metrics import confusion_matrix, classification_report

# Predicted probabilities for every held-out sample
y_pred1 = model1.predict(x=X_test)

# Threshold the probabilities at 0.5 to get hard 0/1 labels
y_pred_classes = np.greater(y_pred1, 0.5).astype(int)

# Cross-tabulate true labels against predicted labels
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred_classes)

print("Confusion Matrix:", conf_matrix, sep="\n")

# Per-class precision / recall / F1 summary
print(
    "Classification Report:",
    classification_report(y_true=y_test, y_pred=y_pred_classes),
    sep="\n",
)


Confusion Matrix:
[[506   3]
 [  4 458]]
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       509
           1       0.99      0.99      0.99       462

    accuracy                           0.99       971
   macro avg       0.99      0.99      0.99       971
weighted avg       0.99      0.99      0.99       971

