In [1]:
# Root folder of the training split; preprocess_dataset() joins it with each label sub-folder.
data_path = "for-rerecorded/training"

In [2]:
import os

In [3]:
def extract_features(file_path, mfcc=True, chroma=True, mel=True, n_mfcc=13):
    """Summarise one audio file as a flat list of per-coefficient feature means.

    The file is decoded with ``librosa.load`` (``res_type='kaiser_fast'``) and
    every enabled feature matrix is averaged along axis 1, so each feature
    contributes one value per coefficient / bin.

    Args:
        file_path: Path handed to ``librosa.load``.
        mfcc: When true, append the mean of each MFCC coefficient.
        chroma: When true, append the mean of each ``chroma_stft`` row.
        mel: When true, append the mean of each ``melspectrogram`` row.
        n_mfcc: Number of MFCC coefficients to compute. Defaults to 13, the
            value that was previously hard-coded.

    Returns:
        list: The selected feature means concatenated in the fixed order
        MFCC, chroma, mel. Empty when all three flags are false.
    """
    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
    features = []
    # Each result gets its own name so the boolean flags are never rebound to arrays.
    if mfcc:
        mfcc_means = np.mean(
            librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc), axis=1
        )
        features.extend(mfcc_means)
    if chroma:
        chroma_means = np.mean(
            librosa.feature.chroma_stft(y=audio, sr=sample_rate), axis=1
        )
        features.extend(chroma_means)
    if mel:
        mel_means = np.mean(
            librosa.feature.melspectrogram(y=audio, sr=sample_rate), axis=1
        )
        features.extend(mel_means)
    return features

In [4]:
import librosa
import numpy as np

In [5]:
def preprocess_dataset(data_path, labels):
    """Build a feature matrix and a label vector from per-class sub-folders.

    Args:
        data_path: Directory that contains one sub-folder per entry of ``labels``.
        labels: Sub-folder names; the position of a name in this sequence is
            the integer label assigned to every file found in that folder.

    Returns:
        tuple: ``(X, y)`` as NumPy arrays. ``X`` holds one row of
        ``extract_features`` output per file, ``y`` the matching integer label.
    """
    X = []
    y = []
    for label, folder in enumerate(labels):
        folder_path = os.path.join(data_path, folder)
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # row order identical across runs and machines.
        for filename in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, filename)
            # extract_features() expects a single file, so nested directories
            # and other non-file entries are skipped.
            if not os.path.isfile(file_path):
                continue
            X.append(extract_features(file_path))
            y.append(label)
    return np.array(X), np.array(y)

In [6]:
# Sub-folder names under the dataset root. preprocess_dataset() enumerates this
# list, so "fake" is encoded as 0 and "real" as 1.
labels = ["fake", "real"]

In [7]:
# Extract features for every file of the training split (X: features, y: integer labels).
X, y = preprocess_dataset(data_path, labels)

In [8]:
from sklearn import preprocessing
# Fit the label binarizer on the training labels, then flatten its output to a 1-D vector.
lb = preprocessing.LabelBinarizer().fit(y)
y = lb.transform(y).ravel()
print(y)

[0 0 0 ... 1 1 1]


In [10]:
from sklearn.model_selection import KFold
# 20-fold shuffled splitter with a fixed seed.
# NOTE(review): kf is not referenced again in the visible cells — confirm whether
# cross-validation was meant to be used for the model comparison.
kf = KFold(n_splits=20, shuffle=True, random_state=1)

print(f"KFold splits: {kf.get_n_splits(X)}")

KFold splits: 20


In [12]:
import pandas as pd

In [20]:
# Wrap the training feature matrix in a DataFrame (columns are positional feature indices).
df=pd.DataFrame(X)

In [21]:
# Display the training feature table.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,143,144,145,146,147,148,149,150,151,152
0,-174.739410,149.674957,4.881085,27.160982,-24.967499,13.991072,-6.967413,-13.227630,-4.476841,-29.154615,...,1.591820e-06,1.501739e-06,1.426454e-06,1.366229e-06,1.315959e-06,1.274486e-06,1.241887e-06,1.216509e-06,1.198733e-06,1.187351e-06
1,-152.178223,157.751358,-7.193079,25.628414,-11.556658,8.748322,-13.268344,-14.076997,-9.530208,-27.174404,...,3.866787e-07,3.608966e-07,3.396888e-07,3.227526e-07,3.088047e-07,2.975252e-07,2.883786e-07,2.816168e-07,2.766517e-07,2.735640e-07
2,-179.938049,146.828873,9.030097,35.061207,-10.734324,25.595644,-20.929335,-11.866779,-5.875903,-23.736719,...,3.173091e-06,2.952875e-06,2.773992e-06,2.633225e-06,2.517723e-06,2.423770e-06,2.350866e-06,2.294347e-06,2.255026e-06,2.230502e-06
3,-188.319366,122.357864,12.016122,45.064774,-10.562499,22.226219,-16.581192,-6.008571,-2.609559,-25.168190,...,5.313674e-06,5.010569e-06,4.756650e-06,4.552977e-06,4.382930e-06,4.242551e-06,4.131832e-06,4.045170e-06,3.985971e-06,3.947365e-06
4,-209.632187,147.403030,9.368010,50.477673,-9.735914,22.027719,-13.756542,-4.374958,-1.857701,-22.561447,...,5.889156e-06,5.528923e-06,5.229555e-06,4.990864e-06,4.792236e-06,4.629013e-06,4.501549e-06,4.401836e-06,4.332764e-06,4.288720e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10203,-238.591690,173.822800,10.715055,25.958763,-20.301331,18.219460,-13.657253,-6.645621,-7.487351,-25.430511,...,8.796574e-07,8.179766e-07,7.675913e-07,7.277465e-07,6.949077e-07,6.682838e-07,6.474953e-07,6.314583e-07,6.201334e-07,6.131187e-07
10204,-197.566010,103.793015,-12.721643,46.791859,-41.127354,35.025249,-23.492632,-15.093642,5.178831,-29.095663,...,5.990125e-05,5.694286e-05,5.440795e-05,5.234831e-05,5.060157e-05,4.913831e-05,4.798665e-05,4.707507e-05,4.644002e-05,4.603627e-05
10205,-167.089218,127.497665,-45.335808,64.245201,-65.533997,49.812309,-20.352291,-1.899531,1.235591,-26.864250,...,1.012643e-05,9.529126e-06,9.032296e-06,8.635575e-06,8.305671e-06,8.033332e-06,7.820650e-06,7.653397e-06,7.539224e-06,7.465866e-06
10206,-241.339706,137.626709,0.025781,37.176109,-45.809105,11.524055,-27.977697,-17.065212,1.126157,-22.746202,...,4.253087e-06,3.993651e-06,3.778011e-06,3.605969e-06,3.463248e-06,3.346056e-06,3.254138e-06,3.182118e-06,3.132799e-06,3.100650e-06


In [24]:
X_train=df
   

In [25]:
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,143,144,145,146,147,148,149,150,151,152
0,-174.739410,149.674957,4.881085,27.160982,-24.967499,13.991072,-6.967413,-13.227630,-4.476841,-29.154615,...,1.591820e-06,1.501739e-06,1.426454e-06,1.366229e-06,1.315959e-06,1.274486e-06,1.241887e-06,1.216509e-06,1.198733e-06,1.187351e-06
1,-152.178223,157.751358,-7.193079,25.628414,-11.556658,8.748322,-13.268344,-14.076997,-9.530208,-27.174404,...,3.866787e-07,3.608966e-07,3.396888e-07,3.227526e-07,3.088047e-07,2.975252e-07,2.883786e-07,2.816168e-07,2.766517e-07,2.735640e-07
2,-179.938049,146.828873,9.030097,35.061207,-10.734324,25.595644,-20.929335,-11.866779,-5.875903,-23.736719,...,3.173091e-06,2.952875e-06,2.773992e-06,2.633225e-06,2.517723e-06,2.423770e-06,2.350866e-06,2.294347e-06,2.255026e-06,2.230502e-06
3,-188.319366,122.357864,12.016122,45.064774,-10.562499,22.226219,-16.581192,-6.008571,-2.609559,-25.168190,...,5.313674e-06,5.010569e-06,4.756650e-06,4.552977e-06,4.382930e-06,4.242551e-06,4.131832e-06,4.045170e-06,3.985971e-06,3.947365e-06
4,-209.632187,147.403030,9.368010,50.477673,-9.735914,22.027719,-13.756542,-4.374958,-1.857701,-22.561447,...,5.889156e-06,5.528923e-06,5.229555e-06,4.990864e-06,4.792236e-06,4.629013e-06,4.501549e-06,4.401836e-06,4.332764e-06,4.288720e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10203,-238.591690,173.822800,10.715055,25.958763,-20.301331,18.219460,-13.657253,-6.645621,-7.487351,-25.430511,...,8.796574e-07,8.179766e-07,7.675913e-07,7.277465e-07,6.949077e-07,6.682838e-07,6.474953e-07,6.314583e-07,6.201334e-07,6.131187e-07
10204,-197.566010,103.793015,-12.721643,46.791859,-41.127354,35.025249,-23.492632,-15.093642,5.178831,-29.095663,...,5.990125e-05,5.694286e-05,5.440795e-05,5.234831e-05,5.060157e-05,4.913831e-05,4.798665e-05,4.707507e-05,4.644002e-05,4.603627e-05
10205,-167.089218,127.497665,-45.335808,64.245201,-65.533997,49.812309,-20.352291,-1.899531,1.235591,-26.864250,...,1.012643e-05,9.529126e-06,9.032296e-06,8.635575e-06,8.305671e-06,8.033332e-06,7.820650e-06,7.653397e-06,7.539224e-06,7.465866e-06
10206,-241.339706,137.626709,0.025781,37.176109,-45.809105,11.524055,-27.977697,-17.065212,1.126157,-22.746202,...,4.253087e-06,3.993651e-06,3.778011e-06,3.605969e-06,3.463248e-06,3.346056e-06,3.254138e-06,3.182118e-06,3.132799e-06,3.100650e-06


In [26]:
# Display the encoded training labels.
y

array([0, 0, 0, ..., 1, 1, 1])

In [33]:
# Training targets: the encoded label vector.
y_train=y

In [27]:
# Root folder of the validation split.
data_path1="for-rerecorded/validation"

In [28]:
# Same feature extraction as for training, applied to the validation folder.
X1, y1 = preprocess_dataset(data_path1, labels)

In [29]:
from sklearn import preprocessing
# Encode the validation labels with the binarizer fitted on the TRAINING labels
# (`lb`) so both splits share one label mapping. The previous version of this
# cell also fitted a second binarizer, `lb1`, that was never used for the
# transform; `lb1` is kept only as an alias in case other cells refer to it.
lb1 = lb
# Fail loudly if the validation split contains a class the training split never had.
assert set(np.unique(y1)) <= set(lb.classes_), "validation labels contain classes unseen in training"
y1 = lb.transform(y1).ravel()
print(y1)

[0 0 0 ... 1 1 1]


In [30]:
# Wrap the validation feature matrix in a DataFrame.
df1=pd.DataFrame(X1)

In [31]:
# NOTE: the hold-out set named "test" from here on is the data read from the validation folder.
X_test=df1

In [32]:
# Display the hold-out features.
X_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,143,144,145,146,147,148,149,150,151,152
0,-256.103912,98.469414,22.013477,52.473427,-19.976601,12.298381,-13.486473,-21.096678,-8.685554,-14.394847,...,5.152135e-06,4.850130e-06,4.598218e-06,4.396695e-06,4.228468e-06,4.089994e-06,3.981520e-06,3.896523e-06,3.837688e-06,3.799962e-06
1,-300.186493,85.507095,20.787426,23.039135,2.667248,13.296325,-2.531562,1.466273,-0.258310,-9.347284,...,1.021982e-05,9.616067e-06,9.112850e-06,8.710424e-06,8.375203e-06,8.097788e-06,7.881747e-06,7.711950e-06,7.595052e-06,7.520370e-06
2,-157.332275,127.011574,-5.880052,43.091164,-18.972185,23.088911,-13.768730,-11.887770,-1.636095,-30.187855,...,3.442305e-06,3.239278e-06,3.070470e-06,2.935717e-06,2.823577e-06,2.731393e-06,2.658706e-06,2.602187e-06,2.563323e-06,2.538142e-06
3,-276.085114,133.718170,27.578678,36.164024,1.329927,-0.267277,-0.001654,-10.962516,-5.820240,-11.429310,...,7.507843e-06,7.083778e-06,6.728964e-06,6.445035e-06,6.208104e-06,6.011506e-06,5.857601e-06,5.736358e-06,5.654464e-06,5.600895e-06
4,-192.768143,116.761581,8.483458,39.969315,-18.644230,30.744694,-20.441114,-3.995106,-4.845390,-27.480997,...,1.530146e-05,1.439213e-05,1.363423e-05,1.303029e-05,1.252546e-05,1.211058e-05,1.178593e-05,1.153132e-05,1.135512e-05,1.124303e-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2239,-170.995926,131.567947,4.367309,34.149075,-21.463945,19.826168,-9.597502,-8.316634,2.036535,-17.701063,...,1.125740e-06,1.064263e-06,1.011978e-06,9.696928e-07,9.341181e-07,9.046452e-07,8.812943e-07,8.630064e-07,8.504387e-07,8.422486e-07
2240,-276.502716,135.377914,10.197211,30.170574,-22.439758,6.364676,-15.692318,-24.409771,0.408411,-8.242374,...,2.322224e-06,2.187234e-06,2.074446e-06,1.984408e-06,1.909373e-06,1.847081e-06,1.798585e-06,1.760399e-06,1.734200e-06,1.717432e-06
2241,-203.477753,148.898331,3.335640,31.260937,-23.842278,10.415450,-18.996857,-33.429424,-3.545755,-22.562578,...,4.251870e-07,3.897166e-07,3.615536e-07,3.396982e-07,3.221379e-07,3.080289e-07,2.970529e-07,2.887397e-07,2.830752e-07,2.793792e-07
2242,-278.188599,115.198441,15.943944,58.600094,-32.497604,25.483486,-7.505641,-18.275711,4.931672,-24.413553,...,1.841635e-06,1.731091e-06,1.639023e-06,1.565354e-06,1.504025e-06,1.453219e-06,1.413930e-06,1.382906e-06,1.361379e-06,1.347869e-06


In [34]:
# Encoded validation labels, used as the hold-out targets for the .score() calls that follow.
y_test=y1

In [35]:
from sklearn.ensemble import HistGradientBoostingClassifier

In [36]:
# Histogram-based gradient boosting with the library's default hyper-parameters.
clf1=HistGradientBoostingClassifier()

In [37]:
clf1.fit(X_train,y_train)

In [38]:
# For scikit-learn classifiers .score() is the mean accuracy; here on the data the model was fitted on.
clf1.score(X_train,y_train)

0.9964733542319749

In [39]:
# Mean accuracy on the hold-out (validation folder) features.
clf1.score(X_test,y_test)

0.9630124777183601

In [40]:
from sklearn.neural_network import MLPClassifier

In [41]:
# Multi-layer perceptron with default hyper-parameters.
# NOTE(review): no random_state is passed and the features are fed in unscaled
# (columns range from hundreds down to ~1e-6 in the tables above) — results may vary between runs.
clf2=MLPClassifier()

In [42]:
clf2.fit(X_train,y_train)

In [43]:
# Mean accuracy on the training data.
clf2.score(X_train,y_train)

0.9640478056426333

In [44]:
# Mean accuracy on the hold-out data.
clf2.score(X_test,y_test)

0.9322638146167558

In [45]:
from sklearn.tree import DecisionTreeClassifier

In [46]:
# Single decision tree with default hyper-parameters (no depth limit is set here).
clf3=DecisionTreeClassifier()

In [47]:
clf3.fit(X_train,y_train)

In [48]:
# Mean accuracy on the training data; the recorded 1.0 means the tree reproduces every training label.
clf3.score(X_train,y_train)

1.0

In [50]:
# Mean accuracy on the hold-out data; compare with the perfect training score to judge over-fitting.
clf3.score(X_test,y_test)

0.8685383244206774

In [51]:
from sklearn.ensemble import GradientBoostingClassifier

In [54]:
# Classic gradient boosting with default hyper-parameters.
clf4=GradientBoostingClassifier()

In [55]:
clf4.fit(X_train,y_train)

In [56]:
# Mean accuracy on the training data.
clf4.score(X_train,y_train)

0.9552311912225705

In [57]:
# Mean accuracy on the hold-out data.
clf4.score(X_test,y_test)

0.9295900178253119

In [58]:
from sklearn.ensemble import RandomForestClassifier

In [59]:
# Random forest with default hyper-parameters, constructed and fitted in one expression.
# NOTE(review): no random_state is passed, so scores may differ slightly between runs.
clf5=RandomForestClassifier().fit(X_train,y_train)

In [60]:
# Mean accuracy on the training data.
clf5.score(X_train,y_train)

1.0

In [61]:
# Mean accuracy on the hold-out data.
clf5.score(X_test,y_test)

0.9474153297682709

In [62]:
from sklearn.ensemble import BaggingClassifier

In [63]:
# Bagging ensemble with default hyper-parameters, constructed and fitted in one expression.
# NOTE(review): no random_state is passed, so scores may differ slightly between runs.
clf8=BaggingClassifier().fit(X_train,y_train)

In [64]:
# Mean accuracy on the training data.
clf8.score(X_train,y_train)

0.9959835423197492

In [65]:
# Mean accuracy on the hold-out data.
clf8.score(X_test,y_test)

0.9166666666666666

In [66]:
from sklearn.ensemble import ExtraTreesClassifier

In [67]:
# Extremely-randomised trees with default hyper-parameters, constructed and fitted in one expression.
# NOTE(review): no random_state is passed, so scores may differ slightly between runs.
clf9=ExtraTreesClassifier().fit(X_train,y_train)

In [69]:
# Mean accuracy on the training data.
clf9.score(X_train,y_train)

1.0

In [70]:
# Mean accuracy on the hold-out data.
clf9.score(X_test,y_test)

0.9442959001782532

In [80]:
from sklearn.linear_model import ARDRegression  

In [81]:
# NOTE(review): ARDRegression is a regression model, not a classifier; it is fitted here directly on the 0/1 labels.
clf10=ARDRegression().fit(X_train,y_train)

In [82]:
# For scikit-learn regressors .score() is the R^2 coefficient, so this number is
# not comparable with the accuracies reported for the classifiers.
clf10.score(X_train,y_train)

0.20888373497188262

In [83]:
# R^2 on the hold-out data (not an accuracy).
clf10.score(X_test,y_test)

0.2264310955709239

In [84]:
from sklearn.linear_model import LassoLars

In [85]:
# NOTE(review): LassoLars is a regression model, not a classifier; it is fitted here directly on the 0/1 labels.
clf11=LassoLars().fit(X_train,y_train)

In [86]:
# For scikit-learn regressors .score() is the R^2 coefficient, so this number is
# not comparable with the accuracies reported for the classifiers.
clf11.score(X_train,y_train)

0.32394919036129166

In [87]:
# R^2 on the hold-out data (not an accuracy).
clf11.score(X_test,y_test)

0.303671102609378

In [88]:
from sklearn.linear_model import BayesianRidge

In [89]:
# NOTE(review): BayesianRidge is a regression model, not a classifier; it is fitted here directly on the 0/1 labels.
clf12=BayesianRidge().fit(X_train,y_train)

In [92]:
# For scikit-learn regressors .score() is the R^2 coefficient, so this number is
# not comparable with the accuracies reported for the classifiers.
clf12.score(X_train,y_train)

0.5682459050009452

In [93]:
# R^2 on the hold-out data (not an accuracy).
clf12.score(X_test,y_test)

0.5394146415387857

In [94]:
from sklearn.metrics import confusion_matrix, classification_report

# Predict class labels for the hold-out features with the HistGradientBoosting model.
y_pred = clf1.predict(X_test)

# predict() already returns class labels (0 = fake, 1 = real), not probabilities,
# so no 0.5 threshold is applied; the alias keeps the name available to other cells.
y_pred_classes = y_pred

# Confusion matrix: rows are the true classes, columns the predicted classes.
conf_matrix = confusion_matrix(y_test, y_pred_classes)

print("Confusion Matrix:")
print(conf_matrix)

# Per-class precision / recall / F1 on the hold-out data.
print("Classification Report:")
print(classification_report(y_test, y_pred_classes))


Confusion Matrix:
[[1099   44]
 [  39 1062]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.96      0.96      1143
           1       0.96      0.96      0.96      1101

    accuracy                           0.96      2244
   macro avg       0.96      0.96      0.96      2244
weighted avg       0.96      0.96      0.96      2244



In [212]:
# NOTE(review): this path is the training *directory*, while extract_features()
# passes file_path straight to librosa.load — confirm which single audio file
# produced the recorded output and point file_path at it.
file_path = 'for-rerecorded/training' 
audio_features = extract_features(file_path)

In [213]:
# Display the extracted feature list for the sample.
audio_features

[-433.09164,
 114.70283,
 -4.2901425,
 42.665794,
 -1.0050657,
 26.406828,
 -0.16687989,
 -5.443137,
 -17.775555,
 -15.498422,
 -6.3406644,
 -3.3481221,
 3.9594162,
 0.46514243,
 0.40930822,
 0.38471118,
 0.30481866,
 0.30138382,
 0.45587727,
 0.4639369,
 0.49859858,
 0.4691161,
 0.4529384,
 0.39726946,
 0.43085295,
 0.0036528416,
 0.0081720995,
 0.11260032,
 1.2847229,
 5.6107454,
 2.680298,
 1.2997361,
 2.1827934,
 2.1269157,
 2.2760015,
 0.89660263,
 1.3573629,
 1.9848173,
 1.9348806,
 1.9827057,
 1.0319008,
 1.0026878,
 0.7532252,
 1.1135602,
 0.7703646,
 0.2737987,
 0.26682475,
 0.2545441,
 0.16744696,
 0.30978003,
 0.11673952,
 0.033139955,
 0.016130302,
 0.009352227,
 0.013990659,
 0.009826947,
 0.008063572,
 0.0121949185,
 0.03069506,
 0.09864114,
 0.0832671,
 0.115231365,
 0.060978767,
 0.030743245,
 0.056595482,
 0.049398746,
 0.024130322,
 0.028259654,
 0.04566038,
 0.04713849,
 0.029510938,
 0.024691993,
 0.04202049,
 0.06722937,
 0.02849167,
 0.024319613,
 0.022636015,
 0.

In [214]:
# Convert the feature list to an array ...
X_array = np.array(audio_features)

In [215]:
# ... and turn it into a single-row 2-D array (one sample, all features) for predict().
X_reshaped = X_array.reshape(1, -1)

In [216]:
# NOTE: this rebinds y_pred, which the confusion-matrix cell used for the whole hold-out set.
y_pred=clf1.predict(X_reshaped)

In [217]:
y_pred

array([1])

In [218]:
# Map the prediction back through the training label binarizer. The labels it was
# fitted on were already the integers produced by preprocess_dataset(), so the
# result is still an integer code (see the recorded output), not a folder name.
predicted_labels = lb.inverse_transform(y_pred)

# Print the decoded prediction for the single sample.
print("Predicted Label:", predicted_labels)

Predicted Label: [1]


In [219]:
# Label 1 is reported as "real"; any other value is reported as "fake".
print(
    "The audio is classified as real."
    if predicted_labels[0] == 1
    else "The audio is classified as fake."
)

The audio is classified as real.


In [138]:
import joblib

In [139]:
# Persist the fitted HistGradientBoosting model.
# NOTE: the file is written by joblib.dump, so despite the .h5 suffix it is a joblib file, not an HDF5 file.
joblib.dump(clf1, 'deepfakevoiceperhistboostinggrediant.h5')

['deepfakevoiceperhistboostinggrediant.h5']

In [140]:
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code — only load files from a trusted source.
model=joblib.load('deepfakevoiceperhistboostinggrediant.h5')

In [141]:
model

In [142]:
# NOTE(review): the recorded result below is array([0]) while cell In [217] shows
# array([1]) for clf1 on X_reshaped. The execution counts (138-143 vs 212-218)
# suggest these cells ran earlier, presumably against a different X_reshaped —
# re-run the notebook top to bottom to confirm the reloaded model agrees with clf1.
y_pred=model.predict(X_reshaped)

In [143]:
y_pred

array([0])