<a href="https://colab.research.google.com/github/SarthakAgase/DeepFake-Voice-Recognition/blob/main/DeepFake_Voice_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Read the feature table from the Colab working directory.
df = pd.read_csv("/content/DATASET-balanced.csv")

# Every column except the last is used as a feature; the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Quick sanity check: first ten rows of the features, then of the target.
for preview in (X, y):
    print(preview.head(10))

   chroma_stft       rms  spectral_centroid  spectral_bandwidth      rolloff  \
0     0.338055  0.027948        2842.948867         4322.916759  6570.586186   
1     0.443766  0.037838        2336.129597         3445.777044  3764.949874   
2     0.302528  0.056578        2692.988386         2861.133180  4716.610271   
3     0.319933  0.031504        2241.665382         3503.766175  3798.641521   
4     0.420055  0.016158        2526.069123         3102.659519  5025.077899   
5     0.442880  0.012317        3952.880304         3702.717829  7104.089991   
6     0.453897  0.021782        4178.072150         3698.644769  7508.242075   
7     0.474154  0.011107        3993.039753         3948.154333  7872.563956   
8     0.602690  0.000970        3815.431438         3992.517515  6887.564689   
9     0.453962  0.017612        2894.560788         3435.434131  5663.232422   

   zero_crossing_rate       mfcc1       mfcc2      mfcc3      mfcc4  ...  \
0            0.041050 -462.169586   90.3112

In [None]:
from sklearn import preprocessing
# Encode the class labels as integers. `lb` keeps the fitted mapping
# (lb.classes_) so predictions can be translated back to the original labels.
lb = preprocessing.LabelBinarizer()
# The transform result is flattened with ravel() so that `y` can be indexed
# with the fold index arrays later on.
y = lb.fit_transform(y).ravel()
print(y)

[0 0 0 ... 1 1 1]


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold

# The forest and the fold shuffling are both seeded (random_state=1) so that
# re-running the notebook reproduces the same trees and the same splits.
model = RandomForestClassifier(n_estimators=50, random_state=1)
kf = KFold(n_splits=5, shuffle=True, random_state=1)

print(model)
print(f"KFold splits: {kf.get_n_splits(X)}")

RandomForestClassifier(n_estimators=50, random_state=1)
KFold splits: 5


In [None]:
import time

import numpy as np

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, roc_auc_score

# Per-fold metric collectors; one value is appended to each list per CV fold.
acc_score = []
prec_score = []
rec_score = []
f1s = []
MCCs = []
ROCareas = []

# perf_counter is monotonic, so the elapsed time cannot be distorted by
# system clock adjustments. The timed region covers fitting, prediction and
# metric computation for all folds.
start = time.perf_counter()
for train_index, test_index in kf.split(X):
    # X is a DataFrame (positional .iloc), y is a NumPy array (plain indexing).
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_train, y_test = y[train_index], y[test_index]

    # Refit the same estimator from scratch on this fold's training split.
    model.fit(X_train, y_train)
    pred_values = model.predict(X_test)

    # ROC AUC must be computed from a continuous score, not from hard 0/1
    # predictions: with hard labels the curve has a single operating point
    # and the "AUC" degenerates to balanced accuracy. Use the predicted
    # probability of the positive class (label 1), looking its column up in
    # classes_ instead of assuming a fixed position.
    pos_col = list(model.classes_).index(1)
    pos_scores = model.predict_proba(X_test)[:, pos_col]

    # All metrics follow the scikit-learn (y_true, y_pred) argument order.
    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

    prec = precision_score(y_test, pred_values, average="binary", pos_label=1)
    prec_score.append(prec)

    rec = recall_score(y_test, pred_values, average="binary", pos_label=1)
    rec_score.append(rec)

    f1 = f1_score(y_test, pred_values, average="binary", pos_label=1)
    f1s.append(f1)

    mcc = matthews_corrcoef(y_test, pred_values)
    MCCs.append(mcc)

    roc = roc_auc_score(y_test, pos_scores)
    ROCareas.append(roc)

end = time.perf_counter()
timeTaken = (end - start)
print("Model trained in: " + str( round(timeTaken, 2) ) + " seconds.")

Model trained in: 14.58 seconds.


In [None]:
# Report mean and standard deviation across the CV folds for every metric.
print("Mean results and (std.):\n")

# (label, per-fold scores, scale factor, unit suffix, trailing text)
# Accuracy is shown as a percentage and followed by a blank line; the other
# metrics are shown on their native 0-1 / -1..1 scale.
summary_rows = [
    ("Accuracy", acc_score, 100, "%", "\n"),
    ("Precision", prec_score, 1, "", ""),
    ("Recall", rec_score, 1, "", ""),
    ("F1-Score", f1s, 1, "", ""),
    ("MCC", MCCs, 1, "", ""),
    ("ROC AUC", ROCareas, 1, "", ""),
]

for label, scores, scale, unit, trailer in summary_rows:
    mean_txt = str(round(np.mean(scores) * scale, 3))
    std_txt = str(round(np.std(scores) * scale, 3))
    print(label + ": " + mean_txt + unit + " (" + std_txt + ")" + trailer)

Mean results and (std.):

Accuracy: 98.574% (0.272)

Precision: 0.993 (0.002)
Recall: 0.979 (0.006)
F1-Score: 0.986 (0.003)
MCC: 0.972 (0.005)
ROC AUC: 0.986 (0.003)
