In [None]:
!pip install datasets librosa scikit-learn tensorflow torch torchaudio matplotlib seaborn streamlit



Collecting streamlit
  Downloading streamlit-1.52.0-py3-none-any.whl.metadata (9.8 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.52.0-py3-none-any.whl (9.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m53.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m53.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.52.0


In [1]:
#Because we have rate limit so we download it by using git command
!sudo apt-get install git-lfs
!git lfs install
!git clone https://huggingface.co/datasets/CSALT/deepfake_detection_dataset_urdu


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git-lfs is already the newest version (3.0.2-1ubuntu0.3).
0 upgraded, 0 newly installed, 0 to remove and 41 not upgraded.
Git LFS initialized.
Cloning into 'deepfake_detection_dataset_urdu'...
remote: Enumerating objects: 6796, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 6796 (delta 0), reused 0 (delta 0), pack-reused 6793 (from 1)[K
Receiving objects: 100% (6796/6796), 957.64 KiB | 1.33 MiB/s, done.
Resolving deltas: 100% (2/2), done.
Updating files: 100% (6796/6796), done.
Filtering content: 100% (6794/6794), 1.82 GiB | 20.96 MiB/s, done.


In [9]:
import os
import pandas as pd

base_path = "/content/deepfake_detection_dataset_urdu"  # change if needed

data = []

# 1. Bonafide = class 0
for root, dirs, files in os.walk(os.path.join(base_path, "Bonafide")):
    for f in files:
        if f.lower().endswith(".wav"):
            data.append([os.path.join(root, f), 0])

# 2. Spoofed_TTS = class 1
for root, dirs, files in os.walk(os.path.join(base_path, "Spoofed_TTS")):
    for f in files:
        if f.lower().endswith(".wav"):
            data.append([os.path.join(root, f), 1])

# 3. Spoofed_Tacotron = class 1
for root, dirs, files in os.walk(os.path.join(base_path, "Spoofed_Tacotron")):
    for f in files:
        if f.lower().endswith(".wav"):
            data.append([os.path.join(root, f), 1])

df = pd.DataFrame(data, columns=["file_path", "label"])
print(df.head())
print(df['label'].value_counts())


                                           file_path  label
0  /content/deepfake_detection_dataset_urdu/Bonaf...      0
1  /content/deepfake_detection_dataset_urdu/Bonaf...      0
2  /content/deepfake_detection_dataset_urdu/Bonaf...      0
3  /content/deepfake_detection_dataset_urdu/Bonaf...      0
4  /content/deepfake_detection_dataset_urdu/Bonaf...      0
label
0    3398
1    3396
Name: count, dtype: int64


In [10]:
import librosa
import numpy as np
from tqdm import tqdm

def extract_features(file_path, max_len=5, sr_target=16000):
    # Load audio
    y, sr = librosa.load(file_path, sr=sr_target)

    # Fix length (pad or cut)
    max_samples = max_len * sr
    if len(y) < max_samples:
        y = np.pad(y, (0, max_samples - len(y)))
    else:
        y = y[:max_samples]

    # ---- Feature extraction ----

    # 1. MFCC
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfcc = mfcc.flatten()

    # 2. Mel Spectrogram
    mel = librosa.feature.melspectrogram(y=y, sr=sr)
    mel = librosa.power_to_db(mel)
    mel = mel.flatten()

    # 3. Chroma
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma = chroma.flatten()

    # Combine all features
    combined = np.hstack([mfcc, mel, chroma])

    return combined


In [11]:
X = []
y = []

for i, row in tqdm(df.iterrows(), total=len(df)):
    try:
        feats = extract_features(row["file_path"])
        X.append(feats)
        y.append(row["label"])
    except:
        print("Error:", row["file_path"])


  return pitch_tuning(
100%|██████████| 6794/6794 [06:19<00:00, 17.92it/s]


In [12]:
#Convert to Numpy Arrays
X = np.array(X)
y = np.array(y)

print("X shape:", X.shape)
print("y shape:", y.shape)



X shape: (6794, 24021)
y shape: (6794,)


In [13]:
#Train Test Split
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [14]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [15]:
# Train SVM Model
from sklearn.svm import SVC

model_svm = SVC(probability=True)
model_svm.fit(X_train_scaled, y_train)


In [16]:
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron

#Logistric Regression
model_lr = LogisticRegression(max_iter=500)
model_lr.fit(X_train_scaled, y_train)

#Perceptron Single Layer

model_percep = Perceptron()
model_percep.fit(X_train_scaled, y_train)


In [17]:
import tensorflow as tf
from tensorflow.keras import layers, models

model_dnn = models.Sequential([
    layers.Input(shape=(X_train_scaled.shape[1],)),
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model_dnn.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

model_dnn.fit(
    X_train_scaled, y_train,
    epochs=15,
    batch_size=32,
    validation_split=0.2
)


Epoch 1/15
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 91ms/step - accuracy: 0.8670 - loss: 1.6590 - val_accuracy: 0.9172 - val_loss: 0.7827
Epoch 2/15
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 86ms/step - accuracy: 0.9816 - loss: 0.1535 - val_accuracy: 0.9623 - val_loss: 0.4182
Epoch 3/15
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 89ms/step - accuracy: 0.9793 - loss: 0.3482 - val_accuracy: 0.9779 - val_loss: 0.3492
Epoch 4/15
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 82ms/step - accuracy: 0.9859 - loss: 0.1806 - val_accuracy: 0.9706 - val_loss: 0.9255
Epoch 5/15
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 87ms/step - accuracy: 0.9719 - loss: 0.9285 - val_accuracy: 0.9706 - val_loss: 1.4313
Epoch 6/15
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 86ms/step - accuracy: 0.9889 - loss: 0.2973 - val_accuracy: 0.9733 - val_loss: 0.3982
Epoch 7/15
[1m1

<keras.src.callbacks.history.History at 0x7a6f902aaff0>

In [18]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

def evaluate(model, X_test, y_test, use_proba=True):
    y_pred = model.predict(X_test)

    if use_proba:
        y_prob = model.predict_proba(X_test)[:,1]
    else:
        y_prob = y_pred

    print("Accuracy :", accuracy_score(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred))
    print("Recall   :", recall_score(y_test, y_pred))
    print("F1-score :", f1_score(y_test, y_pred))
    print("AUC-ROC  :", roc_auc_score(y_test, y_prob))


In [19]:
# Evulation of SVM Model
evaluate(model_svm, X_test_scaled, y_test)


Accuracy : 0.9926416482707874
Precision: 0.9926362297496318
Recall   : 0.9926362297496318
F1-score : 0.9926362297496318
AUC-ROC  : 0.9998657194836698


In [20]:
evaluate(model_lr, X_test_scaled, y_test)


Accuracy : 0.9484915378955114
Precision: 0.9511111111111111
Recall   : 0.9455081001472754
F1-score : 0.948301329394387
AUC-ROC  : 0.9875054145369488


In [21]:
evaluate(model_percep, X_test_scaled, y_test, use_proba=False)


Accuracy : 0.9300956585724798
Precision: 0.937125748502994
Recall   : 0.9219440353460973
F1-score : 0.9294729027468448
AUC-ROC  : 0.9300896647318722


In [22]:
y_pred = (model_dnn.predict(X_test_scaled) > 0.5).astype(int)
y_prob = model_dnn.predict(X_test_scaled)

print("Accuracy :", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall   :", recall_score(y_test, y_pred))
print("F1-score :", f1_score(y_test, y_pred))
print("AUC-ROC  :", roc_auc_score(y_test, y_prob))


[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 43ms/step
Accuracy : 0.9889624724061811
Precision: 0.9853801169590644
Recall   : 0.9926362297496318
F1-score : 0.9889948642699926
AUC-ROC  : 0.9966819717577752
