In [1]:
import os
import numpy as np
import librosa


In [2]:
def extract_mfcc_features(file_path, n_mfcc=13, sr=16000, n_fft=2048, hop_length=512):
    """Load an audio file and summarise its MFCCs as a fixed-length vector.

    Parameters
    ----------
    file_path : str
        Path of the audio file, handed to ``librosa.load``.
    n_mfcc : int, default 13
        Number of MFCC coefficients requested from ``librosa.feature.mfcc``.
    sr : int or None, default 16000
        Sample rate passed to ``librosa.load``; the rate that ``load`` returns
        is the one forwarded to the MFCC computation.
    n_fft : int, default 2048
        FFT window length forwarded to ``librosa.feature.mfcc``.
    hop_length : int, default 512
        Hop length forwarded to ``librosa.feature.mfcc``.

    Returns
    -------
    features : numpy.ndarray
        Per-coefficient means followed by per-coefficient standard
        deviations, both taken along axis 1 of the MFCC matrix.
    mfcc : numpy.ndarray
        The MFCC matrix exactly as returned by librosa.
    """
    y, sr = librosa.load(file_path, sr=sr)
    mfcc = librosa.feature.mfcc(
        y=y,
        sr=sr,
        n_mfcc=n_mfcc,
        n_fft=n_fft,
        hop_length=hop_length,
    )

    # Collapse axis 1 so clips of any duration yield the same feature length.
    mfcc_mean = np.mean(mfcc, axis=1)
    mfcc_std = np.std(mfcc, axis=1)

    features = np.hstack((mfcc_mean, mfcc_std))
    return features, mfcc

 

In [3]:
# Dataset root, assembled with os.path.join so the separator follows the host
# OS instead of hard-coding Windows backslashes.
BASE_PATH = os.path.join("DATASET_BIG", "for-norm", "for-norm")
# List the entries found directly under the dataset root.
print(os.listdir(BASE_PATH))

['testing', 'training', 'validation']


In [4]:
# Dataset root, assembled portably (no hard-coded "\\" separators), so the
# derived paths below do not mix separators on non-Windows hosts.
BASE_PATH = os.path.join("DATASET_BIG", "for-norm", "for-norm")

# One "fake" and one "real" folder per split.
TRAIN_FAKE = os.path.join(BASE_PATH, "training", "fake")
TRAIN_REAL = os.path.join(BASE_PATH, "training", "real")

TEST_FAKE = os.path.join(BASE_PATH, "testing", "fake")
TEST_REAL = os.path.join(BASE_PATH, "testing", "real")

VALIDATE_FAKE = os.path.join(BASE_PATH, "validation", "fake")
VALIDATE_REAL = os.path.join(BASE_PATH, "validation", "real")

In [5]:
def build_dataset(fake_dir, real_dir):
    """Build a feature matrix and label vector from two folders of .wav files.

    Parameters
    ----------
    fake_dir : str
        Folder whose ``.wav`` files are labelled 0.
    real_dir : str
        Folder whose ``.wav`` files are labelled 1.

    Returns
    -------
    X : numpy.ndarray
        One row per ``.wav`` file, holding the feature vector returned by
        ``extract_mfcc_features``. Rows from ``fake_dir`` come first.
    y : numpy.ndarray
        Labels aligned with the rows of ``X`` (0 = fake, 1 = real).
    """
    X, y = [], []
    # A single labelled loop replaces two copy-pasted loops.
    for directory, label in ((fake_dir, 0), (real_dir, 1)):
        # os.listdir returns names in arbitrary order; sorting makes the row
        # order reproducible across runs and machines.
        for file in sorted(os.listdir(directory)):
            if file.endswith(".wav"):
                path = os.path.join(directory, file)
                features, _ = extract_mfcc_features(path)
                X.append(features)
                y.append(label)

    return np.array(X), np.array(y)


In [7]:
# Extract features for every split (one fake/real folder pair per split).
X_train, y_train = build_dataset(TRAIN_FAKE, TRAIN_REAL)
X_test, y_test = build_dataset(TEST_FAKE, TEST_REAL)
X_validate, y_validate = build_dataset(VALIDATE_FAKE, VALIDATE_REAL)

# Write each array to disk, in the order train, validation, test.
for _npy_name, _array in (
    ("X_train.npy", X_train),
    ("y_train.npy", y_train),
    ("X_val.npy", X_validate),
    ("y_val.npy", y_validate),
    ("X_test.npy", X_test),
    ("y_test.npy", y_test),
):
    np.save(_npy_name, _array)
# Drop the loop temporaries so the cell leaves no extra names behind.
del _npy_name, _array





In [8]:
# Read the saved feature/label arrays back from disk, one pair per split.
X_train, y_train = np.load("X_train.npy"), np.load("y_train.npy")
X_val, y_val = np.load("X_val.npy"), np.load("y_val.npy")
X_test, y_test = np.load("X_test.npy"), np.load("y_test.npy")


In [9]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# transform to the training, validation and test splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_s, X_val_s, X_test_s = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)
    

In [12]:
from sklearn.metrics import classification_report, confusion_matrix
# No visible cell imports SVC, so it is imported here; without this the cell
# raises NameError on a fresh kernel.
from sklearn.svm import SVC

# RBF-kernel SVM trained on the standardised training features.
svm = SVC(
    kernel="rbf",
    C=1.0,
    gamma="scale",
    class_weight="balanced"
)

svm.fit(X_train_s, y_train)

# Score the fitted model on the validation split (label 0 = FAKE, 1 = REAL).
y_val_pred = svm.predict(X_val_s)
print(classification_report(y_val, y_val_pred, target_names=["FAKE", "REAL"]))
print(confusion_matrix(y_val, y_val_pred))


              precision    recall  f1-score   support

        FAKE       0.99      0.99      0.99      5398
        REAL       0.99      0.99      0.99      5400

    accuracy                           0.99     10798
   macro avg       0.99      0.99      0.99     10798
weighted avg       0.99      0.99      0.99     10798

[[5356   42]
 [  74 5326]]


In [11]:
from sklearn.metrics import classification_report, confusion_matrix

# Re-evaluate the current `svm` on the validation split.
# NOTE(review): the output recorded under this cell (execution count 11) shows
# ~50% accuracy and appears to predate the fit in the In [12] cell; re-run to
# refresh it, or drop this cell since In [12] prints the same report.
y_val_pred = svm.predict(X_val_s)
print("validation results")
# Class 0 is the fake class; the label was previously misspelled "FALE".
print(classification_report(y_val, y_val_pred, target_names=["FAKE", "REAL"]))
print(confusion_matrix(y_val, y_val_pred))

validation results
              precision    recall  f1-score   support

        FALE       1.00      0.00      0.00      5398
        REAL       0.50      1.00      0.67      5400

    accuracy                           0.50     10798
   macro avg       0.75      0.50      0.33     10798
weighted avg       0.75      0.50      0.33     10798

[[   1 5397]
 [   0 5400]]
