In [22]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import classification_report
from joblib import dump

In [12]:
def load_audio_files(audio_dir):
    audio_files = os.listdir(audio_dir)
    audios = []
    sample_rates = []
    for file in audio_files:
        file_path = os.path.join(audio_dir, file)
        audio, sample_rate = librosa.load(file_path)
        audios.append(audio)
        sample_rates.append(sample_rate)
    return audios, sample_rates

real_audios, real_sample_rates = load_audio_files("Audio Dataset/Test/Original_audios")
fake_audios, fake_sample_rates = load_audio_files("Audio Dataset/Test/Fake_audios")

In [5]:
def extract_features(audios, sample_rates):
    features = []
    for audio, sample_rate in zip(audios, sample_rates):
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13)
        mfccs_scaled = np.mean(mfccs.T, axis=0)
        features.append(mfccs_scaled)
    return features

real_features = extract_features(real_audios, real_sample_rates)
fake_features = extract_features(fake_audios, fake_sample_rates)

In [7]:
features = real_features + fake_features
labels = [1]*len(real_features) + [0]*len(fake_features)
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [10]:
model = svm.SVC(kernel='linear')
model.fit(features_train, labels_train)
labels_pred = model.predict(features_test)
print(classification_report(labels_test, labels_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      1.00      1.00         2

    accuracy                           1.00        13
   macro avg       1.00      1.00      1.00        13
weighted avg       1.00      1.00      1.00        13



In [23]:
dump(model,'model.joblib')

['model.joblib']

In [20]:
def classify_audio(file_path, model):
    audio, sample_rate = librosa.load(file_path)
    features = extract_features([audio], [sample_rate])
    prediction = model.predict(features)
    
    print(prediction[0])
    return 'Real' if prediction[0] == 1 else 'Fake'

file_path = 'a.mp3'
print(classify_audio(file_path, model))

0
Fake
