In [3]:
import os
import pandas as pd
from tqdm import tqdm
import librosa
import numpy as np
def create_mfcc_dataframe(real_folder_path, fake_folder_path, n_mfcc=100):
    """Build a labelled table of time-averaged MFCC features from two folders.

    Every file whose name ends in ``.wav`` (case-insensitive) directly inside
    each folder is loaded with librosa, converted to MFCCs, and averaged over
    frames so that each file contributes exactly one row.

    Parameters
    ----------
    real_folder_path : str
        Folder whose WAV files are labelled ``"real"``.
    fake_folder_path : str
        Folder whose WAV files are labelled ``"fake"``.
    n_mfcc : int, optional
        Number of MFCC coefficients requested from librosa per file, and
        therefore the number of feature columns. Defaults to 100.

    Returns
    -------
    pandas.DataFrame
        One row per WAV file. The first column is ``"label"``; the remaining
        columns are named with the integers ``0 .. n_mfcc - 1``. Rows for the
        real folder come first, then the fake folder, each sorted by file name.
        If neither folder contains a WAV file, the frame has only the
        ``"label"`` column and no rows.
    """

    def features_extractor(file):
        """Load one audio file and return its MFCC vector averaged over frames."""
        audio, sample_rate = librosa.load(file, res_type='kaiser_fast')
        mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
        # Per librosa's documentation the MFCC matrix is (n_mfcc, frames);
        # averaging the transpose over axis 0 collapses the frame axis.
        mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
        return mfccs_scaled_features

    def iterate_folder_and_extract_features(folder_path, label):
        """Return ``[features, label]`` pairs for every WAV file in ``folder_path``."""
        # os.listdir yields entries in arbitrary order, so sort to make the row
        # order reproducible. Filtering up front also makes the progress bar
        # total reflect the files that are actually processed, and the
        # lower-casing means upper-case extensions such as ".WAV" are not missed.
        wav_file_names = sorted(
            file_name
            for file_name in os.listdir(folder_path)
            if file_name.lower().endswith(".wav")
        )

        extracted_features = []
        for file_name in tqdm(wav_file_names, desc=label):
            file_path = os.path.join(folder_path, file_name)
            extracted_features.append([features_extractor(file_path), label])
        return extracted_features

    # Real rows first, then fake rows.
    real_features = iterate_folder_and_extract_features(real_folder_path, label="real")
    fake_features = iterate_folder_and_extract_features(fake_folder_path, label="fake")
    all_features = real_features + fake_features

    # Each row initially holds the whole MFCC vector in a single cell ...
    df = pd.DataFrame(all_features, columns=["mfcc_features", "label"])
    # ... which is then expanded into one numeric column per coefficient.
    df_expanded = pd.DataFrame(df['mfcc_features'].tolist())

    # Put the label in front of the expanded MFCC columns.
    df_expanded = pd.concat([df[['label']], df_expanded], axis=1)

    return df_expanded


In [4]:
# Input folders for the "real" and "fake" recordings (relative paths).
real_folder_path, fake_folder_path = (
    '..//data set waves//real',
    '..//data set waves//fake',
)

# Build the feature table: a label column followed by the averaged MFCC columns.
df = create_mfcc_dataframe(
    real_folder_path=real_folder_path,
    fake_folder_path=fake_folder_path,
)

  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [00:07<00:00, 13.73it/s]
100%|██████████| 97/97 [00:06<00:00, 14.12it/s]


In [5]:
# Write the feature table to CSV, leaving out the DataFrame index.
output_csv_path = "data set final//100.csv"
df.to_csv(output_csv_path, index=False)