In [1]:
import os
import pandas as pd
from tqdm import tqdm
import librosa
import numpy as np

def create_mfcc_dataframe(real_folder_path, fake_folder_path,number):
    """Build a labelled table of time-averaged MFCC features from two folders of WAV files.

    Every regular file whose name ends in ``.wav`` (matched case-insensitively)
    directly inside ``real_folder_path`` and ``fake_folder_path`` is loaded with
    librosa and reduced to one vector of ``number`` values.

    Parameters
    ----------
    real_folder_path : str
        Folder whose recordings receive the label ``"real"``.
    fake_folder_path : str
        Folder whose recordings receive the label ``"fake"``.
    number : int
        Number of MFCC coefficients to request from librosa (``n_mfcc``).

    Returns
    -------
    pandas.DataFrame
        One row per recording: a ``label`` column followed by the feature
        columns, which carry pandas' default integer names. Rows from the real
        folder come first; within each folder rows follow the sorted file
        names, so repeated runs produce the same row order. If neither folder
        contains a WAV file the frame has only the (empty) ``label`` column.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when a folder cannot be listed.
    """
    # Function to extract MFCC features
    def features_extractor(file):
        audio, sample_rate = librosa.load(file, res_type='kaiser_fast')
        mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=number)
        # Transpose-then-mean collapses the second axis of the MFCC matrix,
        # leaving a single 1-D vector per recording.
        return np.mean(mfccs_features.T, axis=0)

    # Function to iterate through folder and extract features
    def iterate_folder_and_extract_features(folder_path, label):
        # Filter and sort up front: os.listdir returns entries in arbitrary
        # order, and the progress bar should count audio files only.
        wav_names = sorted(
            name
            for name in os.listdir(folder_path)
            if name.lower().endswith(".wav")
            and os.path.isfile(os.path.join(folder_path, name))
        )
        return [
            [features_extractor(os.path.join(folder_path, name)), label]
            for name in tqdm(wav_names, desc=label)
        ]

    # Real recordings first, then fake ones
    all_features = (
        iterate_folder_and_extract_features(real_folder_path, label="real")
        + iterate_folder_and_extract_features(fake_folder_path, label="fake")
    )

    # Two-column frame: the feature vector (as an object) and its label
    df = pd.DataFrame(all_features, columns=["mfcc_features", "label"])

    # Spread each feature vector over its own set of columns
    df_expanded = pd.DataFrame(df['mfcc_features'].tolist())

    # Put the label in front of the expanded feature columns
    return pd.concat([df[['label']], df_expanded], axis=1)


In [4]:
# Input folders, relative to the notebook's working directory. Both are built
# from one root so the two paths cannot drift apart.
DATASET_ROOT = os.path.join("..", "data set waves")
real_folder_path = os.path.join(DATASET_ROOT, "real")
fake_folder_path = os.path.join(DATASET_ROOT, "fake")



In [6]:
# Build and save one feature table per MFCC coefficient count.
# Each table is written to ./<count>/data set/all data/df_<count>f.csv.
MFCC_COUNTS = (25, 40, 50, 75, 100, 128)

for n_mfcc in MFCC_COUNTS:
    output_dir = os.path.join(".", str(n_mfcc), "data set", "all data")
    # to_csv does not create missing folders, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)
    # `df` is rebound on every pass; after the loop it holds the last table.
    df = create_mfcc_dataframe(real_folder_path, fake_folder_path, n_mfcc)
    df.to_csv(os.path.join(output_dir, f"df_{n_mfcc}f.csv"), index=False)

  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [00:01<00:00, 54.95it/s]
100%|██████████| 97/97 [00:04<00:00, 22.99it/s]
100%|██████████| 100/100 [00:01<00:00, 60.72it/s]
100%|██████████| 97/97 [00:06<00:00, 14.34it/s]
100%|██████████| 100/100 [00:02<00:00, 36.23it/s]
100%|██████████| 97/97 [00:06<00:00, 15.86it/s]
100%|██████████| 100/100 [00:03<00:00, 32.12it/s]
100%|██████████| 97/97 [00:05<00:00, 17.48it/s]
100%|██████████| 100/100 [00:02<00:00, 40.03it/s]
100%|██████████| 97/97 [00:05<00:00, 17.27it/s]
100%|██████████| 100/100 [00:02<00:00, 41.65it/s]
100%|██████████| 97/97 [00:06<00:00, 16.00it/s]
