In [6]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm

# === RELATIVE PATH SETUP FOR JUPYTER ===
# Every location below is derived from the current working directory, so the
# cell does not rely on __file__.
base_dir = os.getcwd()

# The dataset folder "A_data" sits two directory levels above base_dir and
# contains one sub-folder per class.
data_folder = os.path.join(base_dir, os.pardir, os.pardir, "A_data")
real_folder, fake_folder = (
    os.path.join(data_folder, class_dir) for class_dir in ("real", "fake")
)

# Destination of the combined feature table (written inside base_dir).
output_csv = os.path.join(base_dir, "features_combined.csv")

# === FEATURE EXTRACTION FUNCTION ===
def extract_features(filepath):
    """Build a flat feature list for a single audio file.

    The file is loaded with ``librosa.load(filepath, sr=None)``. The returned
    list contains, in this order:

    * the mean of each of the 13 MFCC rows (reduced along axis 1),
    * the standard deviation of each of the 13 MFCC rows,
    * the mean zero-crossing rate,
    * the mean RMS energy.

    Args:
        filepath: Path of the audio file to analyse.

    Returns:
        The feature list described above, or ``None`` when any step raises;
        in that case a warning naming the file and the error is printed.
    """
    try:
        signal, sample_rate = librosa.load(filepath, sr=None)

        # MFCCs: 13 coefficients, each summarised by its mean and spread.
        mfcc_matrix = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
        coeff_means = mfcc_matrix.mean(axis=1)
        coeff_stds = mfcc_matrix.std(axis=1)

        # Zero-crossing rate and RMS energy are each collapsed to one scalar.
        zero_cross_mean = librosa.feature.zero_crossing_rate(signal).mean()
        energy_mean = librosa.feature.rms(y=signal).mean()

        # Unpacking iterates the arrays element by element, so the list holds
        # the individual NumPy scalars followed by the two summary values.
        return [*coeff_means, *coeff_stds, zero_cross_mean, energy_mean]
    except Exception as err:
        # Best-effort: one unreadable file must not abort the whole run.
        print(f"⚠️ Skipping file {filepath} due to error: {err}")
        return None

# === PROCESS FILES FUNCTION ===
def process_folder(folder_path, label):
    """Extract labelled feature rows for the ``.wav`` files in one folder.

    Only entries directly inside ``folder_path`` are considered (no
    recursion). Entries whose name starts with ``"._"`` are ignored
    (presumably macOS sidecar files -- confirm against the dataset).

    Args:
        folder_path: Directory to scan.
        label: Value appended as the last element of every row.

    Returns:
        A list with one row per successfully processed file, in sorted
        file-name order. Each row is the list returned by
        ``extract_features`` followed by ``label``. Files for which
        ``extract_features`` returns ``None`` are left out.

    Raises:
        OSError: Propagated from ``os.listdir`` when the folder cannot be
            listed (for example, it does not exist).
    """
    # os.listdir yields names in arbitrary order; sorting keeps the row order
    # of the resulting table reproducible between runs. Filtering before the
    # progress bar is created makes its total equal the number of files that
    # are actually processed. The extension check is case-insensitive so that
    # files named "*.WAV" are not silently dropped.
    wav_names = sorted(
        name
        for name in os.listdir(folder_path)
        if name.lower().endswith(".wav") and not name.startswith("._")
    )

    data = []
    for filename in tqdm(wav_names, desc=f"Processing label {label} folder"):
        features = extract_features(os.path.join(folder_path, filename))
        # extract_features signals a skipped file with None.
        if features is not None:
            data.append([*features, label])
    return data

# === COMBINE DATA ===
# Rows from the "real" folder (label 1) come first, followed by the rows from
# the "fake" folder (label 0).
all_data = [
    *process_folder(real_folder, 1),  # 1 = Real
    *process_folder(fake_folder, 0),  # 0 = Fake
]

# Column names follow the layout of each row: 13 MFCC means, 13 MFCC standard
# deviations, then the two scalar features and the class label.
columns = [
    f"mfcc_{stat}_{idx}"
    for stat in ("mean", "std")
    for idx in range(1, 14)
] + ["zcr", "rms", "label"]

# === SAVE TO CSV ===
df = pd.DataFrame(data=all_data, columns=columns)
df.to_csv(output_csv, index=False)  # index=False: no row-index column

print(f"\n✅ Combined feature extraction complete. Saved to: {output_csv}")

Processing label 1 folder: 100%|██████████| 391/391 [05:51<00:00,  1.11it/s]
Processing label 0 folder: 100%|██████████| 268/268 [03:48<00:00,  1.17it/s]


✅ Combined feature extraction complete. Saved to: /Volumes/T7/Capstone Proj/Model_1/Models/Feature extraction/features_combined.csv



