In [33]:
import os
import wave
import numpy as np
import pandas as pd

In [34]:
# Root of the dataset tree. Each split folder under it holds one sub-folder per
# label (the driver loop joins base_dir / dataset / label).
# The location can be overridden with the FOR_NORM_DIR environment variable so the
# notebook is not tied to a single machine; the original path stays the fallback.
base_dir = os.environ.get(
    "FOR_NORM_DIR",
    r"C:\Users\dishi\OneDrive\Desktop\daiict\for-norm",
)
datasets = ['training', 'testing', 'validation']  # split sub-directories under base_dir
labels = ['real', 'fake']  # class sub-directories inside each split

In [35]:
def calculate_zero_crossing_rate(audio_data):
    """Return the zero-crossing rate of a signal.

    The sign of every sample is taken and consecutive signs are differenced.
    A direct flip between positive and negative contributes 2 to the summed
    absolute difference (a step to or from an exact zero contributes 1), which
    is why the total is divided by twice the number of samples.
    """
    sign_steps = np.diff(np.sign(audio_data))
    total_step = np.sum(np.abs(sign_steps))
    return total_step / (2 * len(audio_data))

In [36]:
def calculate_rms_energy(audio_data):
    """Return the root-mean-square value of the samples in ``audio_data``."""
    squared_samples = audio_data ** 2
    mean_power = np.mean(squared_samples)
    return np.sqrt(mean_power)

In [55]:
def calculate_dft(audio_data):
    """Compute the discrete Fourier transform of a 1-D signal.

    Evaluates X[k] = sum_n x[n] * exp(-2j*pi*k*n/N) for k = 0..N-1, the same
    definition (and sign convention) the previous explicit loop used, but via
    the FFT so the cost is O(N log N) instead of O(N^2).

    Parameters
    ----------
    audio_data : array-like
        1-D sequence of real or complex samples.

    Returns
    -------
    numpy.ndarray
        Complex128 array of length ``len(audio_data)``.
    """
    # Promote to complex128 up front so the result dtype does not depend on the
    # input dtype (e.g. float32 audio).
    samples = np.asarray(audio_data, dtype=complex)
    if samples.size == 0:
        # np.fft.fft rejects zero-length input; an empty signal yields an empty spectrum.
        return np.zeros(0, dtype=complex)
    return np.fft.fft(samples)

In [56]:
def calculate_frequencies(sample_rate, N):
    """Return the centre frequencies of the first ``N // 2 + 1`` DFT bins.

    Bin ``k`` of an ``N``-point DFT sits at ``k * sample_rate / N``. Spacing the
    bins evenly from 0 up to ``sample_rate / 2`` is only equivalent when ``N`` is
    even; for odd ``N`` the last retained bin lies below the Nyquist frequency,
    so the frequencies are derived from the bin index instead.

    Parameters
    ----------
    sample_rate : float
        Sampling rate of the signal the DFT was taken from.
    N : int
        Number of samples in that signal.

    Returns
    -------
    numpy.ndarray
        Float array of length ``N // 2 + 1``.
    """
    num_bins = N // 2 + 1
    if N == 0:
        # No samples: return the single 0.0 entry rather than dividing by zero.
        return np.zeros(num_bins)
    return np.arange(num_bins) * (sample_rate / N)

In [57]:
def calculate_spectral_centroid(audio_data, sample_rate):
    """Return the magnitude-weighted mean frequency of the signal.

    Only the first ``len(audio_data) // 2 + 1`` DFT bins are used. The weighted
    frequency sum is divided by the summed magnitude, so an all-zero spectrum
    results in a 0/0 division.
    """
    num_samples = len(audio_data)
    half_spectrum = calculate_dft(audio_data)[:num_samples // 2 + 1]
    weights = np.abs(half_spectrum)
    bin_frequencies = calculate_frequencies(sample_rate, num_samples)
    weighted_total = np.sum(bin_frequencies * weights)
    return weighted_total / np.sum(weights)

In [58]:
def calculate_spectral_bandwidth(audio_data, sample_rate):
    """Return the magnitude-weighted standard deviation of frequency about the centroid.

    The magnitude spectrum (first ``len(audio_data) // 2 + 1`` DFT bins) is
    computed once and reused for both the centroid and the spread; previously
    the DFT was evaluated twice per call (once through
    ``calculate_spectral_centroid`` and once here).

    Parameters
    ----------
    audio_data : array-like
        1-D signal samples.
    sample_rate : float
        Sampling rate of ``audio_data``.

    Returns
    -------
    float
        Square root of the magnitude-weighted variance of the bin frequencies.
        An all-zero spectrum results in a 0/0 division.
    """
    N = len(audio_data)
    magnitude_spectrum = np.abs(calculate_dft(audio_data)[:N // 2 + 1])
    frequencies = calculate_frequencies(sample_rate, N)
    total_magnitude = np.sum(magnitude_spectrum)

    # Same weighted-mean expression as calculate_spectral_centroid, evaluated on
    # the spectrum already in hand to avoid a second transform.
    spectral_centroid = np.sum(frequencies * magnitude_spectrum) / total_magnitude

    weighted_variance = np.sum((frequencies - spectral_centroid) ** 2 * magnitude_spectrum) / total_magnitude
    return np.sqrt(weighted_variance)

In [59]:
def calculate_spectral_rolloff(audio_data, sample_rate, rolloff_percent=0.85):
    """Return the frequency below which ``rolloff_percent`` of the summed magnitude lies.

    The cumulative magnitude over the first ``len(audio_data) // 2 + 1`` DFT
    bins is compared with ``rolloff_percent`` times its final value; the index
    of the first bin reaching that level is converted to a frequency using a
    bin spacing of ``sample_rate / len(audio_data)``.
    """
    num_samples = len(audio_data)
    magnitudes = np.abs(calculate_dft(audio_data)[:num_samples // 2 + 1])
    running_total = np.cumsum(magnitudes)
    cutoff = rolloff_percent * running_total[-1]
    # First bin whose cumulative magnitude reaches the cutoff.
    first_bin = np.flatnonzero(running_total >= cutoff)[0]
    bin_width = sample_rate / num_samples
    return first_bin * bin_width

In [60]:
def manual_dct(filter_banks, n_mfcc):
    """Project each row of ``filter_banks`` onto the first ``n_mfcc`` cosine bases.

    ``filter_banks`` must be 2-D with shape (num_frames, num_filters). Basis
    row ``k`` is ``cos(pi / num_filters * (j + 0.5) * k)`` for filter index
    ``j``; no scaling factor is applied. The result has shape
    (num_frames, n_mfcc).
    """
    _, num_filters = filter_banks.shape
    # Half-sample-shifted angles shared by every basis row.
    base_angles = (np.pi / num_filters) * (np.arange(num_filters) + 0.5)
    # Column vector of coefficient orders; broadcasting builds all rows at once.
    orders = np.arange(n_mfcc).reshape(-1, 1)
    dct_basis = np.cos(base_angles * orders)
    return np.dot(dct_basis, filter_banks.T).T

In [61]:
def calculate_mfcc(audio_data, sample_rate, n_mfcc=13):
    """Compute time-averaged MFCC-style coefficients for a 1-D signal.

    Pipeline: pre-emphasis -> 25 ms frames every 10 ms -> Hamming window ->
    512-point power spectrum -> triangular mel filter bank -> dB ->
    ``manual_dct`` -> mean over frames.

    NOTE(review): in the reviewed source the statements between the FFT call
    and the FFT-bin computation were lost (the line was cut mid-expression,
    leaving ``pow_frames``, ``nfilt`` and ``hz_points`` undefined, so the cell
    could not run). They are restored below following the conventional mel
    filter-bank recipe. ``nfilt = 40`` and the 0 Hz .. Nyquist mel range are
    assumptions - confirm against the original notebook.

    Parameters
    ----------
    audio_data : numpy.ndarray
        1-D, non-empty array of samples.
    sample_rate : int or float
        Sampling rate of ``audio_data`` in Hz.
    n_mfcc : int, optional
        Number of coefficients to keep (default 13).

    Returns
    -------
    numpy.ndarray
        1-D array of length ``n_mfcc``: each coefficient averaged over frames.
    """
    # Pre-emphasis: first-order high-pass, first sample passed through unchanged.
    pre_emphasis = 0.97
    emphasized_signal = np.append(audio_data[0], audio_data[1:] - pre_emphasis * audio_data[:-1])

    # Framing
    frame_size = 0.025
    frame_stride = 0.01
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    signal_length = len(emphasized_signal)
    num_frames = int(np.ceil(float(np.abs(signal_length - frame_length)) / frame_step)) + 1

    # Zero-pad so that every frame has frame_length samples.
    pad_signal_length = num_frames * frame_step + frame_length
    padding = np.zeros(pad_signal_length - signal_length)
    pad_signal = np.append(emphasized_signal, padding)

    # Row i holds the sample indices of frame i.
    frame_starts = frame_step * np.arange(num_frames)
    indices = frame_starts[:, None] + np.arange(frame_length)[None, :]
    frames = pad_signal[indices]

    # Windowing
    frames *= np.hamming(frame_length)

    # Fourier transform and power spectrum (rfft zero-pads or crops each frame to NFFT).
    NFFT = 512
    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))
    pow_frames = (1.0 / NFFT) * (mag_frames ** 2)

    # Mel filter bank: nfilt triangles equally spaced on the mel scale.
    nfilt = 40  # assumed value, see NOTE(review) in the docstring
    low_freq_mel = 0
    high_freq_mel = 2595 * np.log10(1 + (sample_rate / 2) / 700)  # Hz to mel
    mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2)
    hz_points = 700 * (10 ** (mel_points / 2595) - 1)  # mel to Hz
    fft_bins = np.floor((NFFT + 1) * hz_points / sample_rate).astype(np.int32)

    fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1))))
    for m in range(1, nfilt + 1):
        left = fft_bins[m - 1]
        center = fft_bins[m]
        right = fft_bins[m + 1]

        # Rising then falling slope; an empty range (equal bins) skips the division.
        for k in range(left, center):
            fbank[m - 1, k] = (k - left) / (center - left)
        for k in range(center, right):
            fbank[m - 1, k] = (right - k) / (right - center)

    filter_banks = np.dot(pow_frames, fbank.T)
    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)  # avoid log10(0)
    filter_banks = 20 * np.log10(filter_banks)  # dB

    # MFCC: cosine transform of the log filter-bank energies, averaged over frames.
    mfcc = manual_dct(filter_banks, n_mfcc)
    mfcc = np.mean(mfcc, axis=0)

    return mfcc

In [62]:
def calculate_skewness(audio_data):
    """Return the skewness: the mean cubed deviation divided by the cubed standard deviation.

    Both moments use the population form (division by the number of samples).
    """
    deviations = audio_data - np.mean(audio_data)
    third_moment = np.sum(deviations ** 3) / len(audio_data)
    spread = np.std(audio_data)
    return third_moment / spread ** 3

In [63]:
def calculate_kurtosis(audio_data):
    """Return the excess kurtosis: the mean fourth-power deviation over std**4, minus 3.

    Both moments use the population form (division by the number of samples).
    """
    deviations = audio_data - np.mean(audio_data)
    fourth_moment = np.sum(deviations ** 4) / len(audio_data)
    spread = np.std(audio_data)
    return fourth_moment / spread ** 4 - 3

In [64]:
def process_audio_file(file_path):
    """Read a 16-bit PCM WAV file and compute its feature dictionary.

    Parameters
    ----------
    file_path : str
        Path to the WAV file.

    Returns
    -------
    tuple
        ``(features, audio_data)`` where ``features`` maps feature names to
        Python floats (plus ``'filename'`` and ``'mfcc_<i>'`` entries, numbered
        from 1) and ``audio_data`` is the float32 sample array.

    Raises
    ------
    ValueError
        If the file does not store 2-byte samples; the bytes are decoded as
        int16, so any other width would be silently misread.
    """
    # The context manager closes the handle even if reading the frames fails.
    with wave.open(file_path, "rb") as audio_file:
        sample_width = audio_file.getsampwidth()
        sample_rate = audio_file.getframerate()
        raw_frames = audio_file.readframes(audio_file.getnframes())

    if sample_width != 2:
        raise ValueError(
            f"Unsupported sample width {sample_width} bytes in {file_path}; expected 16-bit PCM"
        )

    # NOTE(review): channels are not de-interleaved, so this assumes mono input
    # (the dataset filenames contain "_mono") - confirm for other data.
    audio_data = np.frombuffer(raw_frames, dtype=np.int16)

    # Convert audio data to float32
    audio_data = audio_data.astype(np.float32)

    # Extract features using the defined functions
    features = {
        'filename': os.path.basename(file_path),
        'zero_crossing_rate': float(calculate_zero_crossing_rate(audio_data)),
        'rms_energy': float(calculate_rms_energy(audio_data)),
        'signal_mean': float(np.mean(audio_data)),
        'signal_std': float(np.std(audio_data)),
        'amplitude_envelope': float(np.max(audio_data)),
        'spectral_centroid': float(calculate_spectral_centroid(audio_data, sample_rate)),
        'spectral_bandwidth': float(calculate_spectral_bandwidth(audio_data, sample_rate)),
        'spectral_rolloff': float(calculate_spectral_rolloff(audio_data, sample_rate)),
        'skewness': float(calculate_skewness(audio_data)),
        'kurtosis': float(calculate_kurtosis(audio_data))
    }

    # Extract MFCCs; keys are numbered from 1.
    mfccs = calculate_mfcc(audio_data, sample_rate)
    for i, mfcc in enumerate(mfccs):
        features[f'mfcc_{i+1}'] = float(mfcc)

    return features, audio_data

In [None]:
def process_directory(directory, label, chunk_size=100):
    """Extract features from every ``.wav`` file directly inside ``directory``.

    Parameters
    ----------
    directory : str
        Folder to scan (not recursive).
    label : str
        Value stored under the ``'label'`` key of every feature dict.
    chunk_size : int, optional
        Retained for backward compatibility only. The former chunk loop visited
        the files in the same order and did no per-chunk work, so the value has
        no effect on the result.

    Returns
    -------
    tuple
        ``(features_list, all_audio_data)``: one feature dict and one sample
        array per file, in processing order.
    """
    # os.listdir returns names in arbitrary order; sort so rows are reproducible.
    file_paths = [
        os.path.join(directory, filename)
        for filename in sorted(os.listdir(directory))
        if filename.endswith(".wav")
    ]
    num_files = len(file_paths)

    features_list = []
    all_audio_data = []

    for idx, file_path in enumerate(file_paths, start=1):
        print(f"Processing file {idx} of {num_files}: {os.path.basename(file_path)}...")
        features, audio_data = process_audio_file(file_path)
        features['label'] = label
        features_list.append(features)
        all_audio_data.append(audio_data)
        print(f"File {idx} done.")

    return features_list, all_audio_data

In [None]:
# Accumulators: one feature dict per file, plus the raw sample arrays split by label.
all_features = []
all_real_data, all_fake_data = [], []

# Visit every <split>/<label> folder under base_dir.
for dataset in datasets:
    for label in labels:
        directory = os.path.join(base_dir, dataset, label)
        features_list, audio_data_all = process_directory(directory, label)

        all_features += features_list
        # Anything not labelled 'real' is collected with the fake samples.
        target_audio = all_real_data if label == 'real' else all_fake_data
        target_audio += audio_data_all

In [None]:
# Build the feature table from the accumulated per-file feature dicts
# (one row per dict, dict keys become the columns).
features_df = pd.DataFrame(all_features)

In [None]:
# Write the feature table to the current working directory; index=False leaves
# the row index out of the file.
features_df.to_csv('audio_features_manual.csv', index=False)

In [None]:
# Show the feature table as this cell's output.
features_df

Unnamed: 0,filename,zero_crossing_rate,rms_energy,signal_mean,signal_std,amplitude_envelope,spectral_centroid,spectral_bandwidth,spectral_rolloff,skewness,...,mfcc_12,mfcc_13,mfcc_14,mfcc_15,mfcc_16,mfcc_17,mfcc_18,mfcc_19,mfcc_20,label
0,file1.wav_16k.wav_norm.wav_mono.wav_silence.wav,0.137460,4134.410645,0.238595,4777.139160,2894.356445,1822.402563,1500.086348,3311.115506,0.621559,...,-7.984339,3.527972,-16.490644,-11.739811,-7.235430,-10.607544,-7.613899,-11.456425,-7.058982,real
1,file10.wav_16k.wav_norm.wav_mono.wav_silence.wav,0.154065,4390.338867,-5.655016,5254.524902,3324.125244,1768.520360,1465.244396,3338.242828,-0.257526,...,-3.333730,0.747620,-4.416759,6.523121,-6.339976,2.330548,-1.422495,-2.984604,2.761274,real
2,file100.wav_16k.wav_norm.wav_mono.wav_silence.wav,0.095206,4307.233398,0.083535,4895.892578,3066.934570,1446.031905,1533.366622,2730.610795,-0.894788,...,-1.045881,-8.296654,7.702209,-10.432310,1.070562,-4.160299,2.941002,-7.965646,0.452805,real
3,file1000.wav_16k.wav_norm.wav_mono.wav_silence...,0.188090,2733.269775,0.386014,3238.413330,1859.789307,2033.304687,1511.028950,3504.194631,1.160901,...,-6.387084,-4.906898,-14.497639,-13.718105,-3.959789,-8.347791,-10.312110,-7.487181,-7.729549,real
4,file10000.wav_16k.wav_norm.wav_mono.wav_silenc...,0.193106,4242.850098,0.979406,4707.291504,3028.382812,2081.626495,1411.119670,3395.873092,0.659361,...,-13.368902,0.640402,-20.105606,-10.947557,-9.520848,-4.379232,0.532663,-9.983327,-0.611973,real
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69293,file9976.mp3.wav_16k.wav_norm.wav_mono.wav_sil...,0.175867,5456.414062,2.369928,5895.954102,4045.685059,2014.476307,1695.701148,3920.343137,-0.353003,...,-5.649595,-7.496578,-3.368686,-15.716028,4.014661,-18.087637,3.279789,-9.086059,-0.123569,fake
69294,file9986.mp3.wav_16k.wav_norm.wav_mono.wav_sil...,0.092460,6355.379883,1219.500122,6627.966309,4569.830078,1239.453419,1192.801579,2202.387972,0.866452,...,-3.680366,-12.802091,-0.114138,-14.733644,5.416863,-11.527183,0.801583,-3.775925,-1.670324,fake
69295,file9991.mp3.wav_16k.wav_norm.wav_mono.wav_sil...,0.090450,7261.250488,-25.698208,8061.187500,5678.382812,1384.246640,1221.333776,2517.071759,-0.480945,...,-4.121984,3.032112,-2.240477,5.433537,1.417942,3.506669,-6.886306,-8.497355,-7.166287,fake
69296,file9992.wav_16k.wav_norm.wav_mono.wav_silence...,0.188666,6305.985352,2.041852,7054.541992,4516.195801,1613.781999,1393.188249,2994.455645,0.135941,...,-17.992008,4.239638,-10.878890,-15.818137,0.319991,-19.100618,-5.788297,-9.580729,-13.157767,fake
