In [6]:
import librosa
import numpy as np
import pandas as pd

In [7]:
data_folder = 'audio_MNIST_six_only/'
files = librosa.util.find_files(data_folder, ext=['wav'])
print(f"Found {len(files)} audio files.")

Found 3000 audio files.


In [8]:
target_sr = 6800 # Target sample rate
fixed_duration = 1.0
n_samples = int(target_sr * fixed_duration)

features = []
labels = []

for filepath in files:
    audio, sr = librosa.load(filepath, sr=target_sr)

    # Pad or trim to fixed length
    if len(audio) < n_samples:
        audio = np.pad(audio, (0, n_samples - len(audio)))
    else:
        audio = audio[:n_samples]

    # denoise the signal by applying centered moving average
    window_size = 5
    denoised_audio = np.convolve(audio, np.ones(window_size)/window_size, mode='valid')
    # FFT
    fft_result = np.fft.fft(denoised_audio)
    magnitude = np.abs(fft_result)[:n_samples // 2]

    # Normalize
    magnitude = magnitude / np.max(magnitude)

    features.append(magnitude)
    labels.append(filepath.split("/")[-1].split(".")[0])  # implement this as needed

# Convert to NumPy arrays
#X = np.array(features)
#y = np.array(labels)

In [9]:
df_fft = pd.DataFrame(data=features, index=labels, columns=[x for x in range(6800//2)])
df_fft

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3390,3391,3392,3393,3394,3395,3396,3397,3398,3399
6_01_0,0.131477,0.040664,0.021781,0.016047,0.241540,0.049441,0.065172,0.349460,0.476330,0.386294,...,0.000092,0.000070,0.000042,0.000073,0.000092,0.000071,0.000042,0.000072,0.000092,0.000072
6_01_1,0.115790,0.033437,0.045090,0.037052,0.060328,0.065047,0.090258,0.186483,0.435931,0.238401,...,0.000018,0.000021,0.000046,0.000041,0.000009,0.000029,0.000047,0.000035,0.000002,0.000035
6_01_10,0.005463,0.078892,0.009493,0.136006,0.058788,0.058395,0.300883,0.224023,1.000000,0.677277,...,0.000072,0.000157,0.000122,0.000031,0.000136,0.000151,0.000053,0.000099,0.000160,0.000099
6_01_11,0.110545,0.009117,0.026015,0.037528,0.062674,0.072264,0.082788,0.136266,0.472449,0.432665,...,0.000222,0.000229,0.000231,0.000225,0.000222,0.000228,0.000232,0.000226,0.000221,0.000226
6_01_12,0.125418,0.031603,0.048844,0.026526,0.128466,0.074944,0.107386,0.383622,0.633578,0.299405,...,0.000052,0.000108,0.000110,0.000057,0.000027,0.000096,0.000116,0.000078,0.000008,0.000078
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6_60_5,0.071865,0.004768,0.003300,0.011508,0.008962,0.010774,0.001901,0.100205,0.240953,0.056392,...,0.000005,0.000005,0.000005,0.000005,0.000005,0.000005,0.000005,0.000005,0.000005,0.000005
6_60_6,0.298776,0.158232,0.082080,0.036483,0.101728,0.085802,0.072058,0.126860,0.250605,0.041331,...,0.000017,0.000016,0.000008,0.000012,0.000018,0.000015,0.000007,0.000014,0.000018,0.000014
6_60_7,0.178895,0.040466,0.052830,0.055122,0.031937,0.033674,0.073049,0.090692,0.503542,0.300539,...,0.000165,0.000141,0.000103,0.000065,0.000060,0.000096,0.000135,0.000162,0.000171,0.000162
6_60_8,0.074978,0.008684,0.010270,0.021518,0.033130,0.030203,0.027108,0.061079,0.156846,0.074700,...,0.000050,0.000034,0.000026,0.000044,0.000051,0.000038,0.000025,0.000041,0.000051,0.000041


In [10]:
# save to csv
df_fft.to_csv("fft_dataset.csv", index=True)