In [1]:
from pathlib import Path

import librosa
import numpy as np
import pandas as pd

In [2]:
# Folder holding the recordings; every file with a .wav extension found
# there becomes one row of the dataset built in the cells below.
data_folder = 'audio_MNIST_six_only/'
files = librosa.util.find_files(
    data_folder,
    ext=['wav'],
)

# Quick sanity check on how many recordings were picked up.
print(f"Found {len(files)} audio files.")

Found 3000 audio files.


In [3]:
# Every clip is loaded at a common sample rate and analysed over a common
# length so that each FFT yields the same number of frequency bins.
target_sr = 12000
fixed_duration = 1.0
n_samples = int(target_sr * fixed_duration)


def fft_magnitude(audio, n_samples):
    """Return the peak-normalised, one-sided FFT magnitude of a real signal.

    Parameters
    ----------
    audio : 1-D array-like of real-valued samples.
    n_samples : int
        Transform length. ``np.fft.rfft`` zero-pads shorter input and crops
        longer input to this length, which replaces a manual pad/trim step.

    Returns
    -------
    numpy.ndarray
        The first ``n_samples // 2`` magnitude bins, divided by their maximum.
        If the maximum is zero (e.g. a silent clip) the bins are returned
        unscaled, i.e. all zeros, instead of dividing by zero and yielding NaN.
    """
    # rfft computes only the non-negative-frequency half for real input; the
    # slice keeps the same n_samples // 2 bins as taking half of a full fft.
    magnitude = np.abs(np.fft.rfft(audio, n=n_samples))[:n_samples // 2]

    peak = magnitude.max()
    if peak > 0:
        magnitude = magnitude / peak
    return magnitude


features = []
labels = []

for filepath in files:
    audio, sr = librosa.load(filepath, sr=target_sr)

    features.append(fft_magnitude(audio, n_samples))
    # The label is the file name without directory or extension. Path.stem
    # handles any OS path separator and keeps dots that are part of the name.
    labels.append(Path(filepath).stem)

In [5]:
# One row per recording (indexed by its label) and one column per FFT bin.
# The column count is derived from n_samples instead of a hard-coded 6000 so
# the frame still builds if target_sr or fixed_duration is changed.
df_fft = pd.DataFrame(data=features, index=labels, columns=range(n_samples // 2))
df_fft

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999
6_01_0,0.112358,0.034818,0.018651,0.013124,0.206282,0.041696,0.054382,0.297927,0.405547,0.330600,...,0.000041,0.000033,0.000023,0.000033,0.000040,0.000033,0.000023,0.000033,0.000040,0.000033
6_01_1,0.113601,0.032898,0.044340,0.036356,0.059043,0.064000,0.088885,0.181754,0.427581,0.235320,...,0.000055,0.000055,0.000052,0.000052,0.000056,0.000055,0.000051,0.000053,0.000056,0.000054
6_01_10,0.005531,0.078903,0.009256,0.136242,0.058258,0.058613,0.301000,0.221344,1.000000,0.683145,...,0.000230,0.000238,0.000238,0.000230,0.000235,0.000240,0.000233,0.000231,0.000239,0.000236
6_01_11,0.110023,0.009812,0.025336,0.036760,0.062735,0.070672,0.080056,0.135541,0.464421,0.429974,...,0.000018,0.000045,0.000049,0.000028,0.000013,0.000041,0.000050,0.000033,0.000010,0.000037
6_01_12,0.105340,0.026534,0.041305,0.022346,0.107876,0.062634,0.090992,0.321599,0.534074,0.254085,...,0.000074,0.000087,0.000085,0.000072,0.000071,0.000084,0.000088,0.000076,0.000068,0.000080
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6_60_5,0.069576,0.004602,0.003180,0.011101,0.008695,0.010618,0.002082,0.096498,0.233499,0.054914,...,0.000024,0.000024,0.000025,0.000025,0.000024,0.000023,0.000022,0.000021,0.000019,0.000019
6_60_6,0.279638,0.148183,0.076664,0.034406,0.095101,0.080251,0.067548,0.118046,0.234762,0.038431,...,0.000239,0.000251,0.000141,0.000150,0.000254,0.000232,0.000118,0.000180,0.000262,0.000208
6_60_7,0.160999,0.037529,0.048340,0.050212,0.028655,0.030842,0.065344,0.083618,0.454500,0.274905,...,0.000484,0.000469,0.000473,0.000496,0.000524,0.000543,0.000545,0.000530,0.000504,0.000478
6_60_8,0.072625,0.008465,0.010114,0.020994,0.032169,0.029160,0.025781,0.059348,0.151535,0.072892,...,0.000020,0.000017,0.000016,0.000019,0.000020,0.000018,0.000016,0.000018,0.000020,0.000018


In [6]:
# Persist the feature table to disk; the row labels are written out as the
# first CSV column.
df_fft.to_csv(
    "fft_dataset.csv",
    index=True,
)