# 📚 Libraries

In [None]:
# Audio processing and I/O
import os
import wave
import soundfile as sf
import librosa
import librosa.display

# Plotting
import matplotlib
matplotlib.use('Agg')  # No GUI
import matplotlib.pyplot as plt
import pylab

# Data handling
import numpy as np
import pandas as pd
import joblib
import math
import IPython.display as ipd

# 🔍 Step 1 - Audio Info Extraction

In [None]:
def get_audio_info(wav_file):
    """Return the sample rate and, when parseable, the PCM bit depth of a file.

    Args:
        wav_file: Audio file to inspect; passed unchanged to ``sf.info``.

    Returns:
        tuple: ``(sample_rate, bit_depth)``. ``sample_rate`` is
        ``info.samplerate``. ``bit_depth`` is an ``int`` parsed from the
        subtype name (e.g. ``"PCM_16"`` -> ``16``), or the string
        ``"Unavailable"`` when the subtype does not contain ``"PCM"`` or no
        number can be parsed from it.
    """
    info = sf.info(wav_file)
    subtype = info.subtype

    bit_depth = "Unavailable"
    if 'PCM' in subtype:
        tokens = subtype.split('_')
        if len(tokens) > 1:
            # 8-bit PCM subtypes carry a signedness prefix ("PCM_S8",
            # "PCM_U8"); strip it so the width still parses as a number.
            width = tokens[1].lstrip('SU')
            try:
                bit_depth = int(width)
            except ValueError:
                # Second token is not numeric (e.g. "IMA_ADPCM"): keep the
                # "Unavailable" placeholder.
                pass

    return info.samplerate, bit_depth


if __name__ == "__main__":
    # Manual check of get_audio_info on a single clip.
    wav_file_path = "C:/Users/cadur/Downloads/Urbansonic/0_Mels_Gen/DATA/ESC-50-master/audio/1-137-A-32.wav"
    sample_rate, bit_depth = get_audio_info(wav_file_path)
    print(f"Sample rate: {sample_rate} Hz")
    # get_audio_info returns the string "Unavailable" when no bit depth can
    # be parsed; only append the unit to an actual number.
    if isinstance(bit_depth, int):
        print(f"Bit depth: {bit_depth} bits")
    else:
        print(f"Bit depth: {bit_depth}")

# 🎯 Step 2 - Define Paths and Read CSV

In [None]:
# Input / output locations (raw strings keep the Windows backslashes literal)
wav_dir = r'C:\Users\cadur\Downloads\Urbansonic\Datos\DATA\ESC-50-master\Esc_50_Seg_Wav'
mel_spec_dir = r"C:\Users\cadur\Downloads\Urbansonic\Datos\DATA\ESC-50-master\ESC_50_Mel"
csv_path = r"C:\Users\cadur\Downloads\Urbansonic\Datos\DATA\ESC-50-master\csv\ESC-50.csv"

# Metadata table; the distinct values of its 'class' column form the label set
csv_data = pd.read_csv(csv_path)
unique_classes = pd.unique(csv_data['class'])

# 📁 Step 3 - Create Class Subdirectories

In [None]:
# Create a directory for each class inside mel_spec_dir.
# Make sure the output root exists first so the listing below cannot fail
# on a fresh machine (os.mkdir on a class folder would raise otherwise).
os.makedirs(mel_spec_dir, exist_ok=True)

for class_name in unique_classes:
    class_path = os.path.join(mel_spec_dir, class_name)
    try:
        # Attempt the creation and react to the failure instead of checking
        # for existence first, which leaves a window between check and create.
        os.makedirs(class_path)
    except FileExistsError:
        print(f"{class_name} directory already exists. Skipping...")

# Count only sub-directories so stray files in mel_spec_dir do not inflate
# the reported number of classes.
class_dirs = [
    entry for entry in os.listdir(mel_spec_dir)
    if os.path.isdir(os.path.join(mel_spec_dir, entry))
]
print("Created mel spectrogram directories:")
print(class_dirs)
print(f"Total: {len(class_dirs)} classes")

# ⚙️ Step 4 - Spectrogram Configuration

In [None]:
# General settings
SR = 22050          # Target sampling rate (Hz)
FEATURE = 'mel'
FMIN = 0
roi_len = 5         # Audio segment length in seconds
n_samples = SR * roi_len  # Samples per segment at the target rate

# Mel spectrogram settings
n_fft = 2048
hop_length = 32
n_mels = 128
fmin = FMIN         # Lower band edge, same value as FMIN
fmax = SR // 2      # Upper band edge: half of SR (11025)

# 🎼 Step 5 - Generate Mel Spectrograms

In [None]:
# For every row of the metadata table: load the audio, compute its mel
# spectrogram and save it as a borderless PNG under
# mel_spec_dir/<class>/<file stem>.png.
for index, row in csv_data.iterrows():
    file_name = row['name']
    class_label = row['class']
    audio_path = row['audio_path']

    print(f"Processing: {file_name} | Class: {class_label}")

    # Decode directly to int16 samples. This keeps the int16 amplitude scale
    # the spectrogram was computed on before, without rewriting the source
    # file on disk (the previous sf.write call re-encoded the dataset in place).
    audio_data, sample_rate = sf.read(audio_path, dtype='int16')

    samples = audio_data.astype(np.float32)
    if samples.ndim > 1:
        # Multi-channel files come back as (frames, channels); downmix to
        # mono instead of feeding interleaved channels to librosa.
        samples = samples.mean(axis=1)

    # Generate Mel spectrogram
    S = librosa.feature.melspectrogram(
        y=samples, sr=sample_rate,
        n_fft=n_fft, hop_length=hop_length,
        n_mels=n_mels, fmin=fmin, fmax=fmax
    )

    # splitext drops only the final extension, so names containing extra
    # dots keep their full stem and cannot collide.
    output_dir = os.path.join(
        mel_spec_dir, class_label, os.path.splitext(file_name)[0] + '.png'
    )

    # Plot setup: a 6x6 inch figure whose single axes fills the whole canvas
    # with the axis decorations switched off.
    fig = plt.figure(1, frameon=False)
    try:
        fig.set_size_inches(6, 6)
        ax = fig.add_axes([0., 0., 1., 1.])
        ax.set_axis_off()

        # NOTE(review): y_axis='linear' is kept from the original even though
        # S is mel-scaled; the axes are hidden -- confirm this is intended.
        librosa.display.specshow(
            librosa.power_to_db(S, ref=np.max),
            fmin=fmin, y_axis='linear', ax=ax
        )
        fig.savefig(output_dir)
    finally:
        # Always release the figure, even if plotting or saving fails, so a
        # long run does not accumulate open figures.
        plt.close(fig)

    print(f"Saved mel spectrogram: {output_dir}")
    print('-' * 70)