In [None]:
import librosa
import librosa.display
import numpy as np
import h5py
import matplotlib.pyplot as plt
import os


# Recordings to convert; datasets are written to the HDF5 file in this order.
mp3_files = [
    r"D:/University_Course_Work/StatsML-2/Deeplearning/Practical_Home_Work/species_learning_with_deep_learning/Data/test1.mp3",
    r"D:/University_Course_Work/StatsML-2/Deeplearning/Practical_Home_Work/species_learning_with_deep_learning/Data/test2.mp3",
    r"D:/University_Course_Work/StatsML-2/Deeplearning/Practical_Home_Work/species_learning_with_deep_learning/Data/test3.mp3",
]

# HDF5 file that receives one dataset per recording.
output_h5 = r"D:/University_Course_Work/StatsML-2/Deeplearning/Practical_Home_Work/species_learning_with_deep_learning/Data/test_bird_spectrograms.hdf5"

# Start offset for each recording (passed to the loader as its `offset`),
# keyed by the recording's path.
time_starts = dict(zip(mp3_files, (7, 1, 6)))

# Framing parameters shared by the spectrogram functions below.
sr = 22050           # sample rate used in the duration computation
win_length = 2048    # passed to the mel spectrogram as n_fft
hop_length = 512     # samples between successive frames
target_width = 517   # number of time frames every stored spectrogram has

# Seconds of audio spanned by `target_width` windows of `win_length` samples
# placed `hop_length` samples apart.
required_duration = (win_length + hop_length * (target_width - 1)) / sr

def create_spectrogram(mp3_path, start_time, duration=12.1, max_width=517):
    """Return a dB-scaled mel spectrogram for one excerpt of an audio file.

    Args:
        mp3_path: Path of the audio file to load.
        start_time: Offset into the file at which the excerpt starts
            (forwarded to ``librosa.load`` as ``offset``).
        duration: Length of the excerpt to load (forwarded to
            ``librosa.load`` as ``duration``).
        max_width: Maximum number of time frames (columns) to keep.

    Returns:
        A 2-D array with 128 mel bands as rows and at most ``max_width``
        columns, expressed in dB relative to the excerpt's maximum power.
    """
    # Ask for the module-level sample rate explicitly so the audio always
    # matches the rate `required_duration` was computed with, instead of
    # relying on librosa's default happening to be the same value. The result
    # is bound to a distinct local name so the module-level `sr` is not shadowed.
    y, sample_rate = librosa.load(
        mp3_path, sr=sr, offset=start_time, duration=duration
    )
    mel_power = librosa.feature.melspectrogram(
        y=y, sr=sample_rate, n_fft=win_length, hop_length=hop_length, n_mels=128
    )
    S_db = librosa.power_to_db(mel_power, ref=np.max)

    # Slicing is a no-op when the spectrogram is already narrow enough.
    return S_db[:, :max_width]

def save_and_pad_spectrograms(mp3_files, time_starts, output_h5):
    """Create fixed-size spectrograms for the recordings and store them in HDF5.

    For every path in ``mp3_files`` a spectrogram is created, min-max
    normalised to [0, 1], zero-padded or truncated along the time axis to
    ``target_width`` columns, cast to float32 and written to ``output_h5`` as
    dataset ``audio_<n>`` (n starting at 1). The file is then reopened
    read-only and the shape of each stored dataset is printed.

    Args:
        mp3_files: Iterable of audio file paths, processed in order.
        time_starts: Mapping from each path in ``mp3_files`` to the start
            offset handed to ``create_spectrogram``.
        output_h5: Path of the HDF5 file to create; an existing file is
            overwritten (opened with mode ``'w'``).

    Raises:
        KeyError: If a path in ``mp3_files`` has no entry in ``time_starts``.
    """
    with h5py.File(output_h5, 'w') as h5f:

        for idx, mp3 in enumerate(mp3_files):
            spec = create_spectrogram(mp3, time_starts[mp3], duration=required_duration)

            # Min-max normalise. A constant spectrogram (e.g. a silent
            # excerpt) has zero range; dividing would fill the dataset with
            # NaNs, so map it to all zeros instead.
            low = spec.min()
            value_range = spec.max() - low
            if value_range == 0:
                spec = np.zeros_like(spec)
            else:
                spec = (spec - low) / value_range

            # Force the time axis to exactly `target_width` columns; padding
            # uses 0, which is the normalised minimum.
            width = spec.shape[1]
            if width < target_width:
                spec = np.pad(spec, ((0, 0), (0, target_width - width)), mode='constant')
            else:
                spec = spec[:, :target_width]
            spec = spec.astype(np.float32)

            name = f"audio_{idx+1}"
            h5f.create_dataset(name, data=spec)
            print(f"Saved '{name}' with shape {spec.shape}")

    print(f"\nInspecting {output_h5}:")
    with h5py.File(output_h5, 'r') as h5f:
        for name in h5f:
            # The dataset's shape comes from its metadata; there is no need
            # to read the whole array into memory just to report it.
            print(f"  {name}: shape = {h5f[name].shape}")


# Run the conversion for the configured recordings and report the stored shapes.
save_and_pad_spectrograms(
    mp3_files=mp3_files,
    time_starts=time_starts,
    output_h5=output_h5,
)


Saved 'audio_1' with shape (128, 517)
Saved 'audio_2' with shape (128, 517)
Saved 'audio_3' with shape (128, 517)

Inspecting D:/University_Course_Work/StatsML-2/Deeplearning/Practical_Home_Work/species_learning_with_deep_learning/Data/test_bird_spectrograms.hdf5:
  audio_1: shape = (128, 517)
  audio_2: shape = (128, 517)
  audio_3: shape = (128, 517)
