In [1]:
import numpy as np
import os
import matplotlib.pyplot as plt
import copy as cp
from scipy.fftpack import fft
from librosa.feature import mfcc
from librosa import load
from tqdm import tqdm

In [2]:
def obtain_signal_slice(insignal, fs, cut_start, cut_stop):
    """Cut the ``[cut_start, cut_stop)`` time window out of a signal.

    Parameters:
        insignal: 1-D sequence of samples.
        fs: sampling rate used to convert the time stamps to sample indices.
        cut_start: window start, in the time unit matching ``fs`` (seconds for Hz).
        cut_stop: window end, same unit as ``cut_start``.

    Returns:
        ``insignal[int(fs * cut_start):int(fs * cut_stop)]``. When the signal
        ends before ``cut_stop`` it is first extended with zeros (silence), so
        the result always has ``int(fs * cut_stop) - int(fs * cut_start)``
        samples for a window that starts inside the padded signal.
    """
    cut_start_idx = int(fs * cut_start)
    cut_stop_idx = int(fs * cut_stop)

    # If the recording is too short, append silence.
    missing_samples = cut_stop_idx - len(insignal)
    if missing_samples > 0:
        print("Uzupełnienie krótkiego nagrania ciszą.")
        # Pad up to the stop index, not merely up to the window length:
        # padding to (stop - start) left the slice short, or asked np.pad for
        # a negative width, whenever cut_start was greater than zero.
        insignal = np.pad(insignal, (0, missing_samples), 'constant')

    return insignal[cut_start_idx:cut_stop_idx]


def split_to_frames(insignal, frame_length, overlap_factor):
    """Split a 1-D signal into overlapping frames stored as matrix columns.

    Parameters:
        insignal: 1-D sequence of samples.
        frame_length: number of samples per frame (integer).
        overlap_factor: fraction of a frame shared with the next one; the hop
            between frame starts is ``int(frame_length * (1 - overlap_factor))``.

    Returns:
        Float array of shape ``(frame_length, n_frames)`` where column ``i``
        holds ``insignal[i * hop : i * hop + frame_length]``. Only complete
        frames are produced; a signal shorter than one frame yields a
        ``(frame_length, 0)`` array.

    Raises:
        ValueError: if the hop truncates to less than one sample.
    """
    # Use the truncated integer hop for BOTH counting and indexing so the two
    # can never disagree.
    hop = int(frame_length * (1 - overlap_factor))
    if hop < 1:
        raise ValueError(
            f'overlap_factor={overlap_factor} leaves no positive hop for frame_length={frame_length}'
        )

    n_samples = len(insignal)
    if n_samples < frame_length:
        # Not even one complete frame fits.
        return np.zeros((frame_length, 0))

    # "+ 1" counts the frame starting at sample 0; without it the last
    # complete frame was silently dropped.
    n_frames = (n_samples - frame_length) // hop + 1
    output_data = np.zeros((frame_length, n_frames))

    for i in range(n_frames):
        frame_start = i * hop
        output_data[:, i] = insignal[frame_start:frame_start + frame_length]

    return output_data

def frames_to_spectrogram(frames_mtx, fft_window):
    """Turn a matrix of time-domain frames into a magnitude spectrogram.

    Parameters:
        frames_mtx: 2-D array with one frame per column
            (shape ``(frame_length, n_frames)``). It is not modified.
        fft_window: window multiplied into every frame before the FFT; a
            length-``frame_length`` vector (a scalar also broadcasts).

    Returns:
        Array of shape ``(frame_length // 2, n_frames)`` holding ``|FFT|`` of
        each windowed frame, restricted to the first half of the bins.
    """
    frames = np.asarray(frames_mtx)
    n_bins = frames.shape[0] // 2

    if frames.shape[1] == 0:
        # No frames to transform; keep the expected row count.
        return np.zeros((n_bins, 0))

    # Column vector so the window broadcasts across all frames at once.
    windowed = frames * np.reshape(fft_window, (-1, 1))

    # Build the result from the FFT output instead of writing magnitudes back
    # into a copy of the input: the copy kept the input dtype, so integer
    # frames had their magnitudes truncated.
    magnitudes = np.abs(fft(windowed, axis=0))
    return magnitudes[:n_bins, :]

def signal_to_spectrogram(ex, frame_length, overlap_factor, fft_window):
    """Frame the signal ``ex`` and convert the frames to a spectrogram.

    ``ex``, ``frame_length`` and ``overlap_factor`` are forwarded to
    ``split_to_frames``; the resulting frame matrix and ``fft_window`` are
    forwarded to ``frames_to_spectrogram``, whose result is returned.
    """
    return frames_to_spectrogram(
        split_to_frames(ex, frame_length, overlap_factor),
        fft_window,
    )

def draw_spectogram(dataset_path, chosen_fpath, frame_length=1024, overlap_factor=0.3,
                    cut_start=0, cut_stop=5, output_root='Spectograms'):
    """Render the spectrogram of one audio file to a grayscale PNG.

    Parameters:
        dataset_path: root directory of the dataset.
        chosen_fpath: path of the audio file relative to ``dataset_path``; its
            directory part is mirrored under ``output_root``.
        frame_length: samples per analysis frame.
        overlap_factor: fraction of overlap between consecutive frames.
        cut_start, cut_stop: analysed time window in seconds; recordings that
            end before ``cut_stop`` are padded with silence by
            ``obtain_signal_slice``.
        output_root: directory under which the PNG is written.

    The image is saved as ``<output_root>/<dir of chosen_fpath>/<stem>.png``.
    Nothing is returned.

    Raises:
        ValueError: if the file contains no samples.
    """
    joint_path = os.path.join(dataset_path, chosen_fpath)
    audio_data, fs = load(joint_path, sr=None)
    audio_data = audio_data.astype(float)

    if audio_data.size == 0:
        raise ValueError(f'No audio samples in {joint_path}')

    # Peak-normalise, but leave an all-zero (silent) recording untouched:
    # dividing by a zero peak would fill the signal with NaN.
    peak = np.max(np.abs(audio_data))
    if peak > 0:
        audio_data = audio_data / peak

    # All-ones window, i.e. the frames are not tapered before the FFT.
    fft_window = np.ones(frame_length)

    audio_slice = obtain_signal_slice(audio_data, fs, cut_start, cut_stop)
    spectrogram = signal_to_spectrogram(audio_slice, frame_length, overlap_factor, fft_window)

    save_dir = os.path.join(output_root, os.path.dirname(chosen_fpath))
    os.makedirs(save_dir, exist_ok=True)

    file_stem = os.path.splitext(os.path.basename(chosen_fpath))[0]
    save_path = os.path.join(save_dir, file_stem + '.png')

    # Time covered by the frames: consecutive frames start one hop apart, so
    # the span is (n_frames - 1) hops plus one full frame, not
    # n_frames * frame_length. The axes are hidden below, so this only keeps
    # the image's data coordinates truthful.
    hop = int(frame_length * (1 - overlap_factor))
    covered_seconds = ((spectrogram.shape[1] - 1) * hop + frame_length) / fs

    fig = plt.figure(figsize=(3, 3))
    try:
        plt.imshow(spectrogram, aspect='auto', origin="lower",
                   extent=[0, covered_seconds, 0, fs // 2], cmap='gray')
        plt.axis('off')
        plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when drawing or saving fails;
        # otherwise figures pile up when this is called in a loop.
        plt.close(fig)

In [3]:
def process_audio_files(root_folder = 'data'):
    """Create a spectrogram image for every audio file of the dataset.

    Walks ``<root_folder>/xeno-canto`` recursively and calls
    ``draw_spectogram`` for each file whose extension (case-insensitive) is
    ``.wav`` or ``.mp3``. A failure on one file is printed and the walk
    continues with the next file. Nothing is returned.

    Parameters:
        root_folder: directory that contains the ``xeno-canto`` dataset folder.
    """
    audio_formats = {'.wav', '.mp3'}
    dataset_path = os.path.join(root_folder, 'xeno-canto')

    # os.walk yields nothing for a missing directory, so without this check a
    # wrong root_folder looked like a successful run that produced no output.
    if not os.path.isdir(dataset_path):
        print(f'Dataset directory not found: {dataset_path}')
        return

    for root, dirs, files in os.walk(dataset_path):
        # Sorting in place makes os.walk descend in a stable order.
        dirs.sort()
        for file in sorted(files):
            if os.path.splitext(file)[1].lower() not in audio_formats:
                continue

            rel_path = os.path.relpath(os.path.join(root, file), dataset_path)
            try:
                draw_spectogram(dataset_path, rel_path)
            except Exception as e:
                # Deliberately broad: one unreadable file must not abort the
                # whole batch. Report the relative path, since the same file
                # name may occur in several subfolders.
                print(f'Error while processing {rel_path}: {e}')
            


In [None]:
# Run the batch conversion using the function's default root folder.
process_audio_files()