In [1]:
import os
import tkinter as tk
from tkinter import ttk, messagebox
import threading
import queue

import sounddevice as sd
import soundfile as sf
import wavio
import numpy as np
import librosa
import librosa.display
import matplotlib
matplotlib.use('Agg')  # Use a non-GUI backend for matplotlib
import matplotlib.pyplot as plt
import noisereduce as nr
import tensorflow as tf


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [7]:
import soundfile as sf

# --- Processing settings ----------------------------------------------------
# Module-level values read by the functions defined further down in this cell.
sampling_rate = 44100     # Hz; rate of the synthetic tone and the rate files are loaded at
segment_length = 3        # seconds of audio rendered into each spectrogram image
global_min_db = -40       # passed as vmin (lower colour limit) to every spectrogram
global_max_db = 51        # passed as vmax (upper colour limit) to every spectrogram
hop_length = 512          # not read by any function visible in this cell
silence_threshold = 0.01  # frames whose mean |amplitude| is not above this are dropped
frame_duration = 0.03     # frame size for silence removal, in seconds (30 ms)

# --- Synthetic test tone settings -------------------------------------------
duration = 6        # seconds
frequency = 440.0   # Hz

# --- Paths ------------------------------------------------------------------
# NOTE(review): `parent_dir` is not defined in this cell; it has to exist in
# the kernel already (presumably set by an earlier cell - confirm).
audio_files_path = os.path.join(parent_dir, 'test_audio')
image_save_path = os.path.join(parent_dir, 'test_spectos')
output_audio_file = os.path.join(audio_files_path, "synthetic_audio.wav")
os.makedirs(audio_files_path, exist_ok=True)
os.makedirs(image_save_path, exist_ok=True)

# --- Generate and store the sine-wave test file -----------------------------
# `duration` seconds of a half-amplitude sine at `frequency` Hz.
t = np.linspace(0, duration, int(sampling_rate * duration), endpoint=False)
synthetic_audio = 0.5 * np.sin(2 * np.pi * frequency * t)
sf.write(output_audio_file, synthetic_audio, sampling_rate)

def remove_silence(audio, sr, frame_duration=0.03, silence_threshold=0.01):
    """Drop low-energy frames from ``audio`` and return what remains.

    The signal is cut into back-to-back, non-overlapping frames of
    ``int(sr * frame_duration)`` samples. A frame is kept only when the mean
    of its absolute sample values is strictly greater than
    ``silence_threshold``. A trailing chunk shorter than one full frame is
    discarded.

    Args:
        audio: 1-D sequence of samples (sliced and passed to ``np.abs``).
        sr: Sampling rate in Hz, used to convert ``frame_duration`` to samples.
        frame_duration: Frame length in seconds.
        silence_threshold: Mean absolute amplitude a frame must exceed to be kept.

    Returns:
        The kept frames concatenated in their original order, or an empty
        array (``np.array([])``) when no frame is kept.
    """
    samples_per_frame = int(sr * frame_duration)

    # Start offsets of every *complete* frame; a short tail never gets one.
    frame_starts = range(0, len(audio) - samples_per_frame + 1, samples_per_frame)
    candidate_frames = (audio[start:start + samples_per_frame] for start in frame_starts)

    kept_frames = [
        chunk for chunk in candidate_frames
        if np.mean(np.abs(chunk)) > silence_threshold
    ]

    if not kept_frames:
        return np.array([])
    return np.concatenate(kept_frames)

def _segment_prefix(audio_file):
    """Return the filename prefix used to name an audio file's spectrograms.

    The prefix is everything before the second underscore in ``audio_file``.
    When the name contains fewer than two underscores, the filename without
    its extension is used instead (``str.find`` returns -1 in that case, and
    slicing with -1 would silently chop the last character off the name).
    """
    first_underscore = audio_file.find('_')
    second_underscore = (
        audio_file.find('_', first_underscore + 1) if first_underscore != -1 else -1
    )
    if second_underscore == -1:
        return os.path.splitext(audio_file)[0]
    return audio_file[:second_underscore]


def process_audio_file(audio_file):
    """Denoise one audio file, strip silence and save a spectrogram per segment.

    Steps:
      1. Load ``audio_file`` from ``audio_files_path`` at ``sampling_rate``.
      2. Reduce noise, using the first 0.5 s of the recording as the noise profile.
      3. Remove silent frames with ``remove_silence``.
      4. Split the result into ``segment_length``-second segments and save each
         one (including a shorter final remainder, if any) via ``save_spectrogram``.
      5. Append one line to ``segmentlast_times.txt`` in ``image_save_path``
         with the duration of the final partial segment.

    Args:
        audio_file: Filename (not a full path) of a file inside ``audio_files_path``.

    Side effects:
        Writes PNG files and appends to ``segmentlast_times.txt``; when a
        partial final segment exists, stores its duration in the module-level
        ``file_segment_times`` mapping (defined outside this cell).
    """
    file_path = os.path.join(audio_files_path, audio_file)
    audio, sr = librosa.load(file_path, sr=sampling_rate)

    prefix = _segment_prefix(audio_file)

    # The first half second of the recording serves as the noise sample.
    noise_profile = audio[:int(sr * 0.5)]
    reduced_noise = nr.reduce_noise(y=audio, sr=sr, y_noise=noise_profile, prop_decrease=0.8)

    cleaned_audio = remove_silence(reduced_noise, sr, frame_duration, silence_threshold)

    samples_per_segment = segment_length * sr
    segment_index = 1

    # Full-length segments; a shorter tail is handled separately below.
    for i in range(0, len(cleaned_audio), samples_per_segment):
        segment_audio = cleaned_audio[i:i + samples_per_segment]

        if len(segment_audio) >= samples_per_segment:
            save_spectrogram(segment_audio, sr, prefix, segment_index)
            segment_index += 1

    remaining_audio = len(cleaned_audio) % samples_per_segment
    if remaining_audio > 0:
        segment_audio = cleaned_audio[-remaining_audio:]  # slice the last segment
        save_spectrogram(segment_audio, sr, prefix, segment_index)
        # Remember how long the partial final segment is.
        last_segment_duration = remaining_audio / sr
        # NOTE(review): `file_segment_times` is not defined in this cell; it
        # must already exist in the kernel (presumably a dict - confirm).
        file_segment_times[audio_file] = last_segment_duration
    else:
        last_segment_duration = 0

    # NOTE: when there is no partial segment, `segment_index` is one past the
    # last image actually written and the recorded duration is 0. The file is
    # opened in append mode, so re-running the cell adds duplicate lines.
    with open(os.path.join(image_save_path, "segmentlast_times.txt"), "a") as f:
        f.write(f"{prefix}_segment{segment_index}.png: {last_segment_duration:.2f} seconds\n")

    if len(cleaned_audio) == 0:
        print(f"Warning: {audio_file} was completely silent after processing.")


def save_spectrogram(segment_audio, sr, prefix, segment_index):
    """Render one audio segment as a grayscale spectrogram PNG.

    The segment's STFT magnitude is converted to decibels and drawn without
    axes on a 4x5 inch, 100 dpi figure, using the shared colour limits
    ``global_min_db`` / ``global_max_db``. The image is written to
    ``image_save_path`` as ``"{prefix}_segment{segment_index}.png"``.

    Args:
        segment_audio: Samples of the segment to render.
        sr: Sampling rate in Hz, forwarded to ``librosa.display.specshow``.
        prefix: Leading part of the output filename.
        segment_index: Segment number used in the output filename.
    """
    stft = librosa.stft(segment_audio)
    stft_db = librosa.amplitude_to_db(np.abs(stft))

    # Use an explicit figure/axes pair so the figure can be closed reliably.
    fig, ax = plt.subplots(figsize=(4, 5), dpi=100)
    try:
        librosa.display.specshow(stft_db, sr=sr, x_axis=None, y_axis=None,
                                 vmin=global_min_db, vmax=global_max_db,
                                 cmap='gray', ax=ax)

        ax.axis('off')
        fig.tight_layout(pad=0)
        segment_filename = f"{prefix}_segment{segment_index}.png"
        fig.savefig(os.path.join(image_save_path, segment_filename),
                    bbox_inches='tight', pad_inches=0)
    finally:
        # Close even when plotting or saving fails; this function is called in
        # a loop and pyplot otherwise keeps every unclosed figure alive.
        plt.close(fig)

# Turn every .wav file found in the test-audio directory into spectrograms.
for audio_file in os.listdir(audio_files_path):
    if not audio_file.endswith(".wav"):
        continue  # skip anything that is not a WAV file
    print(f"Processing {audio_file} for spectrograms...")
    process_audio_file(audio_file)

print("All spectrograms have been saved successfully.")
print("Last segment times have been recorded in 'segmentlast_times.txt'.")

Audio loaded with soundfile: 264600 samples at 44100 Hz
Audio shape after processing: (264600,)
