In [1]:
import os
import librosa
import librosa.display
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt


In [4]:
from tensorflow.keras.layers import Layer

class ResizeLayer(Layer):
    """Keras layer that resizes its input to a fixed height and width.

    The resize is delegated to ``tf.image.resize`` with that function's
    default settings. Both target dimensions are stored on the instance and
    exposed through ``get_config`` so the layer can be rebuilt from a saved
    model when the class is supplied via ``custom_objects``.
    """

    def __init__(self, target_height, target_width, **kwargs):
        """Store the output size; remaining keyword arguments go to ``Layer``."""
        super().__init__(**kwargs)
        self.target_height = target_height
        self.target_width = target_width

    def call(self, inputs):
        """Return ``inputs`` resized to ``(target_height, target_width)``."""
        output_size = [self.target_height, self.target_width]
        return tf.image.resize(inputs, output_size)

    def get_config(self):
        """Return the base layer config extended with the two target dimensions."""
        base_config = super().get_config()
        return {
            **base_config,
            "target_height": self.target_height,
            "target_width": self.target_width,
        }


In [6]:
from tensorflow.keras.models import load_model

# Load the saved model, passing ResizeLayer through custom_objects so Keras
# can resolve the custom layer by name while deserializing.
model = load_model(
    'audio_denoising_unet.h5',
    custom_objects={'ResizeLayer': ResizeLayer},
)







In [7]:
# Dataset directories, relative to the notebook's working directory.
# Training and validation read the trimmed ("*_trim") folders; the untrimmed
# 'blended' / 'clean' sibling folders were referenced previously and are not
# used for these two splits any more.
train_blended_paths, train_clean_paths = (
    '../data/audios/english/train/blended_trim',
    '../data/audios/english/train/clean_trim',
)

val_blended, val_clean = (
    '../data/audios/english/validation/blended_trim',
    '../data/audios/english/validation/clean_trim',
)

# The test split points at the untrimmed folders.
test_blended, test_clean = (
    '../data/audios/english/test/blended',
    '../data/audios/english/test/clean',
)

In [8]:
# Function to load audio and create spectrogram
def load_and_preprocess_audio(filepath, sr=16000, fixed_length=300):
    """Load an audio file and return its dB-scaled STFT magnitude at a fixed width.

    Args:
        filepath: Path of the audio file, passed to ``librosa.load``.
        sr: Sampling rate requested from ``librosa.load``.
        fixed_length: Number of columns (STFT frames) in the returned array.

    Returns:
        A 2-D array with exactly ``fixed_length`` columns. Shorter
        spectrograms are right-padded with zeros (the padding is applied to
        the dB values); longer ones are cut off after ``fixed_length`` columns.
    """
    waveform, _ = librosa.load(filepath, sr=sr)
    stft_matrix = librosa.stft(waveform, n_fft=1024, hop_length=512)
    magnitude_db = librosa.amplitude_to_db(np.abs(stft_matrix))

    missing_frames = fixed_length - magnitude_db.shape[1]
    if missing_frames > 0:
        # Too short: append constant (zero) columns on the right.
        return np.pad(magnitude_db, ((0, 0), (0, missing_frames)), mode='constant')

    # Long enough: keep only the leading fixed_length columns.
    return magnitude_db[:, :fixed_length]


# Loading data pairs (blended and clean)
def load_data_pairs(blended_path, clean_path):
    """Load spectrograms for every blended (.mp3) and clean (.flac) file.

    Args:
        blended_path: Directory scanned for files ending in ``.mp3``.
        clean_path: Directory scanned for files ending in ``.flac``.

    Returns:
        A tuple ``(blended_spectrograms, clean_spectrograms)`` of two lists,
        each produced by ``load_and_preprocess_audio`` over the sorted file
        paths of the corresponding directory.
    """
    def _sorted_audio_files(directory, extension):
        # Full paths of the files in `directory` with the given suffix, sorted.
        return sorted(
            os.path.join(directory, name)
            for name in os.listdir(directory)
            if name.endswith(extension)
        )

    # List both directories before decoding any audio.
    # NOTE(review): the two lists are paired only by sorted order; nothing here
    # checks that they have the same length or matching file names.
    blended_files = _sorted_audio_files(blended_path, '.mp3')
    clean_files = _sorted_audio_files(clean_path, '.flac')

    blended_spectrograms = list(map(load_and_preprocess_audio, blended_files))
    clean_spectrograms = list(map(load_and_preprocess_audio, clean_files))
    return blended_spectrograms, clean_spectrograms


In [18]:
# Example inference function
def denoise_audio(model, blended_audio):
    """Denoise one audio file with the spectrogram model and return a waveform.

    The file is converted to a fixed-size dB magnitude spectrogram by
    ``load_and_preprocess_audio``, passed through ``model``, and the predicted
    spectrogram is turned back into audio.

    Args:
        model: Object exposing ``predict``; it is called on an array with a
            leading batch axis and a trailing channel axis added around the
            spectrogram.
        blended_audio: Path of the noisy audio file.

    Returns:
        The reconstructed 1-D waveform returned by ``librosa.griffinlim``.
        Because the model yields magnitudes only, the phase is estimated, so
        the result is an approximation rather than an exact inversion.
    """
    # Must equal the hop_length that load_and_preprocess_audio passes to
    # librosa.stft. Left unset, the inverse transform would fall back to
    # n_fft // 4 and produce audio with the wrong duration.
    stft_hop_length = 512

    spectrogram_db = load_and_preprocess_audio(blended_audio)
    # Add the batch (first) and channel (last) axes.
    input_spec = spectrogram_db[np.newaxis, ..., np.newaxis]

    denoised_db = np.asarray(model.predict(input_spec))[0, ..., 0]

    # The network is fed dB magnitudes, so its output is treated as dB too and
    # converted back to linear amplitude before inversion.
    # NOTE(review): assumes the model was trained to predict spectrograms on
    # the same dB scale as its input -- confirm against the training notebook.
    denoised_magnitude = librosa.db_to_amplitude(denoised_db)

    # A magnitude spectrogram carries no phase, so a plain istft cannot invert
    # it; Griffin-Lim estimates a consistent phase iteratively. A fixed
    # random_state keeps repeated runs reproducible.
    return librosa.griffinlim(
        denoised_magnitude,
        hop_length=stft_hop_length,
        random_state=0,
    )

# Example run on a single clip (substitute the path of any other audio file
# to try the model on different input).
denoised_audio = denoise_audio(
    model=model,
    blended_audio='../data/audios/english/train/blended_trim/19-198-0002.mp3',
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step


In [19]:
import soundfile as sf
# Save the denoised audio as a WAV file; 16000 is the sampling rate.
sf.write(file='denoised_sample.wav', data=denoised_audio, samplerate=16000)
