In [1]:
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras import layers, models
from glob import glob

In [2]:
# Paths to data
# train_blended_paths = '../data/audios/english/train/blended'
# train_clean_paths = '../data/audios/english/train/clean'
train_blended_paths = '../../data/audios/english/train/blended_trim'
train_clean_paths = '../../data/audios/english/train/clean_trim'


# val_blended = '../data/audios/english/validation/blended'
# val_clean = '../data/audios/english/validation/clean'
val_blended = '../../data/audios/english/validation/blended_trim'
val_clean = '../../data/audios/english/validation/clean_trim'


test_blended = '../../data/audios/english/test/blended'
test_clean = '../../data/audios/english/test/clean'

In [None]:
# Constants
SAMPLE_RATE = 16000  # Define sample rate for consistency
TARGET_LENGTH = SAMPLE_RATE * 3  # Set target length in samples (3 seconds here as an example)
batch_size = 16

In [4]:
def load_audio_tf(path, target_sr=SAMPLE_RATE, target_length=TARGET_LENGTH):
    audio, sr = librosa.load(path, sr=target_sr)
    # Трімінг або доповнення до потрібної довжини
    if len(audio) > target_length:
        audio = audio[:target_length]
    else:
        audio = np.pad(audio, (0, max(0, target_length - len(audio))))
    return audio

def get_all_files(directory, extension):
    """
    Рекурсивно отримує всі файли з зазначеним розширенням у директорії та її піддиректоріях.
    """
    file_paths = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(extension):
                file_paths.append(os.path.join(root, file))
    return sorted(file_paths)

def audio_to_tf_dataset(blended_dir, clean_dir, batch_size=16, shuffle=True):
    blended_files = get_all_files(blended_dir, '.mp3')
    clean_files = get_all_files(clean_dir, '.flac')
    
    assert len(blended_files) == len(clean_files), "Кількість blended та clean файлів не збігається"
    
    def generator():
        for b_path, c_path in zip(blended_files, clean_files):
            blended_audio = load_audio_tf(b_path)
            clean_audio = load_audio_tf(c_path)
            yield blended_audio, clean_audio
    
    dataset = tf.data.Dataset.from_generator(
        generator,
        output_signature=(
            tf.TensorSpec(shape=(TARGET_LENGTH,), dtype=tf.float32),
            tf.TensorSpec(shape=(TARGET_LENGTH,), dtype=tf.float32),
        )
    )
    if shuffle:
        dataset = dataset.shuffle(buffer_size=1000)
    dataset = dataset.batch(batch_size)
    return dataset

In [None]:
# Підготовка датасетів з повторенням
train_dataset = audio_to_tf_dataset(train_blended_paths, train_clean_paths, batch_size=16).repeat()
val_dataset = audio_to_tf_dataset(val_blended, val_clean, batch_size=batch_size).repeat()

# Вказуємо кількість кроків для кожної епохи
steps_per_epoch = len(train_dataset) // batch_size
validation_steps = len(val_dataset) // batch_size

In [None]:
def simplified_unet(input_shape=(None, 1)):
    inputs = layers.Input(shape=input_shape)

    # Зменшимо кількість фільтрів на кожному рівні
    conv1 = layers.Conv1D(32, kernel_size=3, activation='relu', padding='same')(inputs)
    conv1 = layers.Conv1D(32, kernel_size=3, activation='relu', padding='same')(conv1)
    pool1 = layers.MaxPooling1D(pool_size=2)(conv1)

    conv2 = layers.Conv1D(64, kernel_size=3, activation='relu', padding='same')(pool1)
    conv2 = layers.Conv1D(64, kernel_size=3, activation='relu', padding='same')(conv2)
    pool2 = layers.MaxPooling1D(pool_size=2)(conv2)

    conv3 = layers.Conv1D(128, kernel_size=3, activation='relu', padding='same')(pool2)
    conv3 = layers.Conv1D(128, kernel_size=3, activation='relu', padding='same')(conv3)

    up4 = layers.UpSampling1D(size=2)(conv3)
    up4 = layers.concatenate([up4, conv2])
    conv4 = layers.Conv1D(64, kernel_size=3, activation='relu', padding='same')(up4)
    conv4 = layers.Conv1D(64, kernel_size=3, activation='relu', padding='same')(conv4)

    up5 = layers.UpSampling1D(size=2)(conv4)
    up5 = layers.concatenate([up5, conv1])
    conv5 = layers.Conv1D(32, kernel_size=3, activation='relu', padding='same')(up5)
    conv5 = layers.Conv1D(32, kernel_size=3, activation='relu', padding='same')(conv5)

    outputs = layers.Conv1D(1, kernel_size=1, activation='linear')(conv5)

    model = Model(inputs, outputs)
    return model

In [None]:
# Ініціалізація та компіляція спрощеної моделі
model = simplified_unet(input_shape=(None, 1))
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [8]:
# Тренування моделі
EPOCHS = 5
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS
)


Epoch 1/5
      7/Unknown [1m1199s[0m 168s/step - loss: 0.0042 - mae: 0.0408

  self.gen.throw(typ, value, traceback)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1358s[0m 194s/step - loss: 0.0041 - mae: 0.0408 - val_loss: 0.0027 - val_mae: 0.0350
Epoch 2/5
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1213s[0m 172s/step - loss: 0.0023 - mae: 0.0343 - val_loss: 0.0022 - val_mae: 0.0291
Epoch 3/5
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1223s[0m 175s/step - loss: 0.0019 - mae: 0.0302 - val_loss: 0.0025 - val_mae: 0.0296
Epoch 4/5
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1205s[0m 172s/step - loss: 0.0016 - mae: 0.0272 - val_loss: 0.0023 - val_mae: 0.0295
Epoch 5/5
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1206s[0m 173s/step - loss: 0.0019 - mae: 0.0290 - val_loss: 0.0028 - val_mae: 0.0295


In [9]:
# Evaluate and save the model
model.save("ML-DAN_v.2.0.h5")



In [10]:
model.save("ML-DAN_v.2.0.keras")

In [11]:
model.save_weights('model_weights.weights.h5')

In [19]:
import soundfile as sf
import os

def load_audio_for_test(file_path, target_sr=SAMPLE_RATE, segment_length=SAMPLE_RATE * 3):
    """
    Завантажує аудіофайл і повертає його сегменти потрібної довжини для подальшої обробки.
    """
    audio, sr = librosa.load(file_path, sr=target_sr)
    audio_segments = []

    for start in range(0, len(audio), segment_length):
        segment = audio[start:start + segment_length]
        if len(segment) < segment_length:
            segment = np.pad(segment, (0, segment_length - len(segment)))
        audio_segments.append(segment.reshape(1, -1, 1))

    return audio_segments


def denoise_audio(model, input_file):
    audio_segments = load_audio_for_test(input_file)
    denoised_audio = []

    for segment in audio_segments:
        denoised_segment = model.predict(segment).squeeze()
        denoised_audio.append(denoised_segment)

    denoised_audio = np.concatenate(denoised_audio)
    output_path = os.path.join(os.path.dirname(input_file), "denoised_" + os.path.basename(input_file).split('.')[0] + ".wav")
    sf.write(output_path, denoised_audio, SAMPLE_RATE)
    print(f"Очищене аудіо збережено у файлі: {output_path}")


In [20]:
denoise_audio(model, '../../data/audios/english/train/blended_trim/19-198-0002.mp3')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Очищене аудіо збережено у файлі: ../../data/audios/english/train/blended_trim\denoised_19-198-0002.wav
