<a href="https://colab.research.google.com/github/SanyaShresta25/Speech-Enhancement-Using-UNet-Architecture/blob/main/speech_enhancement.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!git clone https://github.com/dathu/Speech-enhancement-deeplearn-vbelz.git
%cd Speech-enhancement-deeplearn-vbelz

Cloning into 'Speech-enhancement-deeplearn-vbelz'...
remote: Enumerating objects: 157, done.[K
remote: Counting objects: 100% (40/40), done.[K
remote: Compressing objects: 100% (9/9), done.[K
remote: Total 157 (delta 35), reused 31 (delta 31), pack-reused 117 (from 1)[K
Receiving objects: 100% (157/157), 47.19 MiB | 30.09 MiB/s, done.
Resolving deltas: 100% (53/53), done.
/content/Speech-enhancement-deeplearn-vbelz


In [None]:
# Update all requirements to more recent versions
!sed -i 's/tensorflow==1.15.2/tensorflow>=2.10.0/g' requirements.txt
!sed -i 's/scipy==1.3.1/scipy>=1.7.0/g' requirements.txt
!sed -i 's/matplotlib==3.1.1/matplotlib>=3.5.0/g' requirements.txt
!sed -i 's/numpy==1.17.2/numpy>=1.20.0/g' requirements.txt
!sed -i 's/librosa==0.7.0/librosa>=0.9.0/g' requirements.txt
!sed -i 's/sklearn/scikit-learn/g' requirements.txt

# Install the updated requirements
!pip install -r requirements.txt

In [None]:
# Create directories for the processed data
!mkdir -p demo_data/clean_speech
!mkdir -p demo_data/noisy_speech

# Download LibriSpeech (clean speech) - using a small subset for testing
!wget https://www.openslr.org/resources/12/dev-clean.tar.gz
!tar -xzf dev-clean.tar.gz

# Download ESC-50 (noise)
!wget https://github.com/karoldvl/ESC-50/archive/master.zip
!unzip master.zip

In [5]:
%%writefile prepare_my_data.py
import os
import numpy as np
import librosa
import soundfile as sf
from glob import glob

# Paths
clean_path = './LibriSpeech/dev-clean'
noise_path = './ESC-50-master/audio'
output_clean = './demo_data/clean_speech'
output_noisy = './demo_data/noisy_speech'

os.makedirs(output_clean, exist_ok=True)
os.makedirs(output_noisy, exist_ok=True)

# Process clean speech
clean_files = []
for root, dirs, files in os.walk(clean_path):
    for file in files:
        if file.endswith('.flac'):
            clean_files.append(os.path.join(root, file))

noise_files = glob(f'{noise_path}/*.wav')

# Select a subset for processing
clean_subset = clean_files[:100]  # Process 100 files for testing

# Create noisy mixtures
for i, clean_file in enumerate(clean_subset):
    # Load clean speech
    clean, sr = librosa.load(clean_file, sr=16000)

    # Select random noise file
    noise_file = np.random.choice(noise_files)
    noise, _ = librosa.load(noise_file, sr=16000)

    # Make noise the same length as clean (loop or cut)
    if len(noise) < len(clean):
        noise = np.tile(noise, int(np.ceil(len(clean)/len(noise))))
    noise = noise[:len(clean)]

    # Mix with SNR between 0-10dB
    snr = np.random.uniform(0, 10)
    clean_power = np.mean(clean**2)
    noise_power = np.mean(noise**2)
    noise_scale = np.sqrt(clean_power / (10**(snr/10) * noise_power))

    noisy = clean + noise_scale * noise

    # Save files
    clean_out = f"{output_clean}/clean_{i:04d}.wav"
    noisy_out = f"{output_noisy}/noisy_{i:04d}.wav"

    sf.write(clean_out, clean, sr)
    sf.write(noisy_out, noisy, sr)

    if i % 10 == 0:
        print(f"Processed {i} files")

print("Data preparation complete!")

Writing prepare_my_data.py


In [6]:
!python prepare_my_data.py

Processed 0 files
Processed 10 files
Processed 20 files
Processed 30 files
Processed 40 files
Processed 50 files
Processed 60 files
Processed 70 files
Processed 80 files
Processed 90 files
Data preparation complete!


In [None]:
# Check model_unet.py to see what changes are needed
!cat model_unet.py

In [8]:
!python train_model.py  # For training
!python prediction_denoise.py  # For prediction/enhancement

2025-04-22 07:58:25.201373: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745308705.237160    4149 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745308705.244808    4149 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-22 07:58:25.269502: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2.18.0
2025-04-22 07:58:30.363097: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT fa

In [9]:
# Run the prediction/enhancement script
!python prediction_denoise.py

2025-04-22 07:58:47.114857: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745308727.136479    4263 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745308727.142887    4263 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-22 07:58:47.165514: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Check that the clean and noisy speech directories contain files
!ls -la demo_data/clean_speech/ | head
!ls -la demo_data/noisy_speech/ | head

In [11]:
!mkdir -p weights/output

In [12]:
# Train the model (this might take some time)
!python train_model.py

2025-04-22 07:59:18.735742: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745308758.759866    4395 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745308758.766977    4395 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-22 07:59:18.790032: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2.18.0


In [13]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Only errors will be shown
# Generate enhanced audio
!python prediction_denoise.py

E0000 00:00:1745308771.799244    4464 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745308771.805568    4464 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [14]:
!ls -la weights/output/

total 8
drwxr-xr-x 2 root root 4096 Apr 22 07:59 .
drwxr-xr-x 3 root root 4096 Apr 22 07:59 ..


In [15]:
import os
print(os.getcwd())

/content/Speech-enhancement-deeplearn-vbelz


In [16]:
import os

# Create the output directory if it doesn't exist
os.makedirs('./weights/output/', exist_ok=True)
print("Ensured that './weights/output/' exists.")


Ensured that './weights/output/' exists.


In [17]:
import os

print(os.listdir("demo_data/noisy_speech"))


['noisy_0085.wav', 'noisy_0060.wav', 'noisy_0058.wav', 'noisy_0028.wav', 'noisy_0074.wav', 'noisy_0076.wav', 'noisy_0050.wav', 'noisy_0004.wav', 'noisy_0057.wav', 'noisy_0002.wav', 'noisy_0025.wav', 'noisy_0007.wav', 'noisy_0088.wav', 'noisy_0054.wav', 'noisy_0067.wav', 'noisy_0099.wav', 'noisy_0096.wav', 'noisy_0059.wav', 'noisy_0086.wav', 'noisy_0064.wav', 'noisy_0016.wav', 'noisy_0094.wav', 'noisy_0078.wav', 'noisy_0021.wav', 'noisy_0015.wav', 'noisy_0068.wav', 'noisy_0083.wav', 'noisy_0026.wav', 'noisy_0070.wav', 'noisy_0079.wav', 'noisy_0090.wav', 'noisy_0075.wav', 'noisy_0005.wav', 'noisy_0077.wav', 'noisy_0061.wav', 'noisy_0093.wav', 'noisy_0039.wav', 'noisy_0038.wav', 'noisy_0008.wav', 'noisy_0006.wav', 'noisy_0030.wav', 'noisy_0043.wav', 'noisy_0065.wav', 'noisy_0081.wav', 'noisy_0010.wav', 'noisy_0098.wav', 'noisy_0020.wav', 'noisy_0034.wav', 'noisy_0036.wav', 'noisy_0052.wav', 'noisy_0023.wav', 'noisy_0044.wav', 'noisy_0037.wav', 'noisy_0047.wav', 'noisy_0009.wav', 'noisy_00

In [18]:
import librosa
from IPython.display import Audio

# Load audio
y, sr = librosa.load("demo_data/noisy_speech/noisy_0048.wav", sr=None)

# Play audio
Audio(y, rate=sr)


In [19]:
import os
print(os.listdir("demo_data/save_predictions"))



['denoise_t1.wav', 'denoise_t2.wav']


In [20]:
noisy, _ = librosa.load("demo_data/noisy_speech/noisy_0048.wav", sr=None)
denoised, _ = librosa.load("demo_data/save_predictions/denoise_t1.wav", sr=None)


In [21]:
from IPython.display import Audio

# Play noisy version (you’ll need to pick which t1/t2 this came from)
Audio("demo_data/noisy_speech/noisy_0048.wav")


In [22]:
# Play denoised version
Audio("demo_data/save_predictions/denoise_t1.wav")


In [23]:
!python main.py

E0000 00:00:1745308807.152726    4622 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745308807.159720    4622 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2.18.0
Traceback (most recent call last):
  File "/content/Speech-enhancement-deeplearn-vbelz/main.py", line 104, in <module>
    prediction(weights_path, name_model, audio_dir_prediction, dir_save_prediction, audio_input_prediction,
  File "/content/Speech-enhancement-deeplearn-vbelz/prediction_denoise.py", line 18, in prediction
    loaded_model = model_from_json(loaded_model_json)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/keras/src/models/model.py", line 808, in model_from_json
    return serialization_lib.deserialize_keras_object(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^