In [2]:
from utils import *
import os
import numpy as np
from griffin_lim import *
import matplotlib.pyplot as plt
import soundfile as sf

## Audio MNIST

### Load Random Spectrogram created from the iDLG model

In [None]:
folder_path = "results_mat/iDLG_audio_mnist"
# Get a list of all files in the folder_path
all_files = os.listdir(folder_path)

# Sample 3 files from the list
sample_file = np.random.choice(all_files, size=1, replace=False)

spec_db = np.load(f'{folder_path}/{sample_file[0]}')

sr = 8000
img = librosa.display.specshow(spec_db[0][0], x_axis='time', y_axis='linear', fmax=sr//2, fmin=0, cmap='magma')

### Reconstrunct signal in time-domain

In [None]:
inverse_spec = np.array(10**(spec_db[0][0]/20))

win_size = 256
hop = 128
win = 'hamming'


y_reconstructed = mfgla(inverse_spec, iterations=1000, stft_kwargs={'n_fft': win_size, 'hop_length': hop, 'window' : win, 'center' : None }
                        , istft_kwargs={'n_fft': win_size, 'hop_length': hop, 'window' : win, 'center' : None })

In [None]:
folder_path = "data/audioMNIST/data"
y, sr = librosa.load(folder_path + "/" + sample_file[0].replace("npy","wav"), sr=sr)

fig, ax = plt.subplots(nrows=2, sharex=True, sharey=True)
librosa.display.waveshow(y_reconstructed, sr=sr, ax=ax[0])
librosa.display.waveshow(y, sr=sr, ax=ax[1])
ax[0].set(title='Reconstruction', xlabel=None)
ax[0].label_outer()
ax[1].set(title='Original', xlabel=None)
ax[1].label_outer()

In [None]:
import soundfile as sf
sf.write('results_audio/audio_mnist/' + sample_file[0].replace("npy","wav"), y_reconstructed, sr) # Save as WAV file

In [15]:
# For all files

folder_path = "results_mat/iDLG_audio_mnist"

all_files = os.listdir(folder_path)


win_size = 512
hop = 256
win = 'hamming'
sr = 48000
i = 0
for file in all_files:
    spec_db = np.load(f'{folder_path}/{file}')
    inverse_spec = np.array(10**(spec_db[0][0]/20))
    try:
        y_reconstructed = mfgla(inverse_spec, iterations=1000, stft_kwargs={'n_fft': win_size, 'hop_length': hop, 'window' : win, 'center' : None }
                            , istft_kwargs={'n_fft': win_size, 'hop_length': hop, 'window' : win, 'center' : None })
    except Exception as e:
        print(f"Error processing {file}")
        continue
    sf.write('results_audio/audio_mnist/' + file.replace("npy","wav"), y_reconstructed, sr)
    i += 1
    if i % 100 == 0:
        print(f"Processed {i}/{len(all_files)} files")

  inverse_spec = np.array(10**(spec_db[0][0]/20))


Error processing 0_07_30.npy
Error processing 0_09_13.npy
Error processing 0_15_27.npy
Error processing 0_18_22.npy
Error processing 0_20_40.npy
Error processing 0_20_7.npy
Error processing 0_21_3.npy
Error processing 0_22_39.npy
Error processing 0_23_5.npy
Error processing 0_26_8.npy
Error processing 0_31_6.npy
Error processing 0_36_15.npy
Error processing 0_48_41.npy
Error processing 0_49_12.npy
Error processing 0_50_19.npy
Error processing 0_51_23.npy
Error processing 0_51_25.npy
Error processing 1_05_9.npy
Error processing 1_12_31.npy
Error processing 1_14_7.npy
Error processing 1_15_48.npy
Processed 100/983 files
Error processing 1_17_34.npy
Error processing 1_17_41.npy
Error processing 1_18_19.npy
Error processing 1_20_1.npy
Error processing 1_20_44.npy
Error processing 1_20_5.npy
Error processing 1_38_21.npy
Error processing 1_42_27.npy
Error processing 1_45_36.npy
Error processing 1_50_5.npy
Error processing 1_52_44.npy
Error processing 1_54_12.npy
Error processing 1_55_16.npy


## Urbansound

### Load Random Spectrogram created from the iDLG model

In [None]:
folder_path = "results_mat/iDLG_urbansound"
# Get a list of all files in the folder_path
all_files = os.listdir(folder_path)

# Sample 3 files from the list
sample_file = np.random.choice(all_files, size=1, replace=False)

spec_db = np.load(f'{folder_path}/{sample_file[0]}')
sr = 44100
img = librosa.display.specshow(spec_db[0][0], x_axis='time', y_axis='linear', fmax=sr//2, fmin=0, cmap='magma')

### Reconstrunct signal in time-domain

In [None]:
inverse_spec = np.array(10**(spec_db[0][0]/20))

win_size = 2048
hop = 1024
win = 'hamming'

y_reconstructed = mfgla(inverse_spec, iterations=1000, stft_kwargs={'n_fft': win_size, 'hop_length': hop, 'window' : win, 'center' : None }
                        , istft_kwargs={'n_fft': win_size, 'hop_length': hop, 'window' : win, 'center' : None })

In [None]:
folder_path = "data/audio/"
for dir, _ , files in os.walk(folder_path):
    if sample_file[0].replace("npy","wav") in files:
        folder_path = dir
        break
y, sr = librosa.load(folder_path + "/" + sample_file[0].replace("npy","wav"), sr=sr)

fig, ax = plt.subplots(nrows=2, sharex=True, sharey=True)
librosa.display.waveshow(y_reconstructed, sr=sr, ax=ax[0])
librosa.display.waveshow(y, sr=sr, ax=ax[1])
ax[0].set(title='Reconstruction', xlabel=None)
ax[0].label_outer()
ax[1].set(title='Original', xlabel=None)
ax[1].label_outer()

In [None]:
import soundfile as sf
sf.write('results_audio/urbansound/' + sample_file[0].replace("npy","wav"), y_reconstructed, sr) # Save the reconstructed audio

In [12]:
# For all files

folder_path = "results_mat/iDLG_urbansound"

all_files = os.listdir(folder_path)


win_size = 2048
hop = 1024
win = 'hamming'
i = 0
for file in all_files:
    spec_db = np.load(f'{folder_path}/{file}')
    inverse_spec = np.array(10**(spec_db[0][0]/20))
    try:
        y_reconstructed = mfgla(inverse_spec, iterations=1000, stft_kwargs={'n_fft': win_size, 'hop_length': hop, 'window' : win, 'center' : None }
                            , istft_kwargs={'n_fft': win_size, 'hop_length': hop, 'window' : win, 'center' : None })
        folder_path2 = "data/audio/"
        for dir, _ , files in os.walk(folder_path2):
            if file.replace("npy","wav") in files:
                folder_path2 = dir
                break
        _, sr = librosa.load(folder_path2 + "/" + file.replace("npy","wav"), sr=None)
    except Exception as e:
        print(f"Error processing {file} - {e}")
        continue
    sf.write('results_audio/urbansound/' + file.replace("npy","wav"), y_reconstructed, sr)
    i += 1
    if i % 100 == 0:
        print(f"Processed {i}/{len(all_files)} files")

  inverse_spec = np.array(10**(spec_db[0][0]/20))
  _M = spectrogram + (alpha * np.abs(_D))


Error processing 100852-0-0-24.npy - Audio buffer is not finite everywhere
Error processing 101729-0-0-24.npy - Audio buffer is not finite everywhere
Error processing 101729-0-0-28.npy - Audio buffer is not finite everywhere
Error processing 103074-7-1-0.npy - Audio buffer is not finite everywhere
Error processing 103199-4-0-4.npy - Audio buffer is not finite everywhere
Error processing 103258-5-0-5.npy - Audio buffer is not finite everywhere
Error processing 105029-7-0-1.npy - Audio buffer is not finite everywhere
Error processing 105029-7-1-3.npy - Audio buffer is not finite everywhere
Error processing 105088-3-0-10.npy - Audio buffer is not finite everywhere
Error processing 105319-3-0-39.npy - Audio buffer is not finite everywhere
Error processing 106905-8-0-1.npy - Audio buffer is not finite everywhere
Error processing 107357-8-1-1.npy - Audio buffer is not finite everywhere
Error processing 111386-5-1-6.npy - Audio buffer is not finite everywhere
Error processing 115241-9-0-8.npy