# Training The Model

In [1]:
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, TimeDistributed
from scipy.io import wavfile

In [2]:
# Set file paths and parameters
win_size = 1024
hop_size = win_size // 2

In [3]:
# Initialize arrays to hold training data
stft_mag_train = []
stft_noise_mag_train = []
stft_mag_target_train = []

In [4]:
# Trim audiofile
def trim_wav( originalWavPath,Name):
    sampleRate, waveData = wavfile.read( originalWavPath )
    startSample = int( 0 * sampleRate )
    endSample = int( 10 * sampleRate )
    wavfile.write( Name, sampleRate, waveData[startSample:endSample])

In [5]:
batch_size = 10  # Define batch size

# Define generator function to yield batches of audio files
def audio_generator(batch_size):
    for batch_start in range(0, 20, batch_size):
        batch_end = min(batch_start + batch_size, 250)
        batch = []
        for FileNum in range(batch_start, batch_end):
            # Load the Audio File
            audio_file = f'C:/Users/Matt/Documents/Project/CS-M/Experiments/NoiseCancelTests/Datasets/Original/All/({FileNum+1}).wav'
            audio, sr = librosa.load(audio_file, sr=None)
            batch.append(audio)
        yield batch, sr, batch_start, batch_end

# Loop over generator to add noise and extract features
for NoiseNum in range(10):
    for audio_batch, sr, batch_start, batch_end in audio_generator(batch_size):
        # Noise files
        noise_file = f'C:/Users/Matt/Documents/Project/CS-M/Experiments/NoiseCancelTests/Datasets/Noise/({NoiseNum+1}).wav'
        noise, sr = librosa.load(noise_file, sr=None)

        # Resize the noise array to match the length of the audio arrays in the batch
        for i in range(len(audio_batch)):
            if len(noise) > len(audio_batch[i]):
                noise_batch = noise[:len(audio_batch[i])]
            else:
                audio_batch[i] = audio_batch[i][:len(noise)]
                noise_batch = noise
            audio_batch[i] = audio_batch[i] + noise_batch

        # Extract features using Short-Time Fourier Transform (STFT)
        stft_audio = librosa.stft(np.concatenate(audio_batch), n_fft=win_size, hop_length=hop_size)
        stft_noise = librosa.stft(np.tile(noise_batch, (len(audio_batch), 1)), n_fft=win_size, hop_length=hop_size)

        # Separate magnitude and phase components
        stft_mag = np.abs(stft_audio)
        stft_phase = np.angle(stft_audio)
        stft_noise_mag = np.abs(stft_noise)

        # Split processed features and target values into individual arrays
        stft_mag_split = np.array_split(stft_mag, len(audio_batch))
        stft_noise_mag_split = np.array_split(stft_noise_mag, len(audio_batch))

        # Append processed features and target values to arrays
        stft_mag_train.extend(stft_mag_split)
        stft_noise_mag_train.extend(stft_noise_mag_split)
        stft_mag_target_train.extend(stft_mag_split)
        print(NoiseNum, batch_end)


0 10
0 20
1 10
1 20
2 10
2 20
3 10
3 20
4 10
4 20
5 10
5 20
6 10
6 20
7 10
7 20
8 10
8 20
9 10
9 20


In [6]:
# Concatenate training data from all files
stft_mag_train = np.concatenate(stft_mag_train, axis=0)
stft_noise_mag_train = np.concatenate(stft_noise_mag_train, axis=0)
stft_mag_target_train = np.concatenate(stft_mag_target_train, axis=0)

In [13]:
# Define RNN model with LSTM layers
model = Sequential()
model.add(LSTM(units=128, input_shape=(None, 1), return_sequences=True))
model.add(LSTM(units=64, return_sequences=True))
model.add(TimeDistributed(Dense(units=1, activation='linear')))

# Compile model
model.compile(loss='mse', optimizer='adam')

# Split stft_mag_train into chunks with the same shape as stft_noise_mag_train
chunk_size = stft_noise_mag_train.shape[0]
chunks = [stft_mag_train[i:i+chunk_size] for i in range(0, len(stft_mag_train), chunk_size)]

# Reshape chunks to have the same shape as stft_noise_mag_train
reshaped_chunks = [np.expand_dims(chunk, axis=-1) for chunk in chunks]

# Reshape stft_noise_mag_train to have the same shape as reshaped_chunks
reshaped_noise = np.expand_dims(stft_noise_mag_train, axis=-1)
reshaped_noise = np.tile(reshaped_noise, (1, 1, len(reshaped_chunks)))

# Concatenate reshaped_chunks and reshaped_noise along the second dimension
combined_input = np.concatenate((reshaped_chunks + [reshaped_noise]), axis=1)

# Train model on combined input
model.fit(combined_input, stft_mag_target_train, epochs=50)

MemoryError: Unable to allocate 17.1 GiB for an array with shape (4598942400, 1) and data type float32

In [None]:
# Load the saved model
model = load_model('ANC.h5')

# Testing The AI Model

In [None]:
Test_file = ''
trim_wav(Test_file,'TestFile.wav')
noise_file = ''
trim_wav(noise_file,'noiseFile.wav')
Test, sr = librosa.load('TestFile.wav', sr=None)
noise, sr = librosa.load('noiseFile.wav', sr=None)

In [None]:
# Extract features using STFT
stft_test_audio = librosa.stft(Test, n_fft=win_size, hop_length=hop_size)
stft_noise_audio = librosa.stft(noise, n_fft=win_size, hop_length=hop_size)

In [None]:
# Separate magnitude and phase components
stft_test_mag = np.abs(stft_test_audio)
stft_test_phase = np.angle(stft_test_audio)
stft_noise_mag = np.abs(stft_noise_audio)

# Reshape magnitude component to (n_frames, n_bins) format
test_mag_shape = stft_test_mag.shape
stft_test_mag = np.reshape(stft_test_mag, (test_mag_shape[0], test_mag_shape[1], 1))
noise_mag_shape = stft_noise_mag.shape
stft_noise_mag = np.reshape(stft_noise_mag, (noise_mag_shape[0], noise_mag_shape[1], 1))

In [None]:
# Apply model to test audio
stft_test_mag_pred = model.predict(np.concatenate((stft_test_mag, stft_noise_mag), axis=-1))
stft_test_mag_pred = np.squeeze(stft_test_mag_pred, axis=-1)

# Invert STFT to obtain noise-reduced audio
stft_test_pred = stft_test_mag_pred * np.exp(1j * stft_test_phase)
test_audio_pred = librosa.istft(stft_test_pred, hop_length=hop_size)

In [None]:
# Save noise-reduced audio to file
librosa.output.write_wav('ANC.wav', test_audio_pred, sr)