# Audio Denoising Using FFT

In [None]:
import numpy as np
from scipy.io import wavfile
from scipy.fft import fft, ifft
import IPython.display as ipd
import soundfile as sf
import librosa

import plotly.graph_objects as go

In [None]:
def audio_similarity(arr_1, arr_2, sr=22050):
    """Return the mean per-frame cosine similarity between two signals' MFCCs.

    Args:
        arr_1: Samples of the first signal, passed to ``librosa.feature.mfcc``
            as ``y``.
        arr_2: Samples of the second signal, passed the same way.
        sr: Sampling rate forwarded to ``librosa.feature.mfcc``. The default
            (22050) is the rate librosa assumes when none is given, so
            existing two-argument calls behave as before; pass the real
            sampling rate of the signals for a correctly scaled mel filterbank.

    Returns:
        The mean, over frames, of the cosine similarity between the two MFCC
        vectors of each frame.
    """
    # Extract MFCCs (rows are coefficients, columns are frames)
    mfcc1 = librosa.feature.mfcc(y=arr_1, sr=sr)
    mfcc2 = librosa.feature.mfcc(y=arr_2, sr=sr)

    # Truncate both to the shorter one so frames can be compared pairwise
    min_len = min(mfcc1.shape[1], mfcc2.shape[1])
    mfcc1 = mfcc1[:, :min_len]
    mfcc2 = mfcc2[:, :min_len]

    # Per-frame cosine similarity: dot product over the product of the norms
    dot_products = np.sum(mfcc1 * mfcc2, axis=0)
    norm_products = np.linalg.norm(mfcc1, axis=0) * np.linalg.norm(mfcc2, axis=0)

    # A frame with a zero-norm MFCC vector has no direction; score it as 0
    # instead of dividing by zero and turning the whole mean into NaN.
    per_frame = np.divide(
        dot_products,
        norm_products,
        out=np.zeros_like(dot_products),
        where=norm_products > 0,
    )

    return np.mean(per_frame)

In [None]:
# Read the sampling rate and the raw samples from the WAV file
sample_rate, audio_data = wavfile.read('data/audio/c_major.wav')

# Multi-channel audio comes back with more than one dimension;
# in that case keep only the first channel
audio_data = audio_data[:, 0] if audio_data.ndim > 1 else audio_data

In [None]:
# Time axis in seconds: sample index divided by the sampling rate
time = np.arange(len(audio_data)) / sample_rate

# Build the figure from a single line trace of the waveform
fig = go.Figure(data=[go.Scatter(x=time, y=audio_data, mode='lines')])

# Canvas size and axis labels (no title is set)
fig.update_layout(
    xaxis_title='Time (seconds)',
    yaxis_title='Amplitude',
    width=1800,
    height=600,
)

# Render the figure
fig.show()

In [None]:
# Write the samples to a temporary WAV file on disk
temp_wav_path = 'data/audio/temp.wav'
sf.write(temp_wav_path, audio_data, sample_rate)

# Show an audio player for that file in the notebook output
ipd.display(ipd.Audio(temp_wav_path))

In [None]:
# Discrete Fourier transform of the audio samples
fft_result = fft(audio_data)

# Frequency in Hz of every FFT bin, given a sample spacing of 1/sample_rate
n_bins = len(fft_result)
frequencies = np.fft.fftfreq(n_bins, d=1 / sample_rate)


In [None]:
# Keep only the non-negative frequency bins. fftfreq lists the positive
# frequencies first and the negative ones after them, so drawing the whole
# array as one line adds a stray segment back across the visible range and
# sends twice as many points to the plot as are ever shown.
positive_bins = frequencies >= 0

# Create the plot
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=frequencies[positive_bins],
    y=np.abs(fft_result)[positive_bins],
    mode='lines',
))

# Customize the layout (no title is set)
fig.update_layout(height=600, width=1000,
    xaxis_title='Frequency (Hz)',
    yaxis_title='Magnitude',
    xaxis_range=[0, sample_rate/2]  # Show 0 Hz up to half the sampling rate
)

# Show the plot
fig.show()

In [None]:
# Keep only the non-negative frequency bins. fftfreq lists the positive
# frequencies first and the negative ones after them, so drawing the whole
# array as one line adds a stray segment back across the visible range and
# sends twice as many points to the plot as are ever shown.
positive_bins = frequencies >= 0

# Create the plot
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=frequencies[positive_bins],
    y=np.abs(fft_result)[positive_bins],
    mode='lines',
))

# Customize the layout (no title is set)
fig.update_layout(height=600, width=1000,
    xaxis_title='Frequency (Hz)',
    yaxis_title='Magnitude',
    xaxis_range=[0, sample_rate/2],  # Show 0 Hz up to half the sampling rate
    yaxis_range=[0, 1.5e6]  # Cap the magnitude axis to zoom in on the lower part of the spectrum
)

# Show the plot
fig.show()

# Add Noise

In [None]:
# Add Gaussian white noise
# Noise standard deviation: 10% of the largest sample value
noise_level = int(0.1 * audio_data.max())  # Adjust this value to control the amount of noise
noise = np.random.normal(0, noise_level, audio_data.shape)

# Adding the integer arrays directly wraps around when a loud sample plus
# noise exceeds the integer range (e.g. 30000 + 5000 becomes negative in
# int16). Sum in float64 instead, saturate at the limits of the dtype the
# direct addition would have produced, then cast back to that dtype.
noisy_dtype = np.result_type(audio_data.dtype, np.int16)
noisy_sum = audio_data.astype(np.float64) + noise.astype(np.int16)
if np.issubdtype(noisy_dtype, np.integer):
    dtype_limits = np.iinfo(noisy_dtype)
    noisy_sum = np.clip(noisy_sum, dtype_limits.min, dtype_limits.max)
noisy_audio = noisy_sum.astype(noisy_dtype)

In [None]:
# Time axis in seconds: sample index divided by the sampling rate
time = np.arange(len(noisy_audio)) / sample_rate

# Build the figure from a single line trace of the noisy waveform
fig = go.Figure(data=[go.Scatter(x=time, y=noisy_audio, mode='lines')])

# Canvas size and axis labels (no title is set)
fig.update_layout(
    xaxis_title='Time (seconds)',
    yaxis_title='Amplitude',
    width=1800,
    height=600,
)

# Render the figure
fig.show()

In [None]:
# Write the noisy samples to a temporary WAV file on disk
noisy_wav_path = 'data/audio/temp_noisy.wav'
sf.write(noisy_wav_path, noisy_audio, sample_rate)

# Show an audio player for that file in the notebook output
ipd.display(ipd.Audio(noisy_wav_path))

In [None]:
# Discrete Fourier transform of the noisy samples
fft_result_noisy = fft(noisy_audio)

# Frequency in Hz of every FFT bin, given a sample spacing of 1/sample_rate
n_bins_noisy = len(fft_result_noisy)
frequencies_noisy = np.fft.fftfreq(n_bins_noisy, d=1 / sample_rate)


In [None]:
# Keep only the non-negative frequency bins. fftfreq lists the positive
# frequencies first and the negative ones after them, so drawing the whole
# array as one line adds a stray segment back across the visible range and
# sends twice as many points to the plot as are ever shown.
positive_bins_noisy = frequencies_noisy >= 0

# Create the plot
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=frequencies_noisy[positive_bins_noisy],
    y=np.abs(fft_result_noisy)[positive_bins_noisy],
    mode='lines',
))

# Customize the layout (no title is set)
fig.update_layout(height=600, width=1000,
    xaxis_title='Frequency (Hz)',
    yaxis_title='Magnitude',
    xaxis_range=[0, sample_rate/2]  # Show 0 Hz up to half the sampling rate
)

# Show the plot
fig.show()

In [None]:
# Keep only the non-negative frequency bins. fftfreq lists the positive
# frequencies first and the negative ones after them, so drawing the whole
# array as one line adds a stray segment back across the visible range and
# sends twice as many points to the plot as are ever shown.
positive_bins_noisy = frequencies_noisy >= 0

# Create the plot
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=frequencies_noisy[positive_bins_noisy],
    y=np.abs(fft_result_noisy)[positive_bins_noisy],
    mode='lines',
))

# Customize the layout (no title is set)
fig.update_layout(height=600, width=1000,
    xaxis_title='Frequency (Hz)',
    yaxis_title='Magnitude',
    xaxis_range=[0, sample_rate/2],  # Show 0 Hz up to half the sampling rate
    yaxis_range=[0, 1.5e6]  # Cap the magnitude axis to zoom in on the lower part of the spectrum
)

# Show the plot
fig.show()

## Denoise the Signal using a Threshold

In [None]:
# Magnitude cutoff for a frequency bin (example value, adjust based on
# your signal)
threshold = int(1.4e6)

# Build a new spectrum in which every bin weaker than the cutoff is set to
# zero and every other bin is kept; the noisy spectrum itself is not modified
weak_bins = np.abs(fft_result_noisy) < threshold
fft_results_denoised = np.where(weak_bins, 0, fft_result_noisy)


In [None]:
# Keep only the non-negative frequency bins. fftfreq lists the positive
# frequencies first and the negative ones after them, so drawing the whole
# array as one line adds a stray segment back across the visible range and
# sends twice as many points to the plot as are ever shown.
positive_bins_noisy = frequencies_noisy >= 0

# Create the plot
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=frequencies_noisy[positive_bins_noisy],
    y=np.abs(fft_results_denoised)[positive_bins_noisy],
    mode='lines',
))

# Customize the layout (no title is set)
fig.update_layout(height=600, width=1000,
    xaxis_title='Frequency (Hz)',
    yaxis_title='Magnitude',
    xaxis_range=[0, sample_rate/2]  # Show 0 Hz up to half the sampling rate
)

# Show the plot
fig.show()

In [None]:
# Keep only the non-negative frequency bins. fftfreq lists the positive
# frequencies first and the negative ones after them, so drawing the whole
# array as one line adds a stray segment back across the visible range and
# sends twice as many points to the plot as are ever shown.
positive_bins_noisy = frequencies_noisy >= 0

# Create the plot
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=frequencies_noisy[positive_bins_noisy],
    y=np.abs(fft_results_denoised)[positive_bins_noisy],
    mode='lines',
))

# Customize the layout (no title is set)
fig.update_layout(height=600, width=1000,
    xaxis_title='Frequency (Hz)',
    yaxis_title='Magnitude',
    xaxis_range=[0, sample_rate/2],  # Show 0 Hz up to half the sampling rate
    yaxis_range=[0, 1.5e6]  # Cap the magnitude axis at the same level as the other zoomed spectrum plots
)

# Show the plot
fig.show()

In [None]:
# Transform the thresholded spectrum back to the time domain and keep the
# real part of the result
denoised_signal = np.real(ifft(fft_results_denoised))

# Convert to 16-bit samples. A bare astype(np.int16) truncates toward zero
# and wraps values outside the int16 range, so round to the nearest integer
# and saturate at the int16 limits before casting.
int16_limits = np.iinfo(np.int16)
denoised_signal = np.clip(
    np.rint(denoised_signal), int16_limits.min, int16_limits.max
).astype(np.int16)

In [None]:
# Time axis in seconds: sample index divided by the sampling rate
time = np.arange(len(denoised_signal)) / sample_rate

# Build the figure from a single line trace of the denoised waveform
fig = go.Figure(data=[go.Scatter(x=time, y=denoised_signal, mode='lines')])

# Canvas size and axis labels (no title is set)
fig.update_layout(
    xaxis_title='Time (seconds)',
    yaxis_title='Amplitude',
    width=1800,
    height=600,
)

# Render the figure
fig.show()

In [None]:
# Write the denoised samples to a temporary WAV file on disk
denoised_wav_path = 'data/audio/temp_denoised.wav'
sf.write(denoised_wav_path, denoised_signal, sample_rate)

# Show an audio player for that file in the notebook output
ipd.display(ipd.Audio(denoised_wav_path))

# Loss

In [None]:
# Mean absolute error of each signal against the clean audio.
# The reference is converted to float64 first: subtracting fixed-width
# integer sample arrays directly can wrap around for large differences.
reference_f64 = audio_data.astype(np.float64)
print('MAE: w(t):', np.abs(reference_f64 - audio_data).mean())
print('MAE: w~(t):', np.abs(reference_f64 - noisy_audio).mean())
print('MAE: w^(t):', np.abs(reference_f64 - denoised_signal).mean())

In [None]:
# Mean squared error of each signal against the clean audio.
# The reference is converted to float64 first: squaring a difference of
# 16-bit integer samples overflows as soon as the difference exceeds 181
# (182**2 > 32767), which silently corrupts the reported error.
reference_f64 = audio_data.astype(np.float64)
print('MSE: w(t):', ((reference_f64 - audio_data) ** 2).mean())
print('MSE: w~(t):', ((reference_f64 - noisy_audio) ** 2).mean())
print('MSE: w^(t):', ((reference_f64 - denoised_signal) ** 2).mean())

In [None]:
# Report the MFCC cosine similarity of the clean audio against itself, the
# noisy audio and the denoised audio. Each array is multiplied by 1. before
# the call, as a floating-point copy of the samples.
clean_as_float = 1. * audio_data
for label, candidate in (
    ('CS: w(t):', audio_data),
    ('CS: w~(t):', noisy_audio),
    ('CS: w^(t):', denoised_signal),
):
    print(label, audio_similarity(clean_as_float, 1. * candidate))