<a href="https://colab.research.google.com/github/ShreyJais/Speech-Processing/blob/main/2348558_SPR_LAB1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Q1- Implement sampling and quantization techniques for the given speech signals.

#### (a) Plot the time domain representation of the original speech signal.

In [None]:
import librosa

# Read the recording into `amplitude` together with its sampling rate.
# sr=None is passed so the rate reported is the file's own rather than a
# rate chosen by librosa.
amplitude, sample_rate = librosa.load(
    '/content/audio.mp3',
    sr=None,
)

In [None]:
import numpy as np
import plotly.express as px

# Plot the waveform against real time. When no x is given Plotly falls back
# to the sample index, which contradicts the time-axis label; dividing the
# index by the sampling rate converts it to seconds.
time_axis = np.arange(len(amplitude)) / sample_rate
fig = px.line(x=time_axis, y=amplitude, title='Audio Waveform')
fig.update_xaxes(title_text='Time (s)')
fig.update_yaxes(title_text='Amplitude')
fig.show()

In [None]:
# Report the sampling rate returned when the audio file was loaded.
print("Current Sample Rate:", sample_rate)

#### (b) Sample the speech signal at different sampling rates (e.g., 8kHz, 16kHz, and 44.1kHz).

In [None]:
# Resample the original recording to 8 kHz, 16 kHz and 44.1 kHz (in that
# order); each result keeps its own name because later cells refer to them
# individually.
resample1, resample2, resample3 = (
    librosa.resample(amplitude, orig_sr=sample_rate, target_sr=target_rate)
    for target_rate in (8000, 16000, 44100)
)

In [None]:
import numpy as np


def _waveform_figure(samples, rate, title):
    """Return a line plot of `samples` against time in seconds.

    The x values are the sample indices divided by `rate`, so signals
    sampled at different rates share a comparable time axis instead of
    being drawn against their (rate-dependent) sample index.
    """
    seconds = np.arange(len(samples)) / rate
    figure = px.line(x=seconds, y=samples, title=title)
    figure.update_xaxes(title_text='Time (s)')
    figure.update_yaxes(title_text='Amplitude')
    return figure


fig1 = _waveform_figure(resample1, 8000, 'Resampled Audio (8kHz)')
fig2 = _waveform_figure(resample2, 16000, 'Resampled Audio (16kHz)')
fig3 = _waveform_figure(resample3, 44100, 'Resampled Audio (44.1kHz)')

fig1.show()
fig2.show()
fig3.show()

#### (d) Using the sampled signals from the above task, reconstruct the signal using:
#### (i) Zero-order hold (nearest-neighbor interpolation)
#### (ii) Linear interpolation.

In [None]:
from scipy import interpolate
import numpy as np

# Build zero-order-hold and linear interpolators over each resampled signal,
# indexed by sample position.
index8 = np.arange(len(resample1))
index16 = np.arange(len(resample2))
interpolation1 = interpolate.interp1d(index8, resample1, kind='zero')
interpolation2 = interpolate.interp1d(index8, resample1, kind='linear')
interpolation3 = interpolate.interp1d(index16, resample2, kind='zero')
interpolation4 = interpolate.interp1d(index16, resample2, kind='linear')

# Each signal needs its own query grid spanning its full index range and
# containing as many points as the original recording. Reusing the 8 kHz grid
# for the 16 kHz signal would only cover the first half of `resample2` and
# stretch that half over the whole output.
new = np.linspace(0, len(resample1) - 1, len(amplitude))
new16 = np.linspace(0, len(resample2) - 1, len(amplitude))

upsample8a = interpolation1(new)
upsample8b = interpolation2(new)
upsample16a = interpolation3(new16)
upsample16b = interpolation4(new16)

#### (e) Calculate the Mean Squared Error (MSE) between the original and the reconstructed signals for both methods.

In [None]:
from sklearn.metrics import mean_squared_error

# Each report label is paired with the reconstruction it describes; every
# reconstruction is compared against the original recording.
mse_reports = (
    ("Mean square error post reconstructing 8KHz using zero-hold interpolation: ", upsample8a),
    ("Mean square error post reconstructing 8KHz using Linear interpolation: ", upsample8b),
    ("Mean square error post reconstructing 16KHz using zero-hold interpolation: ", upsample16a),
    ("Mean square error post reconstructing 16KHz using Linear interpolation: ", upsample16b),
)
for label, reconstructed in mse_reports:
    print(label, mean_squared_error(amplitude, reconstructed))

### Q2- Implement the source-filter model for a given speech signal and analyze the impact of sampling and reconstruction on the quality of the speech signal.

#### (a) Generate a synthetic speech signal using the source-filter model.
#### (i) Create a source signal (e.g., a glottal pulse train for voiced sounds or white noise for unvoiced sounds).
#### (ii) Apply a filter that models the vocal tract, represented by an all-pole filter or an FIR filter with formants (resonances of the vocal tract).

In [None]:
# Source signal: a sine wave with `frequency` cycles over the interval
# [0, 1], evaluated on 1000 evenly spaced points.
frequency = 3
time = np.linspace(0, 1, 1000)
continuous_signal = np.sin(2 * np.pi * frequency * time)

fig = px.line(
    x=time,
    y=continuous_signal,
    title='Continuous Signal',
)
fig.update_yaxes(title_text='Amplitude')
fig.update_xaxes(title_text='Time')
fig.show()

In [None]:
# 1000 standard-normal samples, one per point of the 1000-point `time` grid
# used for the sine source.
noise_signal = np.random.randn(1000)

# Plot against `time` so the axis labelled 'Time' really is time rather than
# the sample index, matching the plot of the sine source.
fig = px.line(x=time, y=noise_signal, title='Noise Signal')
fig.update_xaxes(title_text='Time')
fig.update_yaxes(title_text='Amplitude')
fig.show()

In [None]:
# Element-wise sum of the sine source and the noise; both were generated with
# 1000 points, so the result lines up with the `time` grid.
signal = continuous_signal + noise_signal

# Plot against `time` so the axis labelled 'Time' really is time rather than
# the sample index, matching the plot of the sine source.
fig = px.line(x=time, y=signal, title='Combined Signal')
fig.update_xaxes(title_text='Time')
fig.update_yaxes(title_text='Amplitude')
fig.show()

In [None]:
from scipy.signal import firwin, lfilter

# FIR approximation of the vocal tract: a 101-tap filter whose band edges sit
# at the chosen formant frequencies (in Hz).
numtaps = 101
formant_frequencies = [500, 1500, 2500]

# firwin (without `fs`) expects band edges as a fraction of the Nyquist
# frequency. The signal is treated as sampled at 24 kHz -- the same rate that
# is passed as orig_sr when it is resampled -- so Nyquist is 24000 / 2. The
# previous divisor used 240000, which placed every edge ten times too low.
nyquist = 24000 / 2
normalized_freqs = [f / nyquist for f in formant_frequencies]

# pass_zero=False makes the band below the first edge a stop band; the
# remaining bands alternate pass/stop from there.
vocal_tract_filter = firwin(numtaps, normalized_freqs, pass_zero=False)

#### (b) Plot the generated speech signal and analyze the effect of the filter on the original source.

In [None]:
# Pass the sine source through the FIR vocal-tract filter; the denominator
# of 1.0 means only the numerator taps shape the output.
filtered_glottal_pulse = lfilter(vocal_tract_filter, 1.0, continuous_signal)

# Build the before/after figures, then label and display them in order.
fig1, fig2 = (
    px.line(y=trace, title=caption)
    for trace, caption in (
        (continuous_signal, 'Original Continuous Signal'),
        (filtered_glottal_pulse[:1000], 'Filtered Glottal Pulse (Vocal Tract Filter Applied)'),
    )
)
for figure in (fig1, fig2):
    figure.update_xaxes(title_text='Time')
    figure.update_yaxes(title_text='Amplitude')

for figure in (fig1, fig2):
    figure.show()

#### (c) Sample the speech signal generated in the above task at different sampling rates (e.g., 8 kHz, 16 kHz, 44.1 kHz).

In [None]:
# Target rates are listed once so the resampling and the plot titles cannot
# drift apart.
target_rates = [8000, 16000, 44100]
resampled_signals = [
    librosa.resample(filtered_glottal_pulse, orig_sr=24000, target_sr=rate)
    for rate in target_rates
]

# The loop variable is deliberately not called `signal`: that name already
# holds the combined sine-plus-noise signal and would be overwritten.
for rate, resampled in zip(target_rates, resampled_signals):
    # Only the first 1000 samples are drawn to keep the plots comparable.
    fig = px.line(y=resampled[:1000], title=f'Sampled Speech Signal at {rate} Hz')
    fig.update_xaxes(title_text='Time')
    fig.update_yaxes(title_text='Amplitude')
    fig.show()

#### (d) Reconstruct the signal using a suitable interpolation method (e.g., zero-order hold, linear interpolation).
#### (e) Compute the Mean Squared Error (MSE) between the original and reconstructed speech signals.

In [None]:
from scipy import interpolate

# Rates are listed in the same order used to build `resampled_signals`, so
# each pair of MSE values can be attributed to its sampling rate.
for rate, resampled in zip((8000, 16000, 44100), resampled_signals):
    x = np.arange(len(resampled))
    # kind='zero' holds each sample until the next one (zero-order hold), the
    # same choice as the Q1 reconstruction. kind='nearest' would instead
    # switch to the next sample halfway between the two, which is not a hold.
    f_zero_order = interpolate.interp1d(x, resampled, kind='zero')
    f_linear = interpolate.interp1d(x, resampled, kind='linear')

    # Stretch the resampled signal back onto as many points as the filtered
    # signal so the two can be compared sample by sample.
    x_new = np.linspace(0, len(resampled) - 1, len(filtered_glottal_pulse))
    reconstructed_zero_order = f_zero_order(x_new)
    reconstructed_linear = f_linear(x_new)

    # Both reconstructions already have the reference length, so no slicing
    # of the reference is needed.
    mse_zero_order = mean_squared_error(filtered_glottal_pulse, reconstructed_zero_order)
    mse_linear = mean_squared_error(filtered_glottal_pulse, reconstructed_linear)

    print(f'Sampling rate: {rate} Hz')
    print(f'MSE for Zero-Order Hold: {mse_zero_order}')
    print(f'MSE for Linear Interpolation: {mse_linear}')

# After the loop, `reconstructed_zero_order` and `reconstructed_linear` hold
# the results of the last rate (44100 Hz); the plotting cell below uses them.

In [None]:
# Show the filtered reference followed by the two reconstructions left over
# from the reconstruction loop; only the first 1000 samples of each are drawn.
fig, fig1, fig2 = (
    px.line(y=trace[:1000], title=caption)
    for trace, caption in (
        (filtered_glottal_pulse, 'Original Filtered Speech Signal (Source-Filter Model)'),
        (reconstructed_zero_order, 'Reconstructed Speech Signal (Zero-Order Hold)'),
        (reconstructed_linear, 'Reconstructed Speech Signal (Linear Interpolation)'),
    )
)
for figure in (fig, fig1, fig2):
    figure.update_xaxes(title_text='Time')
    figure.update_yaxes(title_text='Amplitude')
    figure.show()