# In [1]:
import librosa.display
import numpy as np
import scipy
import torchaudio
import torch



def conv1d(sequence, kernel):
    """Slide ``kernel`` over ``sequence`` and return one dot product per sample.

    The signal is zero-padded on the right by ``kernel.size`` samples and
    ``output[i]`` is ``dot(padded[i:i + kernel.size], kernel)``. The kernel is
    not reversed, so this is a cross-correlation; it equals a true
    convolution only for symmetric kernels.

    Args:
        sequence: Real-valued mono signal, either 1-D ``(n,)`` or a column
            array ``(n, 1)``. For 2-D input only column 0 is used.
        kernel: 1-D NumPy array of real-valued filter taps.

    Returns:
        A list with ``len(sequence)`` filtered values.
    """
    samples = np.asarray(sequence)
    if samples.ndim > 1:
        samples = samples[:, 0]

    # Take the length directly; adding 1 to the array first would only
    # allocate a throwaway copy of the whole signal.
    length = len(samples)
    if kernel.size == 0:
        # A dot product over an empty window is zero for every sample.
        return [np.float64(0.0)] * length

    padded = np.pad(samples, (0, kernel.size), 'constant', constant_values=0)
    # 'valid' mode yields length + 1 windows over the padded signal; the last
    # one lies entirely in the padding, so only the first `length` are kept.
    return list(np.correlate(padded, kernel, mode='valid')[:length])

def resample(x, origin_sr, resample_sr):
    """Resample a mono signal to a new rate by linear interpolation.

    Every output sample is interpolated between the two input samples that
    surround its position in time, so no output slot is left unset whatever
    the rate ratio is. Output positions past the last input sample repeat
    that last sample.

    No anti-aliasing filter is applied, so this is intended for upsampling;
    ratios of 1 or below are accepted but may alias.

    Args:
        x: Mono signal, 1-D ``(n,)`` or a column array ``(n, 1)``.
        origin_sr: Sample rate of ``x`` in Hz.
        resample_sr: Target sample rate in Hz.

    Returns:
        Float array of shape ``(int(len(x) * resample_sr / origin_sr), 1)``.
    """
    samples = np.asarray(x, dtype=float).reshape(-1)
    ratio = resample_sr / origin_sr
    n_out = int(len(samples) * ratio)
    if n_out == 0 or samples.size == 0:
        return np.zeros((n_out, 1))

    # Position of each output sample on the input sample grid. Multiplying
    # before dividing keeps integer rate factors exact.
    positions = np.arange(n_out) * origin_sr / resample_sr
    resampled = np.interp(positions, np.arange(samples.size), samples)
    return resampled.reshape(-1, 1)

def decimation(x, origin_sr, resample_sr):
    """Low-pass filter ``x`` and reduce its sample rate.

    The signal is filtered with a 51-tap Hamming-window FIR filter and the
    filtered signal is then read at the positions of the output samples.
    For an integer rate factor those positions fall exactly on input samples
    (every M-th filtered sample is kept); for a non-integer factor the value
    is linearly interpolated between the two neighbouring filtered samples.

    Args:
        x: Mono signal in the layout ``conv1d`` accepts; it is passed to
            ``conv1d`` unchanged. The callers in this file pass the
            ``(n, 1)`` column array returned by ``resample``.
        origin_sr: Sample rate of ``x`` in Hz.
        resample_sr: Target sample rate in Hz; must be lower than
            ``origin_sr``.

    Returns:
        1-D float array with ``int(len(x) * resample_sr / origin_sr) + 1``
        samples. Output positions past the last input sample are left at
        zero.

    Raises:
        ValueError: If ``resample_sr`` is not strictly between 0 and
            ``origin_sr``.
    """
    # `import scipy` alone does not guarantee the `signal` submodule is loaded.
    from scipy import signal

    ratio = resample_sr / origin_sr
    if not 0 < ratio < 1:
        raise ValueError(
            "decimation requires 0 < resample_sr < origin_sr, got "
            f"origin_sr={origin_sr}, resample_sr={resample_sr}"
        )

    n_in = len(x)
    x_decimated = np.zeros(int(n_in * ratio) + 1)
    if n_in == 0:
        return x_decimated

    # firwin's cutoff is relative to the input Nyquist frequency, so `ratio`
    # places it at the Nyquist frequency of the target rate.
    h_filter = signal.firwin(51, ratio, window='hamming')
    x_conv = np.asarray(conv1d(x, h_filter), dtype=float)

    # Position of each output sample on the input sample grid. Multiplying
    # before dividing keeps integer decimation factors exact.
    positions = np.arange(x_decimated.size) * origin_sr / resample_sr
    x_decimated[:] = np.interp(positions, np.arange(n_in), x_conv, right=0.0)
    return x_decimated


# Load the source recording at 16 kHz.
x, Fs = librosa.load(r"C:\Users\PC00\Downloads\ssp2023-main\lab02\kdigits0-3.wav", sr=16000)

# Upsample to the three intermediate rates.
x_32 = resample(x, Fs, 32000)
x_48 = resample(x, Fs, 48000)
x_44 = resample(x, Fs, 44100)

# Decimate each intermediate signal to 8 kHz and 11.025 kHz.
# x_44 is sampled at 44100 Hz, so that is the rate passed as origin_sr.
dx_8_from32 = decimation(x_32, 32000, 8000)
dx_8_from48 = decimation(x_48, 48000, 8000)
dx_8_from44 = decimation(x_44, 44100, 8000)
dx_11_from32 = decimation(x_32, 32000, 11025)
dx_11_from48 = decimation(x_48, 48000, 11025)
dx_11_from44 = decimation(x_44, 44100, 11025)

# Save the results. torchaudio.save takes a 2-D (channels, frames) tensor;
# reshape(1, -1) produces that layout for both the (n, 1) resampled columns
# and the 1-D decimated arrays, whereas .T leaves a 1-D tensor unchanged.
torchaudio.save('./xxx_32k.wav', torch.tensor(x_32).reshape(1, -1), 32000)
torchaudio.save('./xxx_48k.wav', torch.tensor(x_48).reshape(1, -1), 48000)
torchaudio.save('./xxx_44k.wav', torch.tensor(x_44).reshape(1, -1), 44100)

torchaudio.save('./xxx_8k_from32.wav', torch.tensor(dx_8_from32).reshape(1, -1), 8000)
torchaudio.save('./xxx_8k_from48.wav', torch.tensor(dx_8_from48).reshape(1, -1), 8000)
torchaudio.save('./xxx_8k_from44.wav', torch.tensor(dx_8_from44).reshape(1, -1), 8000)
torchaudio.save('./xxx_11k_from32.wav', torch.tensor(dx_11_from32).reshape(1, -1), 11025)
torchaudio.save('./xxx_11k_from48.wav', torch.tensor(dx_11_from48).reshape(1, -1), 11025)
torchaudio.save('./xxx_11k_from44.wav', torch.tensor(dx_11_from44).reshape(1, -1), 11025)



# Cell output (truncated torchaudio warning): 'The interface of "soundfile" backend is planned to change in 0.8.0 to '
