In [29]:
# Install necessary packages
# pip install psola
# pip install sounddevice soundfile
# pip install pytsmod

# Import necessary packages
import os
import wave

import librosa
import numpy as np
import psola
import sounddevice as sd
import soundfile
import soundfile as sf


In [30]:
# Function definition to play audio
def play_flac(file_path):
    """Decode an audio file and play it, blocking until playback has ended.

    Args:
        file_path: Path of the audio file; it is opened in binary mode and the
            open handle is decoded with ``sf.read``.
    """
    with open(file_path, 'rb') as stream:
        samples, rate = sf.read(stream)

    # Start playback, then block until the device has finished.
    sd.play(samples, rate)
    sd.wait()

# Example usage: play one FLAC file. `file_path` is relative to the working
# directory and is read again by later cells in this notebook.
file_path = "data/dev-clean/84/121123/84-121123-0000.flac"
play_flac(file_path)

In [31]:
# Define function to play audio
def play_audio(audio, sample_rate):
    """Play in-memory audio and block until playback has finished.

    Args:
        audio: Samples handed to ``sd.play``.
        sample_rate: Playback rate handed to ``sd.play``.
    """
    sd.play(audio, sample_rate)
    # Block here so the cell does not return while audio is still playing.
    sd.wait()

# PSOLA operation: process the FLAC file with psola, then play the result.
# NOTE(review): fmin=1000 / fmax=1001 is a very narrow range — confirm it is intended.
# NOTE(review): confirm that psola.from_file returns an (audio, sample_rate)
# pair, as the unpacking below assumes.
audio, sample_rate = psola.from_file(file_path, fmin=1000, fmax =1001)
play_audio(audio, sample_rate)

In [32]:
# Convert the FLAC file to a 16-bit PCM WAV file written next to it.
audio, sample_rate = soundfile.read(file_path)
# Swap only the file extension: str.replace('flac', 'wav') would also rewrite
# any directory or file name that happens to contain "flac".
file_path_wav = os.path.splitext(file_path)[0] + '.wav'
soundfile.write(file_path_wav, audio, sample_rate, 'PCM_16')

# Play the decoded samples (the same data that was just written to disk).
play_audio(audio, sample_rate)

In [33]:
def play_wav(file_path):
    """Load an audio file with librosa and play it, blocking until done.

    Args:
        file_path: Path handed to ``librosa.load``. ``sr=None`` is passed so the
            rate returned by librosa is the one used for playback.
    """
    samples, native_rate = librosa.load(file_path, sr=None)

    # Start playback, then wait for it to finish.
    sd.play(samples, native_rate)
    sd.wait()

def add_white_noise(y, noise_level=0.05, rng=None):
    """Return ``y`` with zero-mean Gaussian white noise added.

    Args:
        y: Audio samples of any shape, e.g. mono ``(n,)`` or multi-channel
            ``(n, channels)``.
        noise_level: Standard deviation of the added noise.
        rng: Optional ``numpy.random.Generator`` used to draw the noise, for
            reproducible results. When omitted, the global NumPy random state
            is used, exactly as before.

    Returns:
        The noisy signal, with the same shape as ``y``.
    """
    sampler = np.random if rng is None else rng

    # Draw one independent noise value per element. Using the full shape
    # (instead of len(y)) keeps multi-channel input from failing to broadcast.
    noise = sampler.normal(scale=noise_level, size=np.shape(y))

    return y + noise

# Load the WAV file produced by the conversion cell. No `sr` argument is given,
# so librosa's default target rate applies.
# NOTE(review): librosa's documented default resamples to 22050 Hz; pass
# sr=None if the file's own rate should be kept — confirm which is intended.
audio, sample_rate = librosa.load(file_path_wav)
# Add white noise (default noise_level) to the loaded samples.
y_noisy = add_white_noise(audio)

play_audio(y_noisy, sample_rate)

In [38]:
import IPython.display as ipd
import numpy as np
import scipy
from scipy.io import wavfile
from scipy import signal

def bandpass(x, lo, hi):
    """Band-pass filter ``x`` by zeroing DCT coefficients outside ``[lo, hi)``.

    Args:
        x: Signal samples. Filtering is applied along the last axis, so a 1-D
            signal or a ``(channels, samples)`` array both work.
        lo: Lower cutoff as a fraction of the sample rate (``Hz / rate``).
        hi: Upper cutoff as a fraction of the sample rate. Values at or above
            0.5 (Nyquist) leave everything above ``lo`` untouched.

    Returns:
        The filtered signal, with the same shape as ``x``.
    """
    X = scipy.fft.dct(x, axis=-1)
    N = X.shape[-1]

    # DCT bin k corresponds to frequency k * rate / (2 * N), so a cutoff given
    # as a fraction of the sample rate maps to bin fraction * 2 * N. Clamp to
    # [0, N] so a negative cutoff cannot turn into a from-the-end slice index.
    lo_bin = min(max(int(lo * N * 2), 0), N)
    hi_bin = min(max(int(hi * N * 2), 0), N)

    X[..., :lo_bin] = 0
    X[..., hi_bin:] = 0
    return scipy.fft.idct(X, axis=-1)

# Load the FLAC file, resampled to 16 kHz. At this rate only content up to
# rate / 2 (8 kHz) can be represented.
original, rate = librosa.load(file_path, sr=16000)

# Display original audio. The label is derived from the actual sample rate
# (the previous hard-coded "22050 Hz" did not match audio loaded at 16 kHz).
ipd.display(ipd.HTML(f'Original (0 to {int(rate) // 2} Hz)'))
ipd.display(ipd.Audio(original, rate=rate))

# Cutoffs are passed to bandpass() as fractions of the sample rate (Hz / rate).

# Display narrowband audio
ipd.display(ipd.HTML('Narrowband (300 Hz to 3.3 kHz)'))
narrowband_audio = bandpass(original, 300/rate, 3300/rate)
ipd.display(ipd.Audio(narrowband_audio, rate=rate))

# Display wideband audio
ipd.display(ipd.HTML('Wideband (50 Hz to 7 kHz)'))
wideband_audio = bandpass(original, 50/rate, 7000/rate)
ipd.display(ipd.Audio(wideband_audio, rate=rate))

# NOTE: the upper cutoffs of the next two bands (16 kHz and 22 kHz) lie above
# rate / 2 for 16 kHz audio, so no high-frequency coefficients are removed;
# both bands only drop content below 50 Hz and therefore sound the same here.

# Display superwideband audio
ipd.display(ipd.HTML('Superwideband (50 Hz to 16 kHz)'))
superwideband_audio = bandpass(original, 50/rate, 16000/rate)
ipd.display(ipd.Audio(superwideband_audio, rate=rate))

# Display fullband audio
ipd.display(ipd.HTML('Fullband (50 Hz to 22 kHz)'))
fullband_audio = bandpass(original, 50/rate, 22000/rate)
ipd.display(ipd.Audio(fullband_audio, rate=rate))

In [47]:
import numpy as np
import pytsmod as tsm
import soundfile as sf  # you can use other audio load packages.

x, sr = sf.read(file_path)
x = x.T  # presumably puts channels first for pytsmod; a no-op for 1-D (mono) audio — TODO confirm
x_length = x.shape[-1]  # length of the audio sequence x.

s_fixed = 1.5  # stretch the whole signal by a fixed factor of 1.5x.
# Anchor points (row 0: source positions, row 1: target positions): the first
# half is stretched to twice its length, the second half keeps its length.
s_ap = np.array([[0, x_length / 2, x_length], [0, x_length, x_length * 1.5]])

x_s_fixed = tsm.wsola(x, s_fixed)
x_s_ap = tsm.wsola(x, s_ap)

# Play at the sample rate the file was read with rather than a hard-coded
# 16000, so playback speed stays correct for files with a different rate.
play_audio(x_s_ap, sr)