In [9]:
import numpy as np
import scipy.signal as signal
import librosa
import soundfile as sf
import matplotlib.pyplot as plt

# Band-pass pre-filtering: load the recording, keep only the
# low_cutoff..high_cutoff band with a Butterworth filter, and save the
# result as a WAV file.

# Band-pass specification
low_cutoff = 80  # Lower band edge in Hz
high_cutoff = 2000  # Upper band edge in Hz

# Load the MP3 file using librosa; sr=None keeps the file's original sampling rate
input_file = "C:/Users/Jose/Desktop/Hackathon/Speech2Text/convPepeTrijoMaggi.mp3"
x, sr = librosa.load(input_file, sr=None)

# The filter has to be designed for the sampling rate of the audio that was
# actually loaded, so the rate is taken from the file instead of being assumed
# up front (the filter is therefore designed exactly once, after loading).
fs = sr
nyquist = 0.5 * fs

# Fail early with a readable message if the band does not fit below Nyquist
# (e.g. a recording sampled at 4 kHz or less with a 2 kHz upper edge).
if not 0 < low_cutoff < high_cutoff < nyquist:
    raise ValueError(
        "Band-pass cutoffs must satisfy 0 < low_cutoff < high_cutoff < fs/2; "
        "got low_cutoff=%s Hz, high_cutoff=%s Hz, fs=%s Hz"
        % (low_cutoff, high_cutoff, fs)
    )

# Normalize the band edges by the Nyquist frequency (1.0 == fs / 2)
low = low_cutoff / nyquist
high = high_cutoff / nyquist

# Design the band-pass filter as second-order sections (SOS).
# N is the order of the prototype; for a band-pass design the resulting
# filter has order 2 * N (here 8).
sos = signal.iirfilter(
    N=4,
    Wn=[low, high],  # Normalized cutoff frequencies
    btype='band',
    ftype='butter',  # Butterworth filter
    output='sos'
)

# Apply the SOS filter to the audio signal
filtered_x = signal.sosfilt(sos, x)

# Save the filtered audio to a new file at the original sampling rate
output_file = 'C:/Users/Jose/Desktop/Hackathon/Speech2Text/convPepeTrijoMaggi.wav'
sf.write(output_file, filtered_x, sr)



## Faster-Whisper Speech2Text

In [8]:
from faster_whisper import WhisperModel
# Whisper model size to load. Available sizes: tiny, tiny.en, base, base.en,
# small, small.en, distil-small.en, medium, medium.en, distil-medium.en,
# large-v1, large-v2, large-v3, large, distil-large-v2 or distil-large-v3.
model_size = "medium"

# Run on CPU with INT8. GPU alternatives, if CUDA is available:
#   WhisperModel(model_size, device="cuda", compute_type="float16")       # FP16
#   WhisperModel(model_size, device="cuda", compute_type="int8_float16")  # INT8
model = WhisperModel(model_size, device="cpu", compute_type="int8")

# Transcribe the WAV file produced by the band-pass filtering cell.
segments, info = model.transcribe(
    r"C:\Users\Jose\Desktop\Hackathon\Speech2Text\convPepeTrijoMaggi.wav",
    beam_size=5,
)

# Report the detected language and its probability.
print(f"Detected language '{info.language}' with probability {info.language_probability:f}")

# Print each transcribed segment with its start/end timestamps in seconds.
for segment in segments:
    print(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")

Detected language 'es' with probability 0.920982
[0.00s -> 8.00s]  les quería preguntar cómo le fue el día que dio la cagada con la tormenta
[16.00s -> 25.00s]  yo estuve hasta el miércoles sin luz y venía a Carlaú a pechar energía
[31.00s -> 35.00s]  yo al contrario, tuve luz todo el rato
[35.00s -> 42.00s]  no se me cortó menos mal, pero mi hermano, mi prima, vinieron a pedirnos luz
[42.00s -> 45.00s]  estamos cargando los celulares de todos, literalmente de todos
[45.00s -> 48.00s]  de los amigos, del amigo del amigo, de la mamá del amigo
[48.00s -> 56.00s]  la cuenta de luz, no sé cómo salió, pero fue muy chistoso para estar por cada interruptor
[56.00s -> 62.00s]  o sea, enchufe nuevo, enchufe nuevo, celular nuevo que encontraron, era impresionante
[66.00s -> 72.00s]  para mí fue terrible porque de hecho, como mi entrada de la casa se inunda
[72.00s -> 76.00s]  cortaron la luz y nosotros sacamos el agua con una bomba eléctrica
[76.00s -> 82.00s]  entonces, claro, pasó toda la noch