In [1]:
import os
import librosa
import numpy as np
from scipy.signal import butter, sosfilt
from scipy.io import wavfile

def multiband_compression(y, sr, low_cutoff, high_cutoff, low_gain, high_gain):
    """High-pass a signal and rescale its low and high frequency bands.

    The signal is first passed through a 12th-order Butterworth high-pass
    filter at ``low_cutoff``. The filtered signal is then taken to the
    frequency domain, where every bin at or below ``low_cutoff`` is multiplied
    by ``low_gain`` and every bin above ``high_cutoff`` is multiplied by
    ``high_gain``; bins in between are left untouched.

    The gains are applied per frequency bin. Indexing the time-domain sample
    array with frequency-derived indices (as an earlier revision did) scales
    the beginning and end of the recording instead of frequency bands.

    Parameters
    ----------
    y : array_like
        Audio samples; filtering is done along the last axis.
    sr : int or float
        Sampling rate of ``y`` in Hz.
    low_cutoff : float
        High-pass corner frequency and upper edge of the low band, in Hz.
        Must satisfy ``0 < low_cutoff < sr / 2``.
    high_cutoff : float
        Lower edge (exclusive) of the high band, in Hz.
    low_gain : float
        Linear gain applied to bins ``<= low_cutoff``.
    high_gain : float
        Linear gain applied to bins ``> high_cutoff``.

    Returns
    -------
    numpy.ndarray
        The processed signal, same shape as ``y``.

    Raises
    ------
    ValueError
        If ``low_cutoff`` is not strictly between 0 and the Nyquist frequency.
    """
    signal = np.asarray(y)
    nyquist = sr / 2
    if not 0 < low_cutoff < nyquist:
        raise ValueError(
            f"low_cutoff must be between 0 and the Nyquist frequency ({nyquist} Hz), got {low_cutoff}"
        )

    # An empty recording has nothing to filter (and the FFT rejects length 0).
    if signal.size == 0:
        return signal.astype(float)

    # Create and apply the Butterworth high-pass filter
    order = 12
    sos_highpass = butter(order, low_cutoff / nyquist, btype='high', analog=False, output='sos')
    filtered_audio = sosfilt(sos_highpass, signal, axis=-1)

    # Build one gain per FFT bin; the gains multiply if the bands overlap.
    n_samples = filtered_audio.shape[-1]
    bin_freqs = np.fft.rfftfreq(n_samples, d=1.0 / sr)
    band_gains = np.ones_like(bin_freqs)
    band_gains[bin_freqs <= low_cutoff] *= low_gain
    band_gains[bin_freqs > high_cutoff] *= high_gain

    # Apply the gains in the frequency domain and return to the time domain.
    spectrum = np.fft.rfft(filtered_audio, axis=-1)
    return np.fft.irfft(spectrum * band_gains, n=n_samples, axis=-1)

# Define parameters for multiband compression
# (cutoffs are in Hz, gains are linear multipliers)
low_cutoff = 300
high_cutoff = 3000
low_gain = 0.05
high_gain = 1.2

# Set input and output directories
input_dir = 'audio'
output_dir = 'audio_edited'

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Loop through all files in the input directory (sorted for a reproducible order)
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith('.wav'):
        continue

    # Load the audio file (librosa defaults are used for sample rate and channels)
    audio_file = os.path.join(input_dir, filename)
    y, sr = librosa.load(audio_file)

    # Apply multiband compression
    filtered_audio = multiband_compression(y, sr, low_cutoff, high_cutoff, low_gain, high_gain)

    # A gain above 1.0 can push samples outside [-1, 1]; clip first so the
    # int16 conversion saturates instead of wrapping around.
    pcm16 = np.int16(np.clip(filtered_audio, -1.0, 1.0) * 32767)

    # Export the filtered audio to a new file.
    # NOTE: wavfile.write emits WAV data; the '.mp3' suffix is kept only because
    # the transcription cells select their inputs by that extension.
    output_filename = os.path.splitext(filename)[0] + '.mp3'
    export_file = os.path.join(output_dir, output_filename)
    wavfile.write(export_file, sr, pcm16)

    print(f"Filtered audio saved as {export_file}")

print("Note: Converting to MP3 may result in further quality loss due to re-encoding.")
print("You can now use a speech-to-text tool like Whisper to convert the filtered audio to text.")

Filtered audio saved as audio_edited\300_AUDIO.mp3
Filtered audio saved as audio_edited\301_AUDIO.mp3
Filtered audio saved as audio_edited\302_AUDIO.mp3
Filtered audio saved as audio_edited\303_AUDIO.mp3
Filtered audio saved as audio_edited\304_AUDIO.mp3
Filtered audio saved as audio_edited\305_AUDIO.mp3
Filtered audio saved as audio_edited\306_AUDIO.mp3
Filtered audio saved as audio_edited\307_AUDIO.mp3
Filtered audio saved as audio_edited\308_AUDIO.mp3
Filtered audio saved as audio_edited\309_AUDIO.mp3
Filtered audio saved as audio_edited\310_AUDIO.mp3
Filtered audio saved as audio_edited\311_AUDIO.mp3
Filtered audio saved as audio_edited\312_AUDIO.mp3
Filtered audio saved as audio_edited\313_AUDIO.mp3
Filtered audio saved as audio_edited\314_AUDIO.mp3
Filtered audio saved as audio_edited\315_AUDIO.mp3
Filtered audio saved as audio_edited\316_AUDIO.mp3
Filtered audio saved as audio_edited\317_AUDIO.mp3
Filtered audio saved as audio_edited\318_AUDIO.mp3
Filtered audio saved as audio_e

In [2]:
import os
import whisper
from whisper.utils import get_writer

# Whisper model shared by every transcription in this cell
model = whisper.load_model("tiny")

# Where the filtered audio lives and where the transcripts go
input_dir = 'audio_edited'
output_dir = 'text'
os.makedirs(output_dir, exist_ok=True)

# Transcribe each '.mp3' entry of the input directory, in listing order
for filename in os.listdir(input_dir):
    if not filename.endswith('.mp3'):
        continue

    audio_file = os.path.join(input_dir, filename)
    result = model.transcribe(audio_file)

    # The transcript keeps the audio file's base name, with a .txt extension
    base_name, _ext = os.path.splitext(filename)
    output_filename = base_name + '.txt'
    output_file = os.path.join(output_dir, output_filename)
    with open(output_file, "w", encoding="utf-8") as txt:
        txt.write(result["text"])

    print(f"Transcription saved as {output_file}")

100%|█████████████████████████████████████| 72.1M/72.1M [00:06<00:00, 10.8MiB/s]


Transcription saved as text\300_AUDIO.txt
Transcription saved as text\301_AUDIO.txt
Transcription saved as text\302_AUDIO.txt
Transcription saved as text\303_AUDIO.txt
Transcription saved as text\304_AUDIO.txt
Transcription saved as text\305_AUDIO.txt
Transcription saved as text\306_AUDIO.txt
Transcription saved as text\307_AUDIO.txt
Transcription saved as text\308_AUDIO.txt
Transcription saved as text\309_AUDIO.txt
Transcription saved as text\310_AUDIO.txt
Transcription saved as text\311_AUDIO.txt
Transcription saved as text\312_AUDIO.txt
Transcription saved as text\313_AUDIO.txt
Transcription saved as text\314_AUDIO.txt
Transcription saved as text\315_AUDIO.txt
Transcription saved as text\316_AUDIO.txt
Transcription saved as text\317_AUDIO.txt
Transcription saved as text\318_AUDIO.txt
Transcription saved as text\319_AUDIO.txt
Transcription saved as text\320_AUDIO.txt
Transcription saved as text\321_AUDIO.txt
Transcription saved as text\322_AUDIO.txt
Transcription saved as text\323_AU

In [1]:
import os
import whisper
from whisper.utils import get_writer

# Load the Whisper model
model = whisper.load_model("tiny")

# Set input and output directories
input_dir = 'audio_edited'
output_dir = 'text'

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# First file (in sorted order) that still needs a transcript. Names are
# compared lexicographically, so processing resumes at the next file even if
# this exact name is missing, instead of silently transcribing nothing.
resume_from = '488_AUDIO.mp3'

# Loop through the input directory in sorted order, skipping everything
# that sorts before the resume marker
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith('.mp3') or filename < resume_from:
        continue

    # Load the audio file
    audio_file = os.path.join(input_dir, filename)

    # Transcribe the audio using Whisper
    result = model.transcribe(audio_file)

    # Save the transcription as a TXT file
    output_filename = os.path.splitext(filename)[0] + '.txt'
    output_file = os.path.join(output_dir, output_filename)
    with open(output_file, "w", encoding="utf-8") as txt:
        txt.write(result["text"])

    print(f"Transcription saved as {output_file}")


Transcription saved as text\488_AUDIO.txt
Transcription saved as text\489_AUDIO.txt
Transcription saved as text\490_AUDIO.txt
Transcription saved as text\491_AUDIO.txt
Transcription saved as text\492_AUDIO.txt
Transcription saved as text\600_AUDIO.txt
Transcription saved as text\601_AUDIO.txt
Transcription saved as text\602_AUDIO.txt
Transcription saved as text\603_AUDIO.txt
Transcription saved as text\604_AUDIO.txt
Transcription saved as text\605_AUDIO.txt
Transcription saved as text\606_AUDIO.txt
Transcription saved as text\607_AUDIO.txt
Transcription saved as text\608_AUDIO.txt
Transcription saved as text\609_AUDIO.txt
Transcription saved as text\612_AUDIO.txt
Transcription saved as text\615_AUDIO.txt
Transcription saved as text\617_AUDIO.txt
Transcription saved as text\618_AUDIO.txt
Transcription saved as text\619_AUDIO.txt
Transcription saved as text\620_AUDIO.txt
Transcription saved as text\622_AUDIO.txt
Transcription saved as text\623_AUDIO.txt
Transcription saved as text\624_AU