# In [1]:
# Prerequisite: ffmpeg must be installed locally before running this file.
import whisper
# Load the "large-v2" Whisper model once at module level; the watcher set up
# later in this file passes this `model` object to AudioFileHandler.
model = whisper.load_model("large-v2")

# In [3]:
# This part continuously watches the audio_files_drop folder for new audio files,
# transcribes each one, and fixes the common misspellings listed in doc/whisper_errors.xlsx.
# The transcript is written to the transcripts folder and the audio is moved to the processed_audios folder.
import os
import re
import shutil
import tempfile
import time

import pandas as pd
from pydub import AudioSegment
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

def load_corrections(file_path):
    """Load the error -> correction mapping from an Excel workbook.

    The sheet must contain the columns ``error`` and ``correction``. Rows
    where either cell is blank are ignored, and the remaining values are
    converted to ``str`` so that numeric cells (for example a year) can be
    used as regex patterns and replacements by ``clean_text``.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        dict mapping each error string to its correction string. When the
        same error appears more than once, the last row wins.

    Raises:
        KeyError: If the sheet lacks an ``error`` or ``correction`` column.
    """
    df = pd.read_excel(file_path)
    # Blank cells are read back as NaN (a float); left in place they make the
    # later regex substitution raise TypeError, so drop incomplete rows here.
    df = df.dropna(subset=['error', 'correction'])
    errors = df['error'].astype(str)
    fixes = df['correction'].astype(str)
    return dict(zip(errors, fixes))

def clean_text(text, corrections):
    """Replace known mis-transcriptions in *text* with their corrections.

    Every key of *corrections* is matched literally (regex metacharacters are
    escaped) and case-insensitively, and each occurrence is replaced by the
    mapped value exactly as written. Entries are applied in the mapping's
    iteration order, so a later entry operates on the output of earlier ones.

    Args:
        text: The text to clean.
        corrections: Mapping of error string -> correction string. Entries
            whose error is not a non-empty string, or whose correction is not
            a string, are ignored.

    Returns:
        The text with all applicable corrections applied.
    """
    for error, correction in corrections.items():
        # Skip unusable entries: an empty pattern matches between every
        # character (splicing the correction throughout the text), and
        # non-string values (e.g. NaN from blank spreadsheet cells) would make
        # re.escape / re.sub raise TypeError.
        if not (isinstance(error, str) and error and isinstance(correction, str)):
            continue
        # Use a callable replacement so the correction is inserted verbatim;
        # a plain string would be parsed as a template in which backslashes
        # and group references such as "\1" are interpreted.
        text = re.sub(
            re.escape(error),
            lambda _match, replacement=correction: replacement,
            text,
            flags=re.IGNORECASE,
        )
    return text

class AudioFileHandler(FileSystemEventHandler):
    """Transcribe audio files as they are created in a watched folder.

    For each new ``.wav``, ``.mp3`` or ``.m4a`` file the handler writes a
    temporary status file next to it, transcribes it with the supplied model,
    applies the spelling corrections via ``clean_text``, saves the transcript
    as ``<audio file name>.txt`` in ``output_dir`` and finally moves the audio
    file into ``processed_dir``.
    """

    # Lower-case extensions accepted as audio input; other files are ignored.
    AUDIO_EXTENSIONS = ('.wav', '.mp3', '.m4a')

    def __init__(self, model, corrections, output_dir, processed_dir):
        """Store the collaborators used by ``process_file``.

        Args:
            model: Object whose ``transcribe(path)`` returns a mapping with a
                ``'text'`` entry (the Whisper model loaded in this file).
            corrections: Mapping of error -> correction given to ``clean_text``.
            output_dir: Directory that receives the transcript files.
            processed_dir: Directory the audio file is moved to afterwards.
        """
        self.model = model
        self.corrections = corrections
        self.output_dir = output_dir
        self.processed_dir = processed_dir

    def on_created(self, event):
        """Process the newly created path unless it is a directory."""
        if not event.is_directory:
            self.process_file(event.src_path)

    def process_file(self, file_path):
        """Transcribe one audio file and archive it.

        Paths without an audio extension are ignored. Any exception is
        reported on stdout instead of being raised, so a single bad file does
        not propagate out of the event callback. The status file is removed
        whether or not the transcription succeeded.

        Args:
            file_path: Path of the file that appeared in the watched folder.
        """
        status_file = None
        try:
            # Check if the file is an audio file
            if not file_path.lower().endswith(self.AUDIO_EXTENSIONS):
                return

            print(f"Processing file: {file_path}")

            # NOTE(review): the created event may arrive while the file is
            # still being copied into the folder; if truncated reads show up,
            # wait for the file size to stabilise before decoding - confirm
            # against how files are dropped.
            # Decode once; the segment serves both the run-time estimate and
            # the WAV conversion.
            audio = AudioSegment.from_file(file_path)
            estimated_time = int(audio.duration_seconds / 60)  # Estimated time in minutes

            audio_basename = os.path.basename(file_path)

            # Create a temporary status file in the drop folder
            status_file = self._status_file_path(file_path, estimated_time)
            with open(status_file, "w", encoding="utf-8") as f:
                f.write(f"Transcription for {audio_basename} started.\n")
                f.write(f"Estimated run time: {estimated_time} mins.\n")

            print(f"Created status file: {status_file}")

            # Transcribe with Whisper, then apply the spelling corrections
            transcription_text = clean_text(
                self._transcribe(file_path, audio), self.corrections
            )

            # Save transcription to a file. UTF-8 is explicit so transcripts
            # with non-ASCII characters do not depend on the platform default.
            transcript_file = os.path.join(self.output_dir, audio_basename + '.txt')
            with open(transcript_file, "w", encoding="utf-8") as f:
                f.write(transcription_text)

            print(f"Transcription saved to: {transcript_file}")

            # Move the processed audio file to another folder
            shutil.move(file_path, os.path.join(self.processed_dir, audio_basename))

            print(f"Moved {file_path} to {self.processed_dir}")

        except Exception as e:
            print(f"Error processing file {file_path}: {e}")
        finally:
            # Always clear the "transcription started" marker; otherwise a
            # failed run would leave it claiming work is still in progress.
            if status_file is not None and os.path.exists(status_file):
                try:
                    os.remove(status_file)
                    print(f"Deleted status file: {status_file}")
                except OSError as e:
                    print(f"Could not delete status file {status_file}: {e}")

    def _status_file_path(self, file_path, estimated_time):
        """Return the path of the status file placed next to *file_path*."""
        # Replace characters outside word chars, '-', '_', '.', and space so
        # the audio name can be embedded in another file name.
        sanitized_basename = re.sub(r'[^\w\-_\. ]', '_', os.path.basename(file_path))
        return os.path.join(
            os.path.dirname(file_path),
            f"transcription started for {sanitized_basename} - estimated run time {estimated_time} mins.txt",
        )

    def _transcribe(self, file_path, audio):
        """Return the raw transcript text, converting non-WAV input to WAV first."""
        if file_path.lower().endswith('.wav'):
            return self.model.transcribe(file_path)['text']

        # Convert inside a scratch directory rather than beside the source
        # file: a WAV written into the watched folder would overwrite a
        # same-named user file and would be a new file in the watched folder
        # (expected to trigger another created event). The directory and the
        # converted file are removed on exit, even if transcription fails.
        with tempfile.TemporaryDirectory() as scratch_dir:
            stem = os.path.splitext(os.path.basename(file_path))[0]
            wav_path = os.path.join(scratch_dir, stem + ".wav")
            audio.export(wav_path, format="wav")
            return self.model.transcribe(wav_path)['text']

if __name__ == "__main__":
    # Entry point: watch the drop folder and transcribe every audio file that
    # appears in it until interrupted with Ctrl+C.
    watch_dir = "../audio_files_drop"  # Directory to watch for new files
    output_dir = "../transcripts"  # Directory to save transcripts
    processed_dir = "../processed_audios"  # Directory to move processed audio files

    # Create every working directory up front, including the drop folder,
    # so the observer is never pointed at a missing path.
    for directory in (watch_dir, output_dir, processed_dir):
        os.makedirs(directory, exist_ok=True)

    # Load error corrections from the Excel file
    corrections_file = "../doc/whisper_errors.xlsx"  # list common whisper errors and their correction
    corrections = load_corrections(corrections_file)

    # Set up observer and event handler for the folder.
    # `model` is the Whisper model loaded earlier in this file.
    observer = Observer()
    event_handler = AudioFileHandler(model, corrections, output_dir, processed_dir)
    observer.schedule(event_handler, path=watch_dir, recursive=False)

    observer.start()

    print(f"Watching directory: {watch_dir}")

    try:
        # Keep the main thread alive while the observer thread does the work.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        # Stop and join on every exit path, not only on Ctrl+C, so the
        # observer thread is always shut down.
        observer.stop()
        observer.join()

# Output: Watching directory: ../audio_files_drop
