# In [None]:
import os
import json
import whisper
from pathlib import Path
from tqdm import tqdm


def finding_files(directory):
    """Recursively collect the paths of media files under ``directory``.

    A file counts as media when its name, compared case-insensitively,
    ends with one of the audio/video extensions listed below.

    Args:
        directory: Root directory to walk with ``os.walk``.

    Returns:
        A list of paths (each the walked folder joined with the file
        name), in the order ``os.walk`` yields them.
    """
    media_suffixes = ('.mp3', '.wav', '.m4a', '.mp4', '.avi', '.mov')

    matches = []
    for folder, _subdirs, names in os.walk(directory):
        for name in names:
            # str.endswith accepts a tuple, so one call tests every suffix.
            if name.lower().endswith(media_suffixes):
                matches.append(os.path.join(folder, name))
    return matches

def transcribe_file(model, file_path):
    """Transcribe one media file and return the recognized text.

    Args:
        model: Object exposing ``transcribe(path)`` whose result can be
            indexed with the key ``"text"`` (here, a loaded Whisper model).
        file_path: Path of the media file to transcribe.

    Returns:
        The value stored under ``"text"`` in the transcription result.
    """
    result = model.transcribe(file_path)
    return result["text"]

def save_transcription(file_path, transcription, output_dir=None):
    """Write a transcription to ``<stem>.txt`` and ``<stem>.json``.

    ``<stem>`` is the base name of ``file_path`` without its extension.
    Outputs are named after the stem only, so two inputs that share a stem
    (e.g. ``a.mp3`` and ``a.wav``, or equally named files in different
    folders) write to the same output files.

    Args:
        file_path: Path of the media file that was transcribed. Only its
            base name is used for naming and recorded in the JSON file.
        transcription: Transcribed text to save.
        output_dir: Directory for the output files; created if it does
            not exist. When ``None``, the files are written next to the
            media file (the directory part of ``file_path``). An empty
            string means the current working directory.
    """
    source_name = os.path.basename(file_path)
    stem = os.path.splitext(source_name)[0]

    # The default of None used to reach os.path.join and raise TypeError;
    # fall back to the media file's own directory instead.
    if output_dir is None:
        output_dir = os.path.dirname(file_path)
    # An empty string means "current directory"; makedirs("") would raise.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, stem)

    # Save as both text and JSON
    with open(f"{output_path}.txt", "w", encoding="utf-8") as f:
        f.write(transcription)

    with open(f"{output_path}.json", "w", encoding="utf-8") as f:
        json.dump({
            "original_file": str(source_name),
            "transcription": transcription
        }, f, indent=2)

def main():
    """Prompt for directories and transcribe every media file found.

    Reads the input and output directory paths from standard input, scans
    the input directory for media files, transcribes each one with the
    Whisper ``"tiny"`` model, and saves the results to the output directory.
    Returns early, without loading the model, when no media files are found.
    """
    # directory from user
    input_directory = input("Enter the input directory path: ").strip()
    output_directory = input("Enter output directory path: ").strip()

    # Scan before loading the model so that an empty or mistyped input
    # directory is reported without paying the model-loading cost.
    print("Scanning for media files...")
    media_files = finding_files(input_directory)
    if not media_files:
        print(f"No media files found in: {input_directory}")
        return

    # Make sure the destination exists before any transcription work is
    # done; an empty answer means the current working directory.
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)

    # Load the smallest Whisper model
    print("Loading Whisper model...")
    model = whisper.load_model("tiny")

    # Process each file, with a tqdm progress bar
    for file_path in tqdm(media_files):
        transcription = transcribe_file(model, file_path)
        save_transcription(file_path, transcription, output_directory)

    print("Transcription complete!")

# Run the interactive transcription workflow only when this file is executed
# as a script, not when it is imported as a module.
if __name__ == "__main__":
    main()