In [None]:
import utils.utils as utils
import app
from utils.ipynb_pipeline import create_file_selector, get_selected_file_paths

# Language pair forwarded to app.multi_transcribe by the transcription cell
# (presumably ISO 639-1 codes -- confirm against the services in use).
source_language = "it"
target_language = "en"
# Directory handed to create_file_selector to look for input files.
# NOTE(review): machine-specific absolute path; adjust per environment.
input_path = "/Users/ramiibrahimi/Documents/test.nosync/"

In [None]:
# File extensions passed to the selector.
extensions = [".mp4", ".wmv", ".mpg", ".srt", ".mov"]
# Alternative for restricting the selector to a single type:
# extensions = [".wmv"]

# Build the selector for input_path; both returned objects are consumed later
# by get_selected_file_paths (dropdown is presumably a notebook widget -- confirm).
dropdown, file_map = create_file_selector(input_path, extensions)

In [None]:
# Resolve the current selector state into the paths to transcribe, and echo
# them so the selection can be checked before starting the run.
file_paths = get_selected_file_paths(dropdown, file_map)
print(file_paths)

In [None]:
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

# Settings for running several transcription services concurrently.
# Region forwarded to app.multi_transcribe as server_region.
# Google Chirp: "europe-west4" or "us-central1"; Google long: "global".
server_region = "us-central1"
# Services to run; each one gets its own coordinating thread.
services = ["openai", "google"]
# Held by process_single_file while it writes .srt output.
file_lock = threading.Lock()


def process_single_file(
    service_name, file_path, source_language, target_language, server_region
):
    """Transcribe one file with one service and save the result as an .srt file.

    Calls app.multi_transcribe for the single requested service, then writes
    the returned subtitle data to the path produced by utils.replace_extension
    (called with a "_<service_name>" end modifier and a ".srt" extension).

    Args:
        service_name: Service to use; also the key read from the response.
        file_path: Path of the media file to transcribe.
        source_language: Source language code forwarded to the service.
        target_language: Target language code forwarded to the service.
        server_region: Region string forwarded to the service.

    Returns:
        A status string: "<service>: <file> - Success" on success, otherwise
        an error message that includes the formatted traceback. Exceptions
        derived from Exception are reported this way instead of propagating.
    """
    try:
        # Short pause kept from the original implementation, where it was
        # described as a guard against race conditions between workers.
        time.sleep(0.1)

        transcripts = app.multi_transcribe(
            file_path=file_path,
            service_names=[service_name],
            source_language=source_language,
            target_language=target_language,
            audio_output_extension=".flac",
            server_region=server_region,
        )
        srt_data = transcripts[service_name]
        print(f"{service_name} completed: {file_path}")

        # Serialise output-path derivation and the write across all workers.
        with file_lock:
            utils.save_srt_data(
                srt_data=srt_data,
                file_path=utils.replace_extension(
                    file_path=file_path,
                    end_modifiers=f"_{service_name}",
                    new_extension=".srt",
                ),
            )

        return f"{service_name}: {file_path} - Success"

    except Exception as exc:
        import traceback

        # Report the failure as a string so one bad file does not abort the batch.
        failure = (
            f"Error processing {file_path} with {service_name}: {exc}"
            f"\nTraceback: {traceback.format_exc()}"
        )
        print(failure)
        return failure


def process_service_files(
    service_name,
    file_paths,
    source_language,
    target_language,
    server_region,
    max_workers=1,
):
    """Run process_single_file for every path in file_paths on a thread pool.

    Args:
        service_name: Service passed through to process_single_file.
        file_paths: Iterable of file paths; one task is submitted per path.
        source_language: Source language code passed through to each task.
        target_language: Target language code passed through to each task.
        server_region: Region string passed through to each task.
        max_workers: Size of the thread pool used for this service.

    Returns:
        None. Start and finish are announced on stdout; each task prints its
        own outcome.
    """
    print(f"Starting {service_name} processing with {max_workers} workers...")

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Queue one task per file for this service.
        pending = [
            pool.submit(
                process_single_file,
                service_name,
                path,
                source_language,
                target_language,
                server_region,
            )
            for path in file_paths
        ]

        # Drain the tasks as they finish; calling result() re-raises anything
        # that escaped a task. The returned status string is not needed here
        # because process_single_file already prints it.
        for finished in as_completed(pending):
            finished.result()

    print(f"Finished {service_name} processing")


# Launch one coordinating thread per service so the services run side by side.
threads = []
for service in services:
    thread = threading.Thread(
        target=process_service_files,
        kwargs={
            "service_name": service,
            "file_paths": file_paths,
            "source_language": source_language,
            "target_language": target_language,
            "server_region": server_region,
            # One pool worker per service: files are handled one at a time
            # within each service.
            "max_workers": 1,
        },
    )
    thread.start()
    threads.append(thread)

# Block until every service has worked through all selected files.
for thread in threads:
    thread.join()

print("All transcription services completed!")