In [1]:
import os
import time
from datetime import datetime
import whisper

# ---------------- Settings ----------------
# Whisper checkpoint to load; "small" trades accuracy for lower memory usage.
MODEL_SIZE = "medium"
# Audio is searched for (recursively) under the current working directory.
# To scan a specific export folder instead, join it onto the cwd, e.g.:
#   os.path.join(os.getcwd(),
#                r"Bulla Audio Recordings-20250913T212225Z-1-001/Bulla Audio Recordings")
RECORDINGS_DIR = os.getcwd()
# Output folders are named "<prefix>_<timestamp>".
MASTER_OUTPUT_PREFIX = f"transcripts_{MODEL_SIZE}"
# Language code handed to Whisper's transcribe().
LANGUAGE = "es"
# ------------------------------------------

# ---------------- Device ----------------
# Device string handed to whisper.load_model().
DEVICE = "cpu"
print("⚠️ Using CPU for transcription")

# ---------------- Functions ----------------
def save_segment_csv(csv_path, segments):
    """Save segment-level transcript CSV file.

    Writes a header row ``start_time_s,end_time_s,text`` followed by one row
    per segment. Times are formatted with three decimals; the text is
    stripped of surrounding whitespace and always wrapped in double quotes.

    Args:
        csv_path: Destination path; the file is created or overwritten and
            encoded as UTF-8.
        segments: Iterable of mappings with ``"start"`` and ``"end"``
            (numbers) and ``"text"`` (str) keys.
    """
    with open(csv_path, "w", encoding="utf-8") as f:
        f.write("start_time_s,end_time_s,text\n")
        for seg in segments:
            # A double quote inside a quoted CSV field must be doubled;
            # otherwise a transcript containing '"' breaks the row apart
            # when the file is parsed.
            text = seg["text"].strip().replace('"', '""')
            f.write(f'{seg["start"]:.3f},{seg["end"]:.3f},"{text}"\n')


def process_single_file(model, audio_file, output_master_dir):
    """Run the model on one audio file and write its segments to a CSV.

    Args:
        model: Object exposing ``transcribe(path, language=..., verbose=...)``
            that returns a mapping, optionally holding a ``"segments"`` list.
        audio_file: Path of the audio file to transcribe.
        output_master_dir: Directory that receives the CSV file.

    The CSV is named ``<stem>_<N>s_segments.csv`` where ``<stem>`` is the
    audio file name without extension and ``<N>`` is the wall-clock
    transcription time truncated to whole seconds.
    """
    print(f"\n=== Processing {audio_file}")
    stem = os.path.splitext(os.path.basename(audio_file))[0]

    # Time only the transcription call itself.
    t0 = time.time()
    result = model.transcribe(audio_file, language=LANGUAGE, verbose=False)
    elapsed = time.time() - t0

    csv_path = os.path.join(
        output_master_dir,
        f"{stem}_{int(elapsed)}s_segments.csv",
    )

    # Keep just the fields the CSV needs, coerced to plain Python types.
    segments = []
    for seg in result.get("segments", []):
        segments.append(
            {
                "start": float(seg["start"]),
                "end": float(seg["end"]),
                "text": seg["text"].strip(),
            }
        )

    save_segment_csv(csv_path, segments)
    print(f" CSV saved: {csv_path} (took {elapsed:.2f} seconds)")


def find_audio_files(directory=RECORDINGS_DIR):
    """Return paths of all audio files found anywhere under ``directory``.

    A file counts as audio when its name ends, case-insensitively, with
    ``.wav``, ``.mp3``, ``.m4a`` or ``.flac``. Within each folder the names
    are visited in sorted order; folders are visited in ``os.walk`` order.

    Args:
        directory: Root folder to walk. The default is the value
            ``RECORDINGS_DIR`` had when this ``def`` was executed.

    Returns:
        List of paths, each built by joining the containing folder and the
        file name.
    """
    audio_suffixes = (".wav", ".mp3", ".m4a", ".flac")
    return [
        os.path.join(folder, fname)
        for folder, _subdirs, fnames in os.walk(directory)
        for fname in sorted(fnames)
        if fname.lower().endswith(audio_suffixes)
    ]

# ---------------- Main ----------------
# Collect every supported audio file under RECORDINGS_DIR (searched recursively).
audio_files = find_audio_files(RECORDINGS_DIR)
print(f"📂 Found {len(audio_files)} audio files")

# Stop the cell early when there is nothing to transcribe.
if not audio_files:
    raise SystemExit("No audio files found in recordings folder")

# Each run writes into its own timestamped folder under the current working
# directory, so earlier results are never overwritten.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_master = os.path.join(os.getcwd(), f"{MASTER_OUTPUT_PREFIX}_{timestamp}")
os.makedirs(output_master, exist_ok=True)

# Load the model once and reuse the same instance for every file.
print(f"Loading Whisper model '{MODEL_SIZE}' on CPU...")
model = whisper.load_model(MODEL_SIZE, device=DEVICE)

for fpath in audio_files:
    try:
        process_single_file(model, fpath, output_master)
    except Exception as e:
        # Best-effort batch: report the failing file and move on to the next.
        print(f"⚠️ Error with {fpath}: {e}")

print(f"\nAll done! Results in: {output_master}")


⚠️ Using CPU for transcription
📂 Found 1 audio files
Loading Whisper model 'medium' on CPU...


100%|█████████████████████████████████████| 1.42G/1.42G [02:26<00:00, 10.4MiB/s]
  checkpoint = torch.load(fp, map_location=device)



=== Processing /home/mlw19mlw91/Downloads/H_Nonsense_Words_Examples.wav


100%|██████████| 18779/18779 [01:20<00:00, 234.49frames/s]

 CSV saved: /home/mlw19mlw91/Downloads/transcripts_medium_20250922_195739/H_Nonsense_Words_Examples_80s_segments.csv (took 80.52 seconds)

All done! Results in: /home/mlw19mlw91/Downloads/transcripts_medium_20250922_195739





In [2]:
# 🔁 Re-run transcription on audio files with the already loaded model
#
# NOTE: this cell defines nothing of its own. It relies on names created by
# an earlier cell in the same kernel session: os, datetime, RECORDINGS_DIR,
# MASTER_OUTPUT_PREFIX, find_audio_files, process_single_file and model.
# Running it on a fresh kernel fails with NameError.

# Re-scan for audio files
audio_files = find_audio_files(RECORDINGS_DIR)
print(f"📂 Found {len(audio_files)} audio files")

# Stop the cell early when there is nothing to transcribe.
if not audio_files:
    raise SystemExit("No audio files found in recordings folder")

# New output folder (with timestamp)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_master = os.path.join(os.getcwd(), f"{MASTER_OUTPUT_PREFIX}_{timestamp}")
os.makedirs(output_master, exist_ok=True)

# Process files again using the already loaded model
for fpath in audio_files:
    try:
        process_single_file(model, fpath, output_master)
    except Exception as e:
        # Best-effort batch: report the failing file and move on to the next.
        print(f"⚠️ Error with {fpath}: {e}")

print(f"\nAll done! Results in: {output_master}")


📂 Found 1 audio files

=== Processing /home/mlw19mlw91/Downloads/H_Nonsense_Words_Examples.wav


100%|██████████| 18779/18779 [01:27<00:00, 214.84frames/s]

 CSV saved: /home/mlw19mlw91/Downloads/transcripts_medium_20250922_203504/H_Nonsense_Words_Examples_87s_segments.csv (took 87.73 seconds)

All done! Results in: /home/mlw19mlw91/Downloads/transcripts_medium_20250922_203504





In [None]:
import os
import time
from datetime import datetime
import whisper
# import torch

# ---------------- Settings ----------------
MODEL_SIZE = "medium"  # or "small" for lower memory usage
# Device string handed to whisper.load_model().
DEVICE = "cpu"
# Folder (relative to the current working directory) that is searched
# recursively for audio files.
RECORDINGS_DIR = os.path.join(
    os.getcwd(),
    r"Bulla Audio Recordings-20250913T212225Z-1-001/Bulla Audio Recordings"
)
# The device is deliberately left out of the prefix: the output folder name
# already appends device_label when it is built, so adding DEVICE here gave
# names like "transcripts_mediumcpu_CPU_<timestamp>".
MASTER_OUTPUT_PREFIX = "transcripts_" + MODEL_SIZE
# Language code handed to Whisper's transcribe().
LANGUAGE = "es"
# ------------------------------------------

# ---------------- Device ----------------

# Human-readable device name used in log messages and the output folder name.
device_label = "CPU"
# if torch.cuda.is_available():
#     DEVICE = torch.device("cuda:0")
#     device_label = "GPU"
print(f"⚠️ Using {device_label} for transcription")

# ---------------- Functions ----------------
def save_segment_csv(csv_path, segments):
    """Save segment-level transcript CSV file.

    Writes a header row ``start_time_s,end_time_s,text`` followed by one row
    per segment. Times are formatted with three decimals; the text is
    stripped of surrounding whitespace and always wrapped in double quotes.

    Args:
        csv_path: Destination path; the file is created or overwritten and
            encoded as UTF-8.
        segments: Iterable of mappings with ``"start"`` and ``"end"``
            (numbers) and ``"text"`` (str) keys.
    """
    with open(csv_path, "w", encoding="utf-8") as f:
        f.write("start_time_s,end_time_s,text\n")
        for seg in segments:
            # A double quote inside a quoted CSV field must be doubled;
            # otherwise a transcript containing '"' breaks the row apart
            # when the file is parsed.
            text = seg["text"].strip().replace('"', '""')
            f.write(f'{seg["start"]:.3f},{seg["end"]:.3f},"{text}"\n')


def process_single_file(model, audio_file, output_master_dir):
    """Run the model on one audio file and write its segments to a CSV.

    Args:
        model: Object exposing ``transcribe(path, language=..., verbose=...)``
            that returns a mapping, optionally holding a ``"segments"`` list.
        audio_file: Path of the audio file to transcribe.
        output_master_dir: Directory that receives the CSV file.

    The CSV is named ``<stem>_<N>s_segments.csv`` where ``<stem>`` is the
    audio file name without extension and ``<N>`` is the wall-clock
    transcription time truncated to whole seconds.
    """
    print(f"\n=== Processing {audio_file}")
    stem = os.path.splitext(os.path.basename(audio_file))[0]

    # Time only the transcription call itself.
    t0 = time.time()
    result = model.transcribe(audio_file, language=LANGUAGE, verbose=False)
    elapsed = time.time() - t0

    csv_path = os.path.join(
        output_master_dir,
        f"{stem}_{int(elapsed)}s_segments.csv",
    )

    # Keep just the fields the CSV needs, coerced to plain Python types.
    segments = []
    for seg in result.get("segments", []):
        segments.append(
            {
                "start": float(seg["start"]),
                "end": float(seg["end"]),
                "text": seg["text"].strip(),
            }
        )

    save_segment_csv(csv_path, segments)
    print(f" CSV saved: {csv_path} (took {elapsed:.2f} seconds)")


def find_audio_files(directory=RECORDINGS_DIR):
    """Return paths of all audio files found anywhere under ``directory``.

    A file counts as audio when its name ends, case-insensitively, with
    ``.wav``, ``.mp3``, ``.m4a`` or ``.flac``. Within each folder the names
    are visited in sorted order; folders are visited in ``os.walk`` order.

    Args:
        directory: Root folder to walk. The default is the value
            ``RECORDINGS_DIR`` had when this ``def`` was executed.

    Returns:
        List of paths, each built by joining the containing folder and the
        file name.
    """
    audio_suffixes = (".wav", ".mp3", ".m4a", ".flac")
    return [
        os.path.join(folder, fname)
        for folder, _subdirs, fnames in os.walk(directory)
        for fname in sorted(fnames)
        if fname.lower().endswith(audio_suffixes)
    ]

# ---------------- Main ----------------
# Collect every supported audio file under RECORDINGS_DIR (searched recursively).
audio_files = find_audio_files(RECORDINGS_DIR)
print(f"📂 Found {len(audio_files)} audio files")

# Stop the cell early when there is nothing to transcribe.
if not audio_files:
    raise SystemExit("No audio files found in recordings folder")

# Each run writes into its own timestamped folder under the current working
# directory, so earlier results are never overwritten.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_master = os.path.join(
    os.getcwd(),
    f"{MASTER_OUTPUT_PREFIX}_{device_label}_{timestamp}"  # Include CPU/GPU in folder name
)
os.makedirs(output_master, exist_ok=True)

# Load the model once and reuse the same instance for every file.
print(f"Loading Whisper model '{MODEL_SIZE}' on {device_label}...")
model = whisper.load_model(MODEL_SIZE, device=DEVICE)

for fpath in audio_files:
    try:
        process_single_file(model, fpath, output_master)
    except Exception as e:
        # Best-effort batch: report the failing file and move on to the next.
        print(f"⚠️ Error with {fpath}: {e}")

print(f"\nAll done! Results in: {output_master}")
