In [None]:
import os
import torch
import functools
import whisperx
from whisperx.diarize import DiarizationPipeline

# --- COMPATIBILITY WORKAROUND (this is NOT a security fix) ---
# Every torch.load call is forced to run with weights_only=False, i.e. full
# unpickling of the checkpoint. That turns OFF a safety restriction, so only
# load checkpoints that come from a source you trust.
os.environ["TORCH_FORCE_WEIGHTS_ONLY_LOAD"] = "0"

# Re-running this cell must not wrap the wrapper again: if torch.load is already
# our wrapper, recover the pristine loader that was stashed on it.
_original_load = getattr(torch.load, "_unpatched_load", torch.load)

@functools.wraps(_original_load)
def unsafe_load_wrapper(*args, **kwargs):
    # Override whatever the caller asked for; a plain assignment also avoids
    # passing the keyword twice.
    kwargs["weights_only"] = False
    return _original_load(*args, **kwargs)

# Marker read by the getattr() above on subsequent executions of this cell.
unsafe_load_wrapper._unpatched_load = _original_load
torch.load = unsafe_load_wrapper
# -----------------------

In [None]:


# --- Configuration ---
device = "cuda"
audio_file = r"e:\legendary_interview\test\raw_TrLR.flac"
batch_size = 16
compute_type = "float16"
model_dir = "e:/ai_models/whisper_large_v3" # Update path if using v3

# OPTION 1: Try 'large-v3' for better Danish accuracy
model_name = "large-v3"

# Vocabulary prompt: Danish farming terms handed to the ASR model as its initial prompt
agri_prompt = "Landbrug, omdrift, gødning, efterafgrøder, biomasse, landmand, høst, hektar, bedrift."
asr_options = {"initial_prompt": agri_prompt}

# The Hugging Face token is read from the environment so that no credential is
# stored in the notebook. It is checked up front so a missing token fails here
# rather than after the transcription and alignment steps have already run.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "HF_TOKEN is not set: export a Hugging Face access token in the "
        "environment before running the diarization pipeline."
    )

# --- 1. Transcribe ---
print(f"Loading {model_name} on {device}...")
model = whisperx.load_model(
    model_name,
    device,
    compute_type=compute_type,
    download_root=model_dir,
    language="da",
    asr_options=asr_options
)

print("Transcribing...")
audio = whisperx.load_audio(audio_file)
result = model.transcribe(audio, batch_size=batch_size)

# --- 2. Align the transcribed segments against the audio ---
print("Aligning...")
model_a, metadata = whisperx.load_align_model(language_code="da", device=device)
result = whisperx.align(result["segments"], model_a, metadata, audio, device)

# --- 3. Diarize and attach speaker labels ---
print("Diarizing (Forcing 2 Speakers)...")
diarize_model = DiarizationPipeline(use_auth_token=hf_token, device=device)

# OPTION 2: Force min/max speakers to stabilize speaker labels
diarize_segments = diarize_model(audio, min_speakers=2, max_speakers=2)

result = whisperx.assign_word_speakers(diarize_segments, result)

# Preview
print("-" * 50)
for seg in result["segments"][:10]: # Just print first 10 for check
    # Segments without a 'speaker' key are shown as 'Unknown'.
    speaker = seg.get('speaker', 'Unknown')
    print(f"[{seg['start']:>7.2f} - {seg['end']:>7.2f}] {speaker}: {seg['text']}")

In [None]:
import torch
# Environment sanity check: report the installed torch build and whether CUDA is usable.
print("Version:", torch.__version__)                # Should say something like '2.5.1+cu124'
print("CUDA Available:", torch.cuda.is_available())  # MUST say 'True'

In [None]:
def write_srt(segments, file_path):
    """Write transcript segments to ``file_path`` in SubRip (.srt) format.

    Each segment becomes one numbered cue (numbering starts at 1) whose text is
    ``[<speaker>]: <text>``.

    Args:
        segments: Iterable of dicts with ``'start'`` and ``'end'`` times in
            seconds and a ``'text'`` string. A missing ``'speaker'`` key is
            rendered as ``'Unknown'``.
        file_path: Destination path; the file is created or overwritten and
            encoded as UTF-8.
    """
    def format_time(seconds):
        """Render a time in seconds as an SRT timestamp ``HH:MM:SS,mmm``."""
        # Round ONCE to whole milliseconds and split the integer. Truncating the
        # fractional part instead turns float noise into an off-by-one
        # (e.g. 1.001 s -> ',000'). Negative times are clamped to zero.
        total_ms = max(0, int(round(seconds * 1000)))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

    with open(file_path, "w", encoding="utf-8") as f:
        for i, seg in enumerate(segments, start=1):
            start_str = format_time(seg['start'])
            end_str = format_time(seg['end'])
            speaker = seg.get('speaker', 'Unknown')
            # Format: [Speaker]: Text
            text = f"[{speaker}]: {seg['text'].strip()}"

            # One cue: index line, time range line, text line, then a blank separator.
            f.write(f"{i}\n")
            f.write(f"{start_str} --> {end_str}\n")
            f.write(f"{text}\n\n")

# Export the speaker-labelled segments as an SRT file for the validation tool.
# Needs `result` (output of whisperx.assign_word_speakers) and `write_srt`
# to have been defined by earlier cells.
output_folder = r"e:\legendary_interview\test"
base_name = "test"
srt_path = os.path.join(output_folder, "{}.srt".format(base_name))

write_srt(result["segments"], srt_path)
print(f"✅ Saved SRT: {srt_path}")

In [None]:
# Bare last expression: the notebook renders `result` as this cell's output.
# NOTE(review): for a long recording this output may be very large.
result

In [None]:
import json
import numpy as np

# 1. Helper to handle NumPy values (e.g. the "float64" numbers from WhisperX)
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to built-in types.

    Pass it as ``cls=NumpyEncoder`` to ``json.dump`` / ``json.dumps``.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``.

        Handles NumPy booleans, integers, floats and arrays; any other type is
        delegated to the base class, which raises ``TypeError``.
        """
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

# 2. Save the Master JSON (The "Gold" Copy)
# Needs `result`, `output_folder` and `base_name` from earlier cells.
json_path = os.path.join(output_folder, "{}.json".format(base_name))

with open(json_path, "w", encoding="utf-8") as json_file:
    # ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
    json.dump(
        result["segments"],
        json_file,
        cls=NumpyEncoder,
        indent=4,
        ensure_ascii=False,
    )

print(f"✅ Saved Master JSON: {json_path}")