# Transcription CA

Ce notebook applique le **pipeline complet** :
- **Prétraitement** audio (FFmpeg + noisereduce)
- **Transcription** faster-whisper (réglages anti-hallucinations)
- **Chunks longs** pour une meilleure cohérence (3–5 min)
- **Diarisation** (pyannote → fallback whisperx)
- **Post-traitement** (dédup + normalisation chiffres/unités)
- **Nettoyage LLM** par morceaux (1000 caractères) avec borne de correction
- **Sauvegarde JSON** des sorties (raw, diarized, cleaned, llm_cleaned)

# **Installation des packages nécessaires**

In [None]:
%%capture
# Installation silencieuse des dépendances avec gestion des conflits

# 1. Mise à jour pip pour éviter les problèmes
!pip install --upgrade pip -q

# 2. Installation FFmpeg (système)
!apt-get update -qq
!apt-get install -qq ffmpeg sox

# 3. Nettoyage et verrouillage de la stack NumPy/Numba/Scipy
!pip uninstall -y numpy numba >/dev/null 2>&1 || true
!pip install -q numpy==1.26.4 scipy==1.11.4
!pip install -q numba==0.58.1
!pip install -q torch==2.1.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118

# 4. Installation des packages de transcription
#!pip install -q openai-whisper==20231117
!pip install -q faster-whisper==1.0.3

# 5. Packages de débruitage audio
!pip install -q librosa==0.10.1
!pip install -q soundfile==0.12.1
!pip install -q noisereduce==3.0.0
!pip install -q pydub==0.25.1

# 6. Diarization
!pip install -q "pyannote.audio>=3.1"
!pip install -q whisperx

!pip install -q regex==2023.12.25 unidecode==1.3.8



# 7. Packages documents
!pip install -q python-docx==1.2.0
!pip install -q python-pptx==1.0.2

# 8. Packages LLM et NLP
!pip install -q openai==1.91.0
!pip install -q assemblyai==0.44.3
!pip install -q tiktoken==0.9.0

# 9. LangChain
!pip install -q langchain==0.3.27 langchain-community==0.3.29 langchain-core==0.3.30

# 10. Packages utilitaires
!pip install -q pandas==2.1.4 matplotlib==3.8.2 seaborn==0.13.2

# 11. Installation FAISS pour le RAG
!pip install -q faiss-cpu==1.7.4

print("✅ Installation terminée!")


In [4]:
# Vérification que tout est installé correctement
import importlib

packages_to_check = [
    ('numpy', 'numpy'),
    ('scipy', 'scipy'),
    ('numba', 'numba'),
    ('whisper', 'openai-whisper'),
    ('faster_whisper', 'faster-whisper'),
    ('librosa', 'librosa'),
    ('soundfile', 'soundfile'),
    ('noisereduce', 'noisereduce'),
    ('pydub', 'pydub'),
    ('docx', 'python-docx'),
    ('pptx', 'python-pptx'),
    ('openai', 'openai'),
    ('langchain', 'langchain'),
    ('langchain_community', 'langchain-community'),
    ('faiss', 'faiss-cpu'),
    ('assemblyai', 'assemblyai'),
    ('tiktoken', 'tiktoken')
]

print("🔍 Vérification des packages installés:")
print("-" * 50)

all_ok = True
for import_name, package_name in packages_to_check:
    try:
        module = importlib.import_module(import_name)
        version = getattr(module, '__version__', 'N/A')
        print(f"✅ {package_name:20} : {version}")
    except ImportError:
        print(f"❌ {package_name:20} : Non installé")
        all_ok = False
    except Exception as exc:
        print(f"⚠️ {package_name:20} : Erreur lors de l'import ({type(exc).__name__}: {exc})")
        all_ok = False

if all_ok:
    print("✨ Tous les packages sont installés correctement!")
else:
    print("⚠️ Certains packages nécessitent une attention. Consultez les messages ci-dessus.")


🔍 Vérification des packages installés:
--------------------------------------------------
✅ numpy                : 1.26.4
✅ scipy                : 1.16.2
✅ numba                : 0.58.1
✅ openai-whisper       : 20231117
✅ faster-whisper       : 1.2.0
✅ librosa              : 0.10.1
✅ soundfile            : 0.12.1
✅ noisereduce          : N/A
✅ pydub                : N/A
✅ python-docx          : 1.2.0
✅ python-pptx          : 1.0.2
✅ openai               : 1.91.0
✅ langchain            : 0.3.26
❌ langchain-community  : Non installé
✅ faiss-cpu            : 1.12.0
✅ assemblyai           : 0.44.3
✅ tiktoken             : 0.9.0
⚠️ Certains packages nécessitent une attention. Consultez les messages ci-dessus.


# **Imports et configuration GPU**

In [5]:
# Imports standards
import os, sys, json, math, re, shutil, subprocess
import warnings
warnings.filterwarnings('ignore')

from datetime import datetime, timezone
import time
try:
    from zoneinfo import ZoneInfo
except Exception:
    ZoneInfo = None

from pathlib import Path
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass, field
import gc  # Garbage collector

import numpy as np
import pandas as pd

# Imports audio et débruitage
import librosa
import soundfile as sf
import noisereduce as nr
from scipy.signal import butter, filtfilt, medfilt
from pydub import AudioSegment

# Imports pour la transcription
import whisper
from faster_whisper import WhisperModel

# Imports pour les documents
from docx import Document
from pptx import Presentation

# Imports pour le NLP et LLM
import openai
try:
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_community.vectorstores import FAISS
    from langchain_community.embeddings import OpenAIEmbeddings
    langchain_available = True
except ImportError:
    print("⚠️ LangChain non disponible")
    langchain_available = False

import torch
print(f"🔧 PyTorch: {torch.__version__}")
print(f"🎮 CUDA disponible: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Mémoire: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

⚠️ LangChain non disponible
🔧 PyTorch: 2.6.0+cu124
🎮 CUDA disponible: True
   GPU: Tesla T4
   Mémoire: 15.83 GB


# **Configuration des clés API**

In [6]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
OPENAI_API_KEY = user_secrets.get_secret("OPENAI_API_KEY")
ASSEMBLYAI_API_KEY = user_secrets.get_secret("ASSEMBLYAI_API_KEY")
HUGGINGFACE_TOKEN = user_secrets.get_secret("HUGGINGFACE_TOKEN")

# **Configuration des chemins**

In [7]:
UPLOAD_PATH = "/kaggle/input/meeting-audio/" # Chemin des fichiers uploadés 
OUTPUT_PATH = "/kaggle/working" # Chemin de sortie

# **Utilitaires de commande système**

In [9]:
def ensure_dir(p): Path(p).mkdir(parents=True, exist_ok=True) #Vérification création de dossier
def run(cmd): # Lancement commande
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate(); return p.returncode, out.decode(), err.decode()

# **Configuration du pipeline**

In [10]:
 @dataclass 
class Config: 
    """Configuration centralisée pour Kaggle""" 
    
    timezone: str = "Indian/Antananarivo"
    # Clés API 
    openai_key: str = OPENAI_API_KEY 
    assemblyai_key: str = ASSEMBLYAI_API_KEY
    
    # Whisper
    whisper_model: str = "large-v3" # 'tiny', 'base', 'small', 'medium', 'large'
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    compute_type: str = "float16" if torch.cuda.is_available() else "int8"
    # Audio
    sample_rate: int = 16000
    # Decoding / anti-hallucination
    beam_size: int = 5
    best_of: int = 2
    patience: float = 1.0
    temperature: float = 0.0
    compression_ratio_threshold: float = 2.8
    log_prob_threshold: float = -0.50
    no_speech_threshold: float = 0.80
    max_initial_timestamp: float = 1.0
    suppress_blank: bool = True
    suppress_tokens: list[int] = field(default_factory=lambda: [-1])
    # VAD
    use_vad: bool = True
    vad_threshold: float = 0.45
    vad_min_speech_duration_ms: int = 500
    vad_max_speech_duration_s: float = 60.0
    vad_min_silence_duration_ms: int = 1000
    vad_speech_pad_ms: int = 400
    # Chunks longs pour cohérence (3–5 min)
    chunk_length_s: int = 300
    chunk_overlap_s: int = 30
    # Post-traitement
    max_repetitions: int = 3
    # Prompt spécialisé
    initial_prompt: str = (
        "Conseil d'administration Madagascar. Vocabulaire financier: Ariary, millions, "
        "budget, rapport financier, résolution, délibération. "
        "Termes spécifiques: Fihariana, SON'INVEST, UNIMA, AQUALMA. "
        "Intervenants: Président, Directeur Général, Commissaire aux Comptes. "
        "Format: discours naturel sans répétitions."
    )
    # LLM (activé par défaut en production)
    enable_llm: bool = True
    openai_model: str = "gpt-4o-mini" # "gpt-3.5-turbo" : Plus économique que GPT-4
    max_correction_rate: float = 0.18
    chunk_size_chars: int = 1000
    chunk_overlap_chars: int = 200

config = Config() 
print(f"✅ FullRun — Whisper: {config.whisper_model} | device: {config.device} | compute: {config.compute_type}")

✅ Configuration chargée - Modèle Whisper: large-v3


***Comment régler les paramètres selon les cas***

Cas A — Audio propre (dictaphones, salle calme)
*  beam_size=3, best_of=1–2 (plus rapide)
* no_speech_threshold=0.6 (ok)
* temperature=0.0
* VAD : min_silence_duration_ms=1500

Cas B — Audio bruité (portes, brouhaha)
* beam_size=5, best_of=5 (qualité)
* baisser no_speech_threshold à 0.5 si coupures
* VAD : threshold=0.4–0.5, min_speech_duration_ms=200, min_silence_duration_ms=1800–2200
* Garde-fous : garder compression_ratio_threshold=2.4

Cas C — CPU-only (pas de GPU Kaggle)
* compute_type="int8", modèle tiny ou base
* beam_size=3, best_of=1
* Threads : cpu_threads=2, num_workers=1
* Attends un RTF ≈ 2–5 (selon longueur)

# **Préparation de l'audio**

**Extrait**

In [11]:
def slice_audio(input_path: str, output_path: str, start: float = 0.0, duration: Optional[int] = None) -> str:
    args = ["ffmpeg","-y","-hide_banner","-loglevel","error","-ss",str(start),"-i",input_path,"-ac","1","-ar",str(config.sample_rate)]
    if duration and duration > 0:
        args += ["-t",str(duration)]
    args += [output_path]
    ensure_dir(str(Path(output_path).parent))
    code, _, err = run(args)
    if code!=0:
        raise RuntimeError("FFmpeg slice failed: " + err)
    return output_path

# **Préprocessing et Débruitage Audio**
**Classe de débruitage audio avancé**


In [13]:
def prepare_audio_file(audio_path: str) -> Dict:
    """Prépare et valide le fichier audio pour la transcription"""
    import wave
    import contextlib
    
    file_info = {
        "path": audio_path,
        "exists": os.path.exists(audio_path),
        "size_mb": 0,
        "duration_seconds": 0,
        "format": audio_path.split('.')[-1],
        "sample_rate": 0,
        "channels": 0
    }
    
    if file_info["exists"]:
        file_info["size_mb"] = os.path.getsize(audio_path) / (1024 * 1024)
        
        try:
            # Charger avec librosa pour info
            y, sr = librosa.load(audio_path, sr=None, duration=10)
            file_info["sample_rate"] = sr
            
            # Durée totale
            duration = librosa.get_duration(path=audio_path)
            file_info["duration_seconds"] = duration
            
        except Exception as e:
            print(f"⚠️ Erreur lecture audio: {e}")
    
    return file_info

In [14]:
class AudioPreprocessor:
    def __init__(self, sample_rate: int):
        self.sr = sample_rate
    def ffmpeg_enhance(self, src: str, dst: str) -> str:
        chain = "highpass=f=100,lowpass=f=7500,adeclip,afftdn=nf=-25,compand=attacks=0.005:decays=0.05:points=-80/-90|-20/-20|0/-10:gain=5"
        code,_,err = run(["ffmpeg","-y","-hide_banner","-loglevel","error","-i",src,"-ac","1","-ar",str(self.sr),"-af",chain,dst])
        if code!=0: raise RuntimeError("FFmpeg failed: "+err)
        return dst
    def reduce_noise(self, src: str, dst: str) -> str:
        y, sr = librosa.load(src, sr=self.sr)
        y = nr.reduce_noise(y=y, sr=sr)
        sf.write(dst, y, sr); return dst
    def process(self, src: str, outdir: str) -> str:
        ensure_dir(outdir)
        ff = str(Path(outdir)/f"{Path(src).stem}_ffmpeg.wav")
        dn = str(Path(outdir)/f"{Path(src).stem}_denoise.wav")
        self.ffmpeg_enhance(src, ff)
        return self.reduce_noise(ff, dn)

# **Transcription Audio**
**Service de transcription avec audio nettoyé**

In [15]:
class Transcriber:
    def __init__(self, cfg: FullRunConfig):
        self.cfg = cfg; self.model=None
    def load(self):
        if self.model is None:
            self.model = WhisperModel(self.cfg.whisper_model, device=self.cfg.device, compute_type=self.cfg.compute_type)
        return self.model

    def _decode(self, wav_path: str):
        m = self.load()
        kwargs = dict(
            language="fr",
            beam_size=self.cfg.beam_size,
            best_of=self.cfg.best_of,
            patience=self.cfg.patience,
            temperature=self.cfg.temperature,
            compression_ratio_threshold=self.cfg.compression_ratio_threshold,
            log_prob_threshold=self.cfg.log_prob_threshold,
            no_speech_threshold=self.cfg.no_speech_threshold,
            condition_on_previous_text=False,
            initial_prompt=self.cfg.initial_prompt,
            word_timestamps=True,
            suppress_tokens=self.cfg.suppress_tokens,
            suppress_blank=self.cfg.suppress_blank,
            max_initial_timestamp=self.cfg.max_initial_timestamp,
            vad_filter=self.cfg.use_vad,
            vad_parameters={
                "threshold": self.cfg.vad_threshold,
                "min_speech_duration_ms": self.cfg.vad_min_speech_duration_ms,
                "max_speech_duration_s": self.cfg.vad_max_speech_duration_s,
                "min_silence_duration_ms": self.cfg.vad_min_silence_duration_ms,
                "speech_pad_ms": self.cfg.vad_speech_pad_ms,
            } if self.cfg.use_vad else None
        )
        return m.transcribe(wav_path, **kwargs)

    def transcribe_long_audio(self, audio_path: str) -> Dict[str, Any]:
        # Découpage manuel en gros segments (3–5 min) pour robustesse mémoire et cohérence
        y, sr = librosa.load(audio_path, sr=self.cfg.sample_rate, mono=True)
        total = len(y) / sr
        L = self.cfg.chunk_length_s; O = self.cfg.chunk_overlap_s
        segs_all=[]; text_parts=[]; start=0.0; idx=0

        while start < total:
            end = min(start + L, total)
            clip_path = str(Path(FULL_DIR)/f"chunk_{idx:04d}.wav")
            slice_audio(audio_path, clip_path, start=start, duration=int(end-start))
            segs, info = self._decode(clip_path)
            for s in segs:
                segs_all.append({
                    "start": float(s.start + start),
                    "end": float(s.end + start),
                    "text": s.text.strip(),
                    "no_speech_prob": float(getattr(s, "no_speech_prob", 0.0))
                })
            text_parts.append(" ".join(s.text.strip() for s in segs if float(getattr(s,"no_speech_prob",0.0))<=0.9))
            if end >= total: break
            start = end - O; idx += 1

        transcription = " ".join(text_parts).strip()
        return {"status":"success","duration": float(total), "segments": segs_all, "transcription": transcription}


In [None]:
# Exemple d'utilisation
#result = transcription_service.transcribe_audio(audio_file)
#print(f"Transcription: {result['transcription'][:500]}...")

# **Diarization**

In [16]:
def diarize(transcript: Dict[str,Any], audio_path: str, hf_token: Optional[str]):
    if transcript.get("status")!="success" or not transcript.get("segments"):
        return transcript
    try:
        if not hf_token:
            print("ℹ️ Pas de HUGGINGFACE_TOKEN — diarisation ignorée.")
            return transcript
        import whisperx
        align_model, meta = whisperx.load_align_model(language_code='fr', device=config.device)
        aligned = whisperx.align(
            [{"text":s["text"],"start":s["start"],"end":s["end"]} for s in transcript["segments"]],
            align_model, meta, audio_path, device=config.device
        )
        diar = whisperx.DiarizationPipeline(use_auth_token=hf_token, device=config.device)
        dsegs = diar(audio_path)
        result = whisperx.assign_word_speakers(dsegs, aligned)
        transcript["diarized_segments"] = result.get("segments", [])
        return transcript
    except Exception as e:
        print("⚠️ Diarisation échouée:", e)
        return transcript


# **Post-traitement du texte**

In [17]:
def normalize_compound_numbers(text: str) -> str:
    pattern = re.compile(r"(\\d+[\\s ]*)milliards?\\s+(\\d+[\\s ]*)millions?", re.IGNORECASE)
    def repl(m):
        b = int(m.group(1).replace(" ",""))
        M = int(m.group(2).replace(" ",""))
        total = b*1_000_000_000 + M*1_000_000
        return f"{total:,}".replace(",", " ")
    return pattern.sub(repl, text)

def normalize_units(text: str) -> str:
    pattern = re.compile(r"(\\d+[\\d\\s,.]*)\\s*(millions?|milliards?)", re.IGNORECASE)
    def repl(m):
        raw = m.group(1); unit = m.group(2).lower()
        try:
            val = float(raw.replace(" ","").replace(",", "."))
        except ValueError:
            return m.group(0)
        factor = 1_000_000 if "million" in unit else 1_000_000_000
        total = val*factor
        return f"{total:,.0f}".replace(",", " ")
    return pattern.sub(repl, text)

def deduplicate(text: str) -> str:
    sents = [s.strip() for s in re.split(r"(?<=[.!?])\\s+", text) if s.strip()]
    seen=set(); out=[]
    for s in sents:
        k=s.lower()
        if k in seen: continue
        seen.add(k); out.append(s)
    return ". ".join(out)

def postprocess_text(text: str) -> str:
    text = normalize_compound_numbers(text)
    text = normalize_units(text)
    text = deduplicate(text)
    return text


# **Nettoyage LLM**

In [18]:
class LLMPostEditor:
    def __init__(self, api_key: Optional[str], model: str, max_rate: float, size: int, overlap: int):
        self.api_key = api_key; self.model = model
        self.max_rate = max_rate; self.size=size; self.overlap=overlap
        self.client = None
        if api_key:
            from openai import OpenAI
            self.client = OpenAI(api_key=api_key)

    def chunks(self, text: str) -> List[str]:
        if not text: return []
        step = max(1, self.size - self.overlap)
        return [text[i:i+self.size] for i in range(0, len(text), step)]

    def clean(self, text: str) -> Tuple[str, float]:
        if not self.client or not text: return text, 0.0
        cleaned=[]; delta=0
        for i, chunk in enumerate(self.chunks(text), 1):
            messages=[
                {"role":"system","content":(
                    "Tu corriges une transcription FR: orthographe, grammaire, ponctuation, noms propres. "
                    "NE JAMAIS ajouter d'information non présente dans la transcription"
                    "Ne change pas le sens. Applique un style réunion formel."
                )},
                {"role":"user","content":chunk}
            ]
            try:
                resp = self.client.chat.completions.create(model=self.model, messages=messages, temperature=0.2, max_tokens=1400)
                ct = resp.choices[0].message.content.strip()
            except Exception as e:
                print(f"⚠️ LLM chunk {i} échoué:", e); ct = chunk
            cleaned.append(ct); delta += abs(len(ct)-len(chunk))
        merged = " ".join(cleaned)
        rate = delta/max(len(text),1)
        if rate > self.max_rate:
            print(f"⚠️ LLM correction rate {rate:.1%} > seuil {self.max_rate:.0%}. On garde le texte post-traité.")
            return text, rate
        return merged, rate


# **Fallback AssemblyAI (si échec Whisper)**

In [None]:
class AssemblyAIFallback:
    """Service de fallback avec AssemblyAI"""
    
    def __init__(self, api_key: str):
        self.api_key = api_key
        
    def transcribe_with_assemblyai(self, audio_path: str) -> Dict:
        """
        Transcription de secours via AssemblyAI
        
        Args:
            audio_path: Chemin du fichier audio
            
        Returns:
            Dict avec la transcription
        """
        if not self.api_key:
            return {
                "status": "error",
                "error": "Clé API AssemblyAI non configurée"
            }
        
        try:
            import assemblyai as aai
            
            print("🔄 Utilisation du fallback AssemblyAI...")
            
            aai.settings.api_key = self.api_key
            transcriber = aai.Transcriber()
            
            # Upload et transcription
            config_lang = aai.TranscriptionConfig(
                language_code="fr",
                punctuate=True,
                format_text=True,
                disfluencies=True,
                speaker_labels=True
            )
            transcript = transcriber.transcribe(audio_path, config=config_lang)
            
            if transcript.status == aai.TranscriptStatus.error:
                raise Exception(f"Erreur AssemblyAI: {transcript.error}")
            
            # Attente de la transcription
            while transcript.status not in [aai.TranscriptStatus.completed, aai.TranscriptStatus.error]:
                time.sleep(5)
                transcript = transcriber.get_transcript(transcript.id)
            
            return {
                "status": "success",
                "method": "assemblyai",
                "transcription": transcript.text,
                "confidence": transcript.confidence if hasattr(transcript, 'confidence') else 0.85,
                "words": transcript.words if hasattr(transcript, 'words') else []
            }
            
        except Exception as e:
            print(f"❌ Erreur AssemblyAI: {str(e)}")
            return {
                "status": "error",
                "error": str(e),
                "method": "assemblyai"
            }

# Service de fallback
fallback_service = AssemblyAIFallback(config.assemblyai_key)

1. Par défaut, la langue est auto. Pour ton cas, force français :
        config = aai.TranscriptionConfig(language_code="fr")
2. Diarisation (orateurs)
        config = aai.TranscriptionConfig(speaker_labels=True)

Exemple :
    config = aai.TranscriptionConfig(language_code="fr", speaker_labels=True)
    transcript = transcriber.transcribe(audio_path, config=config)

Appel :
    Si TranscriptionService.transcribe_audio renvoie status="error" ou un real_time_factor >> 5 (trop lent) ou trop de segments sous ton confidence_threshold, alors :
        > result = fallback_service.transcribe_with_assemblyai(audio_path)

**Pipeline de transcription avec gestion automatique du fallback**

In [None]:
def transcribe_audio_pipeline(audio_path: str, cfg: Config, save_json: bool=True) -> Dict[str, Any]:
    print("="*80); print("🎯 PHASE 2 — Pipeline complet (production)"); print("="*80)
    if not Path(audio_path).exists():
        raise FileNotFoundError(audio_path)

    # Prétraitement
    print("[1/5] Prétraitement audio (FFmpeg + NR)")
    clean_path = AudioPreprocessor(cfg.sample_rate).process(audio_path, FULL_DIR)

    # Transcription (long audio, chunks 3–5 min)
    print("[2/5] Transcription faster-whisper (chunks longs)")
    transcriber = Transcriber(cfg)
    asr = transcriber.transcribe_long_audio(clean_path)
    if asr.get("status")!="success":
        raise RuntimeError("Transcription échouée")

    # Diarisation
    print("[3/5] Diarisation (pyannote -> whisperx)")
    asr = diarize(asr, clean_path, HUGGINGFACE_TOKEN)

    # Post-traitement règles
    print("[4/5] Post-traitement (dédup + chiffres)")
    post_text = postprocess_text(asr.get("transcription",""))
    asr["transcription_postprocessed"] = post_text

    # LLM (si activé)
    print("[5/5] Nettoyage LLM (chunks 1000 caractères)")
    final_text = post_text; rate = 0.0
    if cfg.enable_llm and OPENAI_API_KEY:
        editor = LLMPostEditor(OPENAI_API_KEY, cfg.openai_model, cfg.max_correction_rate, cfg.chunk_size_chars, cfg.chunk_overlap_chars)
        final_text, rate = editor.clean(post_text)
    else:
        print("ℹ️ LLM non activé ou clé absente — on garde le post-traitement règles.")

    asr["transcription_llm"] = final_text
    asr["llm_correction_rate"] = rate

    # Sauvegarde JSONs
    if save_json:
        base = f"full_{int(time.time())}"
        out_path = str(Path(OUTPUT_PATH)/f"{base}.json")
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(asr, f, ensure_ascii=False, indent=2)
        print("💾 Sauvegardé:", out_path)
    return asr


In [None]:
# Test avec votre fichier audio
#audio_file = f"{UPLOAD_PATH}atelier.mp3"
#audio_file = f"{UPLOAD_PATH}test_1h.wav"
audio_file = f"{UPLOAD_PATH}test_30mn.mp3"
#audio_info = prepare_audio_file(audio_file)

In [None]:
result = transcribe_audio_pipeline(audio_file, config, save_json=True)
print(result.keys())