# **Installation des packages nécessaires**

In [None]:
%%capture
# Installation silencieuse des d√©pendances avec gestion des conflits

# 1. Mise √† jour pip pour √©viter les probl√®mes
!pip install --upgrade pip -q

# 2. Installation FFmpeg (syst√®me)
!apt-get update -qq
!apt-get install -qq ffmpeg sox

# 3. Installation des packages de transcription
!pip install -q openai-whisper==20250625
!pip install -q faster-whisper==1.0.3

# 4. Packages de d√©bruitage audio
!pip install -q librosa==0.10.1
!pip install -q soundfile==0.12.1
!pip install -q noisereduce==3.0.0
!pip install -q scipy==1.11.4
!pip install -q pydub==0.25.1

!pip -q install regex unidecode

# 5. Diarization
!pip -q install "pyannote.audio>=3.1" torch --index-url https://download.pytorch.org/whl/cu118
!pip -q install whisperx

# 5. Packages documents
!pip install -q python-docx==1.2.0
!pip install -q python-pptx==1.0.2

# 6. Packages LLM et NLP
!pip install -q openai==1.91.0
!pip install -q assemblyai==0.44.3
!pip install -q tiktoken==0.9.0

# 7. LangChain
!pip install -q langchain==0.3.27 langchain-community==0.3.29 langchain-core -q 2>/dev/null || true

# 8. Packages utilitaires
!pip install -q numpy
!pip install -q pandas matplotlib seaborn

# 9. Installation FAISS pour le RAG
!pip install -q faiss-cpu==1.12.0

print("‚úÖ Installation termin√©e!")

In [None]:
# Check that every required package can be imported, and report its version.
import sys
import importlib

# (importable module name, pip distribution name)
packages_to_check = [
    ('whisper', 'openai-whisper'), ('faster_whisper', 'faster-whisper'),
    ('librosa', 'librosa'), ('soundfile', 'soundfile'),
    ('noisereduce', 'noisereduce'), ('scipy', 'scipy'), ('pydub', 'pydub'),
    ('docx', 'python-docx'), ('pptx', 'python-pptx'),
    ('openai', 'openai'),
    ('langchain', 'langchain'), ('langchain_community', 'langchain-community'),
    ('faiss', 'faiss-cpu'),
    ('assemblyai', 'assemblyai'), ('tiktoken', 'tiktoken'),
]

print("üîç V√©rification des packages install√©s:")
print("-" * 50)

# Stays True only if every import below succeeds.
all_ok = True
for import_name, package_name in packages_to_check:
    try:
        module = importlib.import_module(import_name)
        # Not every package exposes __version__; fall back to 'N/A'.
        version = getattr(module, '__version__', 'N/A')
        print(f"‚úÖ {package_name:20} : {version}")
    except ImportError:
        all_ok = False
        print(f"‚ùå {package_name:20} : Non install√©")

if not all_ok:
    print("\n‚ö†Ô∏è Certains packages manquent. Relancez la cellule 1.")
else:
    print("\n‚ú® Tous les packages sont install√©s correctement!")

# **Imports et configuration GPU**

In [None]:
# Imports standards
import os, sys, json, math, re, shutil, subprocess
import warnings
warnings.filterwarnings('ignore')

from datetime import datetime, timezone
import time
try:
    from zoneinfo import ZoneInfo
except Exception:
    ZoneInfo = None

from pathlib import Path
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass, field
import gc  # Garbage collector

import numpy as np
import pandas as pd

# Imports audio et d√©bruitage
import librosa
import soundfile as sf
import noisereduce as nr
from scipy.signal import butter, filtfilt, medfilt
from pydub import AudioSegment

# Imports pour la transcription
import whisper
from faster_whisper import WhisperModel

# Imports pour les documents
from docx import Document
from pptx import Presentation

# Imports pour le NLP et LLM
import openai
try:
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_community.vectorstores import FAISS
    from langchain_community.embeddings import OpenAIEmbeddings
    langchain_available = True
except ImportError:
    print("‚ö†Ô∏è LangChain non disponible")
    langchain_available = False

import torch
# Report the installed PyTorch version and whether CUDA is usable.
print(f"üîß PyTorch: {torch.__version__}")
print(f"üéÆ CUDA disponible: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Name and total memory of device 0 (bytes divided by 1e9, shown as GB).
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    print(f"   M√©moire: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

# **Configuration des cl√©s API**

In [None]:
# Read the API credentials from the Kaggle secrets store.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
OPENAI_API_KEY = user_secrets.get_secret("OPENAI_API_KEY")
ASSEMBLYAI_API_KEY = user_secrets.get_secret("ASSEMBLYAI_API_KEY")
# Passed to the diarization step, which is skipped when the token is falsy.
HUGGINGFACE_TOKEN = user_secrets.get_secret("HUGGINGFACE_TOKEN")

# **Configuration des chemins**

In [None]:
UPLOAD_PATH = "/kaggle/input/meeting-audio/" # Directory of the uploaded audio files (trailing slash: file names are appended directly)
OUTPUT_PATH = "/kaggle/working" # Output directory


Phase 1 : validation des configurations

In [None]:
# Working directory for the Phase 1 artefacts (audio clip, intermediate WAV files).
PHASE1_DIR = f'{OUTPUT_PATH}/phase1_tests'
os.makedirs(PHASE1_DIR, exist_ok=True)

# **Utilitaires de commande syst√®me**

In [None]:
def ensure_dir(p):
    """Create directory ``p`` (and any missing parents); do nothing if it already exists."""
    target = Path(p)
    target.mkdir(parents=True, exist_ok=True)
def run(cmd):
    """Run a command and return ``(returncode, stdout, stderr)``.

    Args:
        cmd: Command to execute, as accepted by ``subprocess.run`` (the
            callers in this notebook pass an argument list; no shell is used).

    Returns:
        Tuple of the process exit code and its captured stdout and stderr as
        text.

    Both streams are decoded as UTF-8 with ``errors='replace'``: tools such as
    FFmpeg may emit bytes that are not valid UTF-8 (e.g. file metadata), and a
    strict decode would raise ``UnicodeDecodeError`` instead of returning the
    diagnostics.
    """
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout_text = proc.stdout.decode('utf-8', errors='replace')
    stderr_text = proc.stderr.decode('utf-8', errors='replace')
    return proc.returncode, stdout_text, stderr_text

# **Configuration du pipeline**

In [None]:
 @dataclass 
class Config: 
    """Configuration centralis√©e pour Kaggle""" 
    
    timezone: str = "Indian/Antananarivo"
    
    # Mod√®le Whisper 
    whisper_model: str = "large-v3" # 'tiny', 'base', 'small', 'medium', 'large'
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    compute_type: str = "float16" if torch.cuda.is_available() else "int8"
    
    openai_model: str = "gpt-3.5-turbo" # Plus √©conomique que GPT-4 
    
    # Cl√©s API 
    openai_key: str = OPENAI_API_KEY 
    assemblyai_key: str = ASSEMBLYAI_API_KEY

    # Param√®tres audio
    # denoise_method: str = "hybrid"  # ffmpeg, noisereduce, hybrid
    # denoise_aggressive: bool = True
    sample_rate: int = 16000

    # Param√®tres de traitement 
    
    ## Longueur maximale d‚Äôun ‚Äúmorceau de texte‚Äù (chunk) qu‚Äôon d√©coupe avant d‚Äôenvoyer au LLM.
    chunk_sec: int = 15
    overlap_sec: int = 5
    ## R√®gle : chunk_size ‚âà 20-30% de la capacit√© max du mod√®le.
    #chunk_size: int = 1000 # nombre de caract√®re ‚âà 200‚Äì250 tokens (selon la langue et la densit√©) √† modifier selon la limitation du mod√®le choisie (ex. GPT-3.5 ‚âà 4k tokens, GPT-4 ‚âà 8k ou 32k).
    
    ## Nombre de caract√®res r√©p√©t√©s entre deux chunks.
    ## R√®gle : overlap = 15-25% du chunk_size.
    #chunk_overlap: int = 200 # nombre de caract√®re ‚âà 40 tokens. Suffisant pour garder la continuit√© (phrases coup√©es, dialogues, etc.).
    
    ## Proportion maximale de mots que le LLM a le droit de modifier dans une transcription brute.
    ## R√®gle : plus l‚Äôaudio est bruit√©, plus tu tol√®res une correction √©lev√©e. [propre (dictaphone, micro-cravate) ‚Üí mettre bas (0.10 √† 0.15). / bruyant (claquements de porte, plusieurs intervenants) ‚Üí monter √† 0.20 voire 0.25]
    #max_correction_rate: float = 0.15 # Max 15% du texte peut √™tre modifi√© (Pas de r√©√©criture compl√®te ‚Üí garde la fid√©lit√© au discours original.) Evite les hallucinations
    
    ## Score minimal de confiance (0‚Äì1) pour garder une phrase transcrite par Whisper/AssemblyAI.
    #confidence_threshold: float = 0.85 #Segments dont la transcription est jug√©e correcte √† au moins 85%.

    # Optimisation m√©moire pour Kaggle 
    num_workers: int = 2  # Ajust√© pour T4
    batch_size: int = 4 # Pour le traitement par lots [Si CPU seulement ‚Üí descendre (1‚Äì2).]
    use_gpu: bool = torch.cuda.is_available()

    # NOUVEAUX PARAM√àTRES ANTI-HALLUCINATIONS
    beam_size: int = 5  # Plus de beam = plus de pr√©cision
    #best_of: int = 2    # Prendre le meilleur de 3 tentatives
    #patience: float = 1.0
    temperature: float = 0.0  # Pas de randomness
    
    # # Seuils de confiance stricts
    # no_speech_threshold: float = 0.8 # Plus strict
    # logprob_threshold: float = -0.5  # Plus strict
    # compression_ratio_threshold: float = 2.8  # √âvite les r√©p√©titions

    # # NOUVEAU: Param√®tres anti-r√©p√©tition
    # max_initial_timestamp: float = 1.0
    # suppress_blank: bool = True
    # suppress_tokens: str = "-1"  # Supprime les tokens probl√©matiques
    
    # VAD (Voice Activity Detection) optimis√©
    # use_vad: bool = True
    vad_threshold: float = 0.5
    # vad_min_speech_duration_ms: int = 500  # Minimum 250ms de parole
    # vad_max_speech_duration_s: float = 60  # Max 30s par segment
    # vad_min_silence_duration_ms: int = 1000  # 2s de silence minimum
    # vad_speech_pad_ms: int = 400

    # Post
    max_repetitions: int = 3

    # Phase 1
    duration_limit: int = 600
    
    # Audio processing
    sample_rate: int = 16000
    # use_denoise: bool = "auto"  # auto, True, False
    # denoise_stationary: float = 0.97
    # denoise_prop_decrease: float = 1.0
    
    # # D√©tection r√©p√©titions
    # repetition_penalty: float = 1.2  # NOUVEAU
    # max_repetitions: int = 3  # NOUVEAU: max r√©p√©titions tol√©r√©es
    
    # # Prompt sp√©cialis√© CA - AM√âLIOR√â
    # # PROMPT AM√âLIOR√â avec contexte financier malgache
    # initial_prompt: str = (
    #     "Conseil d'administration Madagascar. Vocabulaire financier: Ariary, millions, "
    #     "budget, rapport financier, r√©solution, d√©lib√©ration. "
    #     "Termes sp√©cifiques: Fihariana, SON'INVEST, UNIMA, AQUALMA. "
    #     "Intervenants: Pr√©sident, Directeur G√©n√©ral, Commissaire aux Comptes. "
    #     "Format: discours naturel sans r√©p√©titions."
    # )

# Module-level configuration instance read by the functions defined in this notebook.
config = Config() 
print(f"‚úÖ Configuration charg√©e - Mod√®le Whisper: {config.whisper_model}")

# **Pr√©paration de l'audio**

**Extrait**

In [None]:
def slice_audio(input_path: str, output_path: str, duration: int) -> str:
    """Cut the first ``duration`` seconds of an audio file with FFmpeg.

    The clip is written to ``output_path`` as mono audio at
    ``config.sample_rate``; the parent directory is created if needed.

    Returns:
        ``output_path`` when FFmpeg succeeds. ``input_path`` unchanged when
        ``duration`` is not positive, or when FFmpeg exits with a non-zero
        code (its stderr is printed in that case).
    """
    if duration > 0:
        ensure_dir(Path(output_path).parent)
        ffmpeg_cmd = [
            'ffmpeg', '-y',
            '-i', input_path,
            '-t', str(duration),
            '-ac', '1',
            '-ar', str(config.sample_rate),
            output_path,
        ]
        status, _stdout, stderr_text = run(ffmpeg_cmd)
        if status == 0:
            return output_path
        print('‚ö†Ô∏è FFmpeg slice error:', stderr_text)
    return input_path

**Prétraitement de l’audio**

Nettoyage via FFmpeg (filtres audio rapides),

puis réduction de bruit plus fine avec Python (librosa + noisereduce).

In [None]:
class AudioPreprocessor:
    """Two-stage audio clean-up: an FFmpeg filter chain, then ``noisereduce``."""

    def __init__(self, sample_rate: int):
        # Target sample rate used by both stages.
        self.sr = sample_rate

    def ffmpeg_enhance(self, src: str, dst: str) -> str:
        """Filter ``src`` with FFmpeg into a mono file ``dst`` at ``self.sr``.

        Raises:
            RuntimeError: if FFmpeg exits with a non-zero code.
        """
        filters = ','.join([
            'highpass=f=100',
            'lowpass=f=7500',
            'adeclip',
            'afftdn=nf=-25',
            'compand=attacks=0.005:decays=0.05:points=-80/-90|-20/-20|0/-10:gain=5',
        ])
        command = [
            'ffmpeg', '-y', '-hide_banner', '-loglevel', 'error',
            '-i', src,
            '-ac', '1',
            '-ar', str(self.sr),
            '-af', filters,
            dst,
        ]
        returncode, _, stderr_text = run(command)
        if returncode != 0:
            raise RuntimeError('FFmpeg failed: ' + stderr_text)
        return dst

    def reduce_noise(self, src: str, dst: str) -> str:
        """Load ``src`` at ``self.sr``, apply ``nr.reduce_noise`` and write ``dst``."""
        samples, rate = librosa.load(src, sr=self.sr)
        cleaned = nr.reduce_noise(y=samples, sr=rate)
        sf.write(dst, cleaned, rate)
        return dst

    def process(self, src: str, outdir: str) -> str:
        """Run both stages and return the path of the denoised WAV.

        Two files are written in ``outdir``: ``<stem>_ffmpeg.wav`` (the
        intermediate result) and ``<stem>_denoise.wav`` (the returned path).
        """
        ensure_dir(outdir)
        stem = Path(src).stem
        out_root = Path(outdir)
        ffmpeg_wav = str(out_root / f'{stem}_ffmpeg.wav')
        denoised_wav = str(out_root / f'{stem}_denoise.wav')
        self.ffmpeg_enhance(src, ffmpeg_wav)
        return self.reduce_noise(ffmpeg_wav, denoised_wav)

In [None]:
def prepare_audio_file(audio_path: str) -> Dict:
    """Collect basic information about an audio file before transcription.

    Args:
        audio_path: Path of the audio file.

    Returns:
        Dict with the keys ``path``, ``exists``, ``size_mb``,
        ``duration_seconds``, ``format`` (file extension without the dot,
        empty when the file name has none), ``sample_rate`` and ``channels``.
        The numeric fields stay at 0 when the file does not exist or cannot
        be decoded (the decoding error is printed, not raised).
    """
    file_info = {
        "path": audio_path,
        "exists": os.path.exists(audio_path),
        "size_mb": 0,
        "duration_seconds": 0,
        # Path.suffix only looks at the file name, so a dot in a directory
        # name is not mistaken for an extension.
        "format": Path(audio_path).suffix.lstrip('.'),
        "sample_rate": 0,
        "channels": 0,
    }

    if not file_info["exists"]:
        return file_info

    file_info["size_mb"] = os.path.getsize(audio_path) / (1024 * 1024)

    try:
        # Decode only a short excerpt, at the native rate and without
        # down-mixing, to read the sample rate and the channel count.
        y, sr = librosa.load(audio_path, sr=None, mono=False, duration=10)
        file_info["sample_rate"] = sr
        # A 1-D array means a single channel; otherwise the first axis is
        # the channel axis.
        file_info["channels"] = 1 if y.ndim == 1 else int(y.shape[0])

        # Total duration of the file
        file_info["duration_seconds"] = librosa.get_duration(path=audio_path)
    except Exception as e:
        # Best effort: report the problem and return what is known so far.
        print(f"‚ö†Ô∏è Erreur lecture audio: {e}")

    return file_info

# **Prétraitement et débruitage audio**
**Classe de débruitage audio avancé**


In [None]:
class AudioPreprocessor:
    """Two-stage audio clean-up: an FFmpeg filter chain, then ``noisereduce``.

    NOTE(review): a class with the same name and the same body is also defined
    in an earlier cell of this notebook; running this cell rebinds the name.
    """

    def __init__(self, sample_rate: int):
        # Target sample rate used by both stages.
        self.sr = sample_rate

    def ffmpeg_enhance(self, src: str, dst: str) -> str:
        """Filter ``src`` with FFmpeg into a mono file ``dst`` at ``self.sr``.

        Raises:
            RuntimeError: if FFmpeg exits with a non-zero code.
        """
        filters = ','.join([
            'highpass=f=100',
            'lowpass=f=7500',
            'adeclip',
            'afftdn=nf=-25',
            'compand=attacks=0.005:decays=0.05:points=-80/-90|-20/-20|0/-10:gain=5',
        ])
        command = [
            'ffmpeg', '-y', '-hide_banner', '-loglevel', 'error',
            '-i', src,
            '-ac', '1',
            '-ar', str(self.sr),
            '-af', filters,
            dst,
        ]
        returncode, _, stderr_text = run(command)
        if returncode != 0:
            raise RuntimeError('FFmpeg failed: ' + stderr_text)
        return dst

    def reduce_noise(self, src: str, dst: str) -> str:
        """Load ``src`` at ``self.sr``, apply ``nr.reduce_noise`` and write ``dst``."""
        samples, rate = librosa.load(src, sr=self.sr)
        cleaned = nr.reduce_noise(y=samples, sr=rate)
        sf.write(dst, cleaned, rate)
        return dst

    def process(self, src: str, outdir: str) -> str:
        """Run both stages and return the path of the denoised WAV.

        Two files are written in ``outdir``: ``<stem>_ffmpeg.wav`` (the
        intermediate result) and ``<stem>_denoise.wav`` (the returned path).
        """
        ensure_dir(outdir)
        stem = Path(src).stem
        out_root = Path(outdir)
        ffmpeg_wav = str(out_root / f'{stem}_ffmpeg.wav')
        denoised_wav = str(out_root / f'{stem}_denoise.wav')
        self.ffmpeg_enhance(src, ffmpeg_wav)
        return self.reduce_noise(ffmpeg_wav, denoised_wav)

# **Transcription Audio**
**Service de transcription avec audio nettoyé**

In [None]:
class Transcriber:
    """French transcription with a lazily loaded faster-whisper ``WhisperModel``."""

    def __init__(self, cfg: Config):
        self.cfg = cfg
        # The model is created on first use (see ``load``).
        self.model = None

    def load(self):
        """Return the model, instantiating it from the config on the first call."""
        if self.model is not None:
            return self.model
        self.model = WhisperModel(
            self.cfg.whisper_model,
            device=self.cfg.device,
            compute_type=self.cfg.compute_type,
        )
        return self.model

    def transcribe(self, wav_path: str) -> Dict[str, Any]:
        """Transcribe ``wav_path`` and return the segments and the joined text.

        Segments whose ``no_speech_prob`` exceeds 0.9 are dropped.

        Returns:
            Dict with ``status`` (always ``'success'`` here), ``segments``
            (list of ``{'start', 'end', 'text'}``) and ``transcription``
            (segment texts joined with single spaces).
        """
        model = self.load()
        decoding_options = {
            'language': 'fr',
            'beam_size': self.cfg.beam_size,
            'temperature': self.cfg.temperature,
            'vad_filter': True,
            'vad_parameters': {'threshold': self.cfg.vad_threshold},
        }
        segments, _info = model.transcribe(wav_path, **decoding_options)

        kept = [
            {'start': float(seg.start), 'end': float(seg.end), 'text': seg.text.strip()}
            for seg in segments
            if not (float(getattr(seg, 'no_speech_prob', 0.0)) > 0.9)
        ]
        full_text = ' '.join(item['text'] for item in kept)
        return {'status': 'success', 'segments': kept, 'transcription': full_text}

***Comment régler les paramètres selon les cas***

Cas A — Audio propre (dictaphones, salle calme)
* beam_size=3, best_of=1–2 (plus rapide)
* no_speech_threshold=0.6 (ok)
* temperature=0.0
* VAD : min_silence_duration_ms=1500

Cas B — Audio bruité (portes, brouhaha)
* beam_size=5, best_of=5 (qualité)
* baisser no_speech_threshold à 0.5 si coupures
* VAD : threshold=0.4–0.5, min_speech_duration_ms=200, min_silence_duration_ms=1800–2200
* Garde-fous : garder compression_ratio_threshold=2.4

Cas C — CPU uniquement (pas de GPU Kaggle)
* compute_type="int8", modèle tiny ou base
* beam_size=3, best_of=1
* Threads : cpu_threads=2, num_workers=1
* Attendre un RTF ≈ 2–5 (selon la longueur)

In [None]:
# Exemple d'utilisation
#result = transcription_service.transcribe_audio(audio_file)
#print(f"Transcription: {result['transcription'][:500]}...")

# **Diarization**

In [None]:
def diarize_stub(transcript: Dict[str,Any], wav_path: str, hf_token: Optional[str]):
    """Add speaker labels to a transcript with whisperx (best effort).

    Args:
        transcript: Transcription result; its ``'segments'`` entries provide
            ``text``, ``start`` and ``end``. The dict is modified in place.
        wav_path: Audio file the transcript was produced from.
        hf_token: Hugging Face token handed to the diarization pipeline.
            When falsy, diarization is skipped.

    Returns:
        The same ``transcript`` dict, with ``'diarized_segments'`` added when
        diarization succeeded. Any failure (missing package, model download,
        runtime error) is printed and the transcript is returned as it is.
    """
    if not hf_token:
        print('‚ÑπÔ∏è Pas de HUGGINGFACE_TOKEN ‚Üí diarisation ignor√©e (Phase 1).')
        return transcript
    try:
        import whisperx
        # Import the pipeline from its module: recent whisperx releases no
        # longer expose DiarizationPipeline at package top level, while
        # whisperx.diarize provides it in old and new versions alike.
        from whisperx.diarize import DiarizationPipeline

        # Word-level alignment only needs text and timings of each segment.
        segments = [
            {k: v for k, v in s.items() if k in ('text', 'start', 'end')}
            for s in transcript['segments']
        ]
        align_model, meta = whisperx.load_align_model(language_code='fr', device=config.device)
        aligned = whisperx.align(segments, align_model, meta, wav_path, device=config.device)

        diar = DiarizationPipeline(use_auth_token=hf_token, device=config.device)
        diar_segs = diar(wav_path)
        result = whisperx.assign_word_speakers(diar_segs, aligned)
        transcript['diarized_segments'] = result.get('segments', [])
        return transcript
    except Exception as e:
        # Deliberate best effort: diarization must not break the transcription.
        print('‚ö†Ô∏è Diarisation √©chou√©e:', e)
        return transcript

# **Post-traitement du texte**

In [None]:
def postprocess_text(text: str) -> str:
    """Remove repeated sentences from a transcription.

    The text is split after ``.``, ``!`` or ``?`` followed by whitespace.
    A sentence is dropped when an earlier sentence is identical to it,
    ignoring case; the first occurrence and the original order are kept.

    Args:
        text: Raw transcription text.

    Returns:
        The remaining sentences joined with single spaces. Each sentence
        keeps its own end punctuation, so no separator punctuation is added
        (joining with ``'. '`` used to produce doubled marks such as
        ``"Bonjour.. Merci!"``).
    """
    seen = set()
    kept = []
    for raw in re.split(r'(?<=[.!?])\s+', text):
        sentence = raw.strip()
        if not sentence:
            continue
        key = sentence.lower()
        if key in seen:
            continue
        seen.add(key)
        kept.append(sentence)
    return ' '.join(kept)

# **Test Phase 1**

In [None]:
def phase1_run(audio_file: str):
    """Run the Phase 1 chain on an excerpt of ``audio_file``.

    Steps: cut the first ``config.duration_limit`` seconds, clean the audio,
    transcribe it, try to diarize it, post-process the text and save the
    result as ``phase1_result_<unix time>.json`` under ``OUTPUT_PATH``.

    Returns:
        The transcription dict. When its status is not ``'success'`` it is
        returned immediately, without diarization or saving.
    """
    print('üöÄ Phase 1 ‚Äî extrait 5‚Äì10 min')

    clip_target = Path(PHASE1_DIR) / f'clip_{Path(audio_file).stem}.wav'
    clip_path = slice_audio(audio_file, str(clip_target), config.duration_limit)

    preprocessor = AudioPreprocessor(config.sample_rate)
    cleaned_wav = preprocessor.process(clip_path, PHASE1_DIR)

    result = Transcriber(config).transcribe(cleaned_wav)
    if result.get('status') != 'success':
        return result

    result = diarize_stub(result, cleaned_wav, HUGGINGFACE_TOKEN)
    result['transcription_postprocessed'] = postprocess_text(result.get('transcription', ''))

    report_path = Path(OUTPUT_PATH) / f"phase1_result_{int(time.time())}.json"
    with open(report_path, 'w', encoding='utf-8') as handle:
        json.dump(result, handle, ensure_ascii=False, indent=2)
    print('üíæ Sauvegard√©:', report_path)
    return result

In [None]:
PHASE1_AUDIO_FILE = f"{UPLOAD_PATH}test_30mn.mp3"
# Run Phase 1 on the test recording and display the keys of the result
res = phase1_run(PHASE1_AUDIO_FILE)
res.keys()

# **Fallback AssemblyAI (si échec Whisper)**

In [None]:
class AssemblyAIFallback:
    """Fallback transcription service backed by AssemblyAI."""

    def __init__(self, api_key: str):
        self.api_key = api_key

    def transcribe_with_assemblyai(self, audio_path: str) -> Dict:
        """Transcribe an audio file through AssemblyAI.

        Args:
            audio_path: Path of the audio file.

        Returns:
            On success, a dict with ``status='success'``, ``method``,
            ``transcription``, ``confidence`` (0.85 when the transcript has
            no such attribute) and ``words``. On failure, a dict with
            ``status='error'`` and ``error``; no exception is propagated.
        """
        if not self.api_key:
            return {
                "status": "error",
                "error": "Cl√© API AssemblyAI non configur√©e"
            }

        try:
            import assemblyai as aai

            print("üîÑ Utilisation du fallback AssemblyAI...")

            aai.settings.api_key = self.api_key
            transcriber = aai.Transcriber()

            config_lang = aai.TranscriptionConfig(
                language_code="fr",
                punctuate=True,
                format_text=True,
                disfluencies=True,
                speaker_labels=True
            )
            # Transcriber.transcribe() uploads the file and blocks until the
            # transcript is completed or failed, so no polling is needed.
            # (The former polling loop could never run, and it called a
            # get_transcript() method that the SDK's Transcriber does not
            # provide.)
            transcript = transcriber.transcribe(audio_path, config=config_lang)

            if transcript.status == aai.TranscriptStatus.error:
                raise Exception(f"Erreur AssemblyAI: {transcript.error}")

            return {
                "status": "success",
                "method": "assemblyai",
                "transcription": transcript.text,
                "confidence": getattr(transcript, 'confidence', 0.85),
                "words": getattr(transcript, 'words', [])
            }

        except Exception as e:
            # Boundary of the fallback: report the failure as a result dict.
            print(f"‚ùå Erreur AssemblyAI: {str(e)}")
            return {
                "status": "error",
                "error": str(e),
                "method": "assemblyai"
            }

# Fallback service instance, using the AssemblyAI key from the configuration
fallback_service = AssemblyAIFallback(config.assemblyai_key)

1. Par défaut, la langue est détectée automatiquement. Pour ce cas d’usage, forcer le français :
        config = aai.TranscriptionConfig(language_code="fr")
2. Diarisation (orateurs)
        config = aai.TranscriptionConfig(speaker_labels=True)

Exemple :
    config = aai.TranscriptionConfig(language_code="fr", speaker_labels=True)
    transcript = transcriber.transcribe(audio_path, config=config)

Appel :
    Si TranscriptionService.transcribe_audio renvoie status="error", un real_time_factor >> 5 (trop lent) ou trop de segments sous le seuil confidence_threshold, alors :
        > result = fallback_service.transcribe_with_assemblyai(audio_path)

**Pipeline de transcription avec gestion automatique du fallback**

In [None]:
def _convert_numpy_types(obj):
    """Recursively convert NumPy scalars/arrays to native Python types for JSON."""
    import numpy as np

    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, (np.bool_, bool)):
        return bool(obj)
    if isinstance(obj, dict):
        return {key: _convert_numpy_types(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        # Tuples are serialised as JSON arrays anyway; their items may still
        # hold NumPy values that json cannot encode.
        return [_convert_numpy_types(item) for item in obj]
    return obj


def transcribe_audio_pipeline(
    audio_path: str,
    config: Config,
    force_denoise: Optional[bool] = None,
    analyze_quality: bool = True
) -> Dict:
    """Full transcription pipeline with an optional quality analysis.

    Args:
        audio_path: Path of the audio file.
        config: Configuration.
        force_denoise: Force denoising on or off; ``None`` is forwarded to
            the transcription service as ``"auto"``.
        analyze_quality: Analyse the quality after a successful transcription.

    Returns:
        The result dict of the transcription service, with a
        ``'quality_analysis'`` entry added when the analysis ran. The result
        is also saved as ``transcription_<timestamp>.json``.

    NOTE(review): ``format_timestamp``, ``TranscriptionService`` and
    ``QualityAnalyzer`` are not defined in the visible part of the notebook —
    confirm they are provided elsewhere.
    """

    print("=" * 70)
    print("üéØ PIPELINE DE TRANSCRIPTION INTELLIGENT V2")
    print("=" * 70)

    # Describe the input file
    file_info = prepare_audio_file(audio_path)
    print(f"üìÅ Fichier: {os.path.basename(audio_path)}")
    print(f"   Format: {file_info['format']}")
    print(f"   Dur√©e: {format_timestamp(file_info['duration_seconds'])}")
    print(f"   Taille: {file_info['size_mb']:.1f} MB")

    # Transcription service
    transcription_service = TranscriptionService(config)

    # Let the service decide about denoising when the caller did not
    if force_denoise is None:
        force_denoise = "auto"

    # Transcription
    result = transcription_service.transcribe_with_preprocessing(
        audio_path,
        preprocess=force_denoise,
        language="fr"
    )

    # Quality analysis
    if analyze_quality and result["status"] == "success":
        print("\nüìä Analyse de la qualit√©...")
        analyzer = QualityAnalyzer(config)
        quality = analyzer.analyze_transcription(result)
        result["quality_analysis"] = quality

        print(f"   Score de qualit√©: {quality['quality_score']}/100")

        if quality["quality_issues"]:
            print("   ‚ö†Ô∏è Probl√®mes d√©tect√©s:")
            for issue in quality["quality_issues"]:
                print(f"      - {issue}")

        if quality["repetitions"]:
            print("   üîÑ R√©p√©titions excessives:")
            # Only the first three entries are shown
            for word, data in list(quality["repetitions"].items())[:3]:
                print(f"      - '{word}': {data['count']} fois ({data['ratio']:.1%})")

    # Save the result. The Config class of this notebook has no `output_dir`
    # field, so reading it directly raised AttributeError after the whole
    # transcription had run; fall back to the notebook-level OUTPUT_PATH.
    output_dir = getattr(config, "output_dir", OUTPUT_PATH)
    os.makedirs(output_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = f"{output_dir}/transcription_{timestamp}.json"

    # Strip NumPy types before serialising
    result_clean = _convert_numpy_types(result)

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(result_clean, f, ensure_ascii=False, indent=2)

    print(f"\nüíæ R√©sultat sauvegard√©: {output_file}")

    # Final summary
    if result["status"] == "success":
        print("\n" + "=" * 70)
        print("‚úÖ TRANSCRIPTION R√âUSSIE")
        print("=" * 70)
        print(f"üìù M√©thode: Whisper {config.whisper_model}")
        print(f"üìä Confiance: {result.get('confidence', 0):.2%}")
        print(f"üìë Segments: {len(result.get('segments', []))}")
        print(f"üìÑ Longueur: {len(result.get('transcription', ''))} caract√®res")

        if analyze_quality:
            print(f"‚≠ê Qualit√©: {result['quality_analysis']['quality_score']}/100")

        # Preview
        text = result.get('transcription', '')
        if text:
            print(f"\nüìñ Aper√ßu (300 premiers caract√®res):")
            print(f"   {text[:300]}...")
    else:
        print(f"\n‚ùå √âchec transcription: {result.get('error')}")

    return result

In [None]:
# Test with your own audio file (alternatives are kept commented out)
#audio_file = f"{UPLOAD_PATH}atelier.mp3"
#audio_file = f"{UPLOAD_PATH}test_1h.wav"
audio_file = f"{UPLOAD_PATH}test_30mn.mp3"
#audio_info = prepare_audio_file(audio_file)