<a href="https://colab.research.google.com/github/Maya-crypto/A-Breast-Cancer-Diagnosis-Support-System-in-Traditional-Chinese-Medicine/blob/test/Untitled9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from zipfile import ZipFile
import os

# 📍 Chemin vers le fichier zip (tu peux l'importer via l'interface Colab)
zip_path = '/content/train.zip'  # à adapter
extract_to = '/content/train'  # dossier de destination

# 📁 Création du dossier de destination s'il n'existe pas
os.makedirs(extract_to, exist_ok=True)

# 🔓 Décompression
with ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

print(f"✅ Fichier dézippé dans : {extract_to}")


✅ Fichier dézippé dans : /content/train


In [3]:
import os
import sys
import subprocess
import shutil
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.patches import Polygon, Rectangle
from pathlib import Path
import json
import yaml
import random
from datetime import datetime
import pandas as pd
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

# ML Libraries
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.pipeline import Pipeline
import joblib
import xgboost as xgb

# Configuration système optimisée
CONFIG = {
    'base_dir': '.',
    'output_dir': 'mtc_output',
    'model_dir': 'mtc_models',
    'results_dir': 'mtc_results',
    'classifier_dir': 'mtc_classifiers',
    'train_split': 0.8,
    'epochs': 200,
    'batch_size': 16,
    'imgsz': 640,
    'patience': 50,
    'conf_threshold': 0.15,
    'iou_threshold': 0.3,
    'augmentation_factor': 8,
    'target_accuracy': 0.85,
    'random_seed': 42,
    'lr0': 0.0008,
    'weight_decay': 0.001,
    'mosaic': 0.8,
    'mixup': 0.1,
    'copy_paste': 0.1,
}

# Classes YOLO (16 classes)
CLASS_NAMES = [
    'Ecchymoses', 'Eduit_jaune_epais', 'Eduit_jaune_mince', 'Fissure',
    'Langue_normal', 'Langue_pale', 'Langue_petite', 'Langue_rose',
    'Langue_rouge', 'Langue_rouge_foncee', 'enduit_blanc_epais',
    'enduit_blanc_mince', 'langue_ganfelee', 'red_dot',
    'salive_humide', 'salive_normale'
]

# Zones MTC
TONGUE_ZONES = {
    'kidney': {
        'name': 'Rein',
        'coords': [(0.2, 0), (0.8, 0), (0.8, 0.15), (0.2, 0.15)],
        'color': (75, 0, 130)
    },
    'liver_gall_right': {
        'name': 'Foie-VB Droit',
        'coords': [(0, 0.15), (0.3, 0.15), (0.3, 0.65), (0, 0.65)],
        'color': (34, 139, 34)
    },
    'liver_gall_left': {
        'name': 'Foie-VB Gauche',
        'coords': [(0.7, 0.15), (1, 0.15), (1, 0.65), (0.7, 0.65)],
        'color': (50, 205, 50)
    },
    'spleen_stomach': {
        'name': 'Rate-Estomac',
        'coords': [(0.3, 0.15), (0.7, 0.15), (0.7, 0.65), (0.3, 0.65)],
        'color': (255, 215, 0)
    },
    'heart_lung': {
        'name': 'Coeur-Poumon',
        'coords': [(0.2, 0.65), (0.8, 0.65), (0.8, 1), (0.2, 1)],
        'color': (220, 20, 60)
    }
}

class YOLOFeatureExtractor:
    """Extracteur de features basé sur les détections YOLO"""

    def __init__(self, model_path):
        try:
            from ultralytics import YOLO
            self.model = YOLO(model_path)
        except ImportError:
            print("Installation d'ultralytics...")
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'ultralytics'])
            from ultralytics import YOLO
            self.model = YOLO(model_path)

    def extract_features(self, image_path):
        """Extrait features complètes d'une image"""
        try:
            results = self.model(image_path, conf=CONFIG['conf_threshold'],
                               iou=CONFIG['iou_threshold'], verbose=False)

            image = cv2.imread(str(image_path))
            h, w = image.shape[:2]

            # Initialiser features
            features = self._initialize_features()

            # Traiter détections
            detections = []
            for r in results:
                if r.boxes is not None:
                    for box in r.boxes:
                        bbox = box.xyxy[0].cpu().numpy()
                        conf = float(box.conf)
                        cls = int(box.cls)

                        if cls < len(CLASS_NAMES):
                            detection = {
                                'bbox': bbox,
                                'confidence': conf,
                                'class': cls,
                                'class_name': CLASS_NAMES[cls]
                            }
                            detections.append(detection)

            # Extraire features des détections
            features = self._extract_detection_features(detections, features, w, h)
            features = self._extract_spatial_features(detections, features, w, h)
            features = self._extract_statistical_features(detections, features)
            features = self._extract_zone_features(detections, features, w, h)

            return np.array(list(features.values()))

        except Exception as e:
            print(f"Erreur extraction features: {e}")
            return np.zeros(self._get_feature_count())

    def _initialize_features(self):
        """Initialise toutes les features à 0"""
        features = {}

        # Features par classe (confidence max et count)
        for class_name in CLASS_NAMES:
            features[f'{class_name}_max_conf'] = 0.0
            features[f'{class_name}_count'] = 0.0
            features[f'{class_name}_avg_conf'] = 0.0

        # Features spatiales
        for zone in TONGUE_ZONES.keys():
            features[f'{zone}_detection_count'] = 0.0
            features[f'{zone}_avg_confidence'] = 0.0

        # Features statistiques globales
        features['total_detections'] = 0.0
        features['avg_confidence_all'] = 0.0
        features['std_confidence'] = 0.0
        features['detection_density'] = 0.0

        # Features morphologiques
        features['tongue_area_ratio'] = 0.0
        features['detection_spread_x'] = 0.0
        features['detection_spread_y'] = 0.0

        # Features de co-occurrence
        features['pathological_combinations'] = 0.0
        features['healthy_indicators'] = 0.0

        return features

    def _extract_detection_features(self, detections, features, w, h):
        """Extrait features basées sur les détections"""
        class_detections = defaultdict(list)

        for det in detections:
            class_name = det['class_name']
            conf = det['confidence']
            class_detections[class_name].append(conf)

        # Features par classe
        for class_name in CLASS_NAMES:
            confs = class_detections[class_name]
            if confs:
                features[f'{class_name}_max_conf'] = max(confs)
                features[f'{class_name}_count'] = len(confs)
                features[f'{class_name}_avg_conf'] = np.mean(confs)
            else:
                features[f'{class_name}_max_conf'] = 0.0
                features[f'{class_name}_count'] = 0.0
                features[f'{class_name}_avg_conf'] = 0.0

        return features

    def _extract_spatial_features(self, detections, features, w, h):
        """Extrait features spatiales et de zones"""
        if not detections:
            return features

        # Calculer centroids normalisés
        centroids = []
        for det in detections:
            bbox = det['bbox']
            cx = ((bbox[0] + bbox[2]) / 2) / w
            cy = ((bbox[1] + bbox[3]) / 2) / h
            centroids.append((cx, cy))

        # Spread spatial
        if centroids:
            x_coords = [c[0] for c in centroids]
            y_coords = [c[1] for c in centroids]
            features['detection_spread_x'] = np.std(x_coords) if len(x_coords) > 1 else 0
            features['detection_spread_y'] = np.std(y_coords) if len(y_coords) > 1 else 0

        return features

    def _extract_statistical_features(self, detections, features):
        """Extrait features statistiques globales"""
        if not detections:
            return features

        confidences = [det['confidence'] for det in detections]

        features['total_detections'] = len(detections)
        features['avg_confidence_all'] = np.mean(confidences)
        features['std_confidence'] = np.std(confidences)
        features['detection_density'] = len(detections) / 100.0  # Normalisé

        return features

    def _extract_zone_features(self, detections, features, w, h):
        """Extrait features par zone MTC"""
        zone_detections = defaultdict(list)

        for det in detections:
            bbox = det['bbox']
            cx = ((bbox[0] + bbox[2]) / 2) / w
            cy = ((bbox[1] + bbox[3]) / 2) / h

            zone = self._find_zone(cx, cy)
            if zone:
                zone_detections[zone].append(det['confidence'])

        # Features par zone
        for zone in TONGUE_ZONES.keys():
            confs = zone_detections[zone]
            if confs:
                features[f'{zone}_detection_count'] = len(confs)
                features[f'{zone}_avg_confidence'] = np.mean(confs)
            else:
                features[f'{zone}_detection_count'] = 0.0
                features[f'{zone}_avg_confidence'] = 0.0

        return features

    def _find_zone(self, x, y):
        """Trouve zone pour coordonnées"""
        for zone_name, zone_info in TONGUE_ZONES.items():
            if self._point_in_polygon(x, y, zone_info['coords']):
                return zone_name
        return None

    def _point_in_polygon(self, x, y, coords):
        """Test point dans polygone"""
        n = len(coords)
        inside = False
        j = n - 1
        for i in range(n):
            xi, yi = coords[i]
            xj, yj = coords[j]
            if ((yi > y) != (yj > y)) and (x < (xj - xi) * (y - yi) / (yj - yi) + xi):
                inside = not inside
            j = i
        return inside

    def _get_feature_count(self):
        """Retourne le nombre total de features"""
        # 3 features par classe (48) + 10 zones x 2 (20) + 7 globales + 2 morpho + 2 co-occur
        return len(CLASS_NAMES) * 3 + len(TONGUE_ZONES) * 2 + 7 + 2 + 2

    def get_feature_names(self):
        """Retourne noms des features"""
        names = []

        # Features par classe
        for class_name in CLASS_NAMES:
            names.extend([
                f'{class_name}_max_conf',
                f'{class_name}_count',
                f'{class_name}_avg_conf'
            ])

        # Features par zone
        for zone in TONGUE_ZONES.keys():
            names.extend([
                f'{zone}_detection_count',
                f'{zone}_avg_confidence'
            ])

        # Features globales
        names.extend([
            'total_detections', 'avg_confidence_all', 'std_confidence',
            'detection_density', 'tongue_area_ratio', 'detection_spread_x',
            'detection_spread_y', 'pathological_combinations', 'healthy_indicators'
        ])

        return names

class HybridClassificationSystem:
    """Système hybride YOLO + Classification ML"""

    def __init__(self, yolo_model_path):
        self.feature_extractor = YOLOFeatureExtractor(yolo_model_path)
        self.classifiers = {}
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        self.is_trained = False

        # Créer dossier pour sauvegardes
        self.classifier_dir = Path(CONFIG['classifier_dir'])
        self.classifier_dir.mkdir(exist_ok=True)

    def prepare_training_data(self, image_folder, labels_file=None):
        """Prépare données d'entraînement"""
        print("PREPARATION DES DONNEES D'ENTRAINEMENT")
        print("="*60)

        image_paths = []
        labels = []

        # Collecter images et labels
        for img_path in Path(image_folder).glob('*.jpg'):
            image_paths.append(img_path)

            # Extraire label du nom de fichier (comme dans votre code original)
            label = self._extract_label_from_filename(img_path.name)
            labels.append(label)

        print(f"Images trouvées: {len(image_paths)}")

        # Distribution des labels
        label_counts = defaultdict(int)
        for label in labels:
            label_counts[label] += 1

        print("Distribution des labels:")
        for label, count in label_counts.items():
            print(f"  - {label}: {count}")

        # Extraire features
        print("\nExtraction des features...")
        features_list = []
        valid_labels = []

        for i, (img_path, label) in enumerate(zip(image_paths, labels)):
            if i % 10 == 0:
                print(f"  Progression: {i}/{len(image_paths)}")

            features = self.feature_extractor.extract_features(img_path)
            if features is not None and not np.all(features == 0):
                features_list.append(features)
                valid_labels.append(label)

        X = np.array(features_list)
        y = np.array(valid_labels)

        print(f"\nFeatures extraites: {X.shape}")
        print(f"Features par image: {X.shape[1]}")

        return X, y

    def train_classifiers(self, X, y):
        """Entraîne plusieurs classificateurs"""
        print("\nENTRAINEMENT DES CLASSIFICATEURS")
        print("="*60)

        # Encoder labels
        y_encoded = self.label_encoder.fit_transform(y)

        # Split train/test
        X_train, X_test, y_train, y_test = train_test_split(
            X, y_encoded, test_size=0.2, random_state=CONFIG['random_seed'],
            stratify=y_encoded
        )

        # Normaliser features
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        # Définir classificateurs
        classifiers_config = {
            'RandomForest': {
                'model': RandomForestClassifier(
                    n_estimators=200,
                    max_depth=15,
                    min_samples_split=5,
                    min_samples_leaf=2,
                    random_state=CONFIG['random_seed'],
                    n_jobs=-1
                ),
                'use_scaling': False
            },
            'XGBoost': {
                'model': xgb.XGBClassifier(
                    n_estimators=200,
                    max_depth=8,
                    learning_rate=0.1,
                    subsample=0.8,
                    colsample_bytree=0.8,
                    random_state=CONFIG['random_seed'],
                    n_jobs=-1
                ),
                'use_scaling': False
            },
            'SVM': {
                'model': SVC(
                    kernel='rbf',
                    C=10,
                    gamma='scale',
                    probability=True,
                    random_state=CONFIG['random_seed']
                ),
                'use_scaling': True
            },
            'MLP': {
                'model': MLPClassifier(
                    hidden_layer_sizes=(128, 64, 32),
                    activation='relu',
                    solver='adam',
                    alpha=0.001,
                    learning_rate='adaptive',
                    max_iter=500,
                    random_state=CONFIG['random_seed']
                ),
                'use_scaling': True
            },
            'GradientBoosting': {
                'model': GradientBoostingClassifier(
                    n_estimators=200,
                    learning_rate=0.1,
                    max_depth=8,
                    random_state=CONFIG['random_seed']
                ),
                'use_scaling': False
            }
        }

        # Entraîner et évaluer chaque classificateur
        results = {}

        for name, config in classifiers_config.items():
            print(f"\nEntraînement {name}...")

            model = config['model']

            if config['use_scaling']:
                X_train_input = X_train_scaled
                X_test_input = X_test_scaled
            else:
                X_train_input = X_train
                X_test_input = X_test

            # Entraîner
            model.fit(X_train_input, y_train)

            # Prédictions
            y_pred = model.predict(X_test_input)
            y_pred_proba = model.predict_proba(X_test_input)

            # Métriques
            accuracy = accuracy_score(y_test, y_pred)

            # Cross-validation
            cv_scores = cross_val_score(model, X_train_input, y_train, cv=5)

            results[name] = {
                'model': model,
                'accuracy': accuracy,
                'cv_mean': cv_scores.mean(),
                'cv_std': cv_scores.std(),
                'use_scaling': config['use_scaling']
            }

            print(f"  Accuracy: {accuracy:.4f}")
            print(f"  CV Score: {cv_scores.mean():.4f} (+/- {cv_scores.std()*2:.4f})")

            # Rapport détaillé pour le meilleur modèle
            if name == 'RandomForest':  # Afficher détails pour RF
                print(f"\nRapport détaillé {name}:")
                class_names = self.label_encoder.classes_
                print(classification_report(y_test, y_pred, target_names=class_names))

        # Sélectionner meilleur modèle
        best_name = max(results.keys(), key=lambda k: results[k]['cv_mean'])
        best_model_info = results[best_name]

        print(f"\n🏆 MEILLEUR MODELE: {best_name}")
        print(f"   Accuracy: {best_model_info['accuracy']:.4f}")
        print(f"   CV Score: {best_model_info['cv_mean']:.4f}")

        # Sauvegarder modèles
        self.classifiers = results
        self.best_classifier_name = best_name
        self.is_trained = True

        # Sauvegarder sur disque
        self.save_models()

        return results

    def predict(self, image_path, return_probabilities=False):
        """Prédiction hybride YOLO + Classification"""
        if not self.is_trained:
            self.load_models()

        # Extraire features avec YOLO
        features = self.feature_extractor.extract_features(image_path)

        if features is None or np.all(features == 0):
            return {
                'prediction': 'unknown',
                'confidence': 0.0,
                'probabilities': {},
                'error': 'Aucune feature extraite'
            }

        # Utiliser le meilleur classificateur
        best_clf_info = self.classifiers[self.best_classifier_name]
        model = best_clf_info['model']
        use_scaling = best_clf_info['use_scaling']

        # Préparer features
        features_reshaped = features.reshape(1, -1)
        if use_scaling:
            features_reshaped = self.scaler.transform(features_reshaped)

        # Prédiction
        prediction_encoded = model.predict(features_reshaped)[0]
        prediction = self.label_encoder.inverse_transform([prediction_encoded])[0]

        # Probabilités
        probabilities = model.predict_proba(features_reshaped)[0]
        prob_dict = {}
        for i, class_name in enumerate(self.label_encoder.classes_):
            prob_dict[class_name] = probabilities[i]

        confidence = max(probabilities)

        result = {
            'prediction': prediction,
            'confidence': confidence,
            'probabilities': prob_dict,
            'classifier_used': self.best_classifier_name
        }

        if return_probabilities:
            result['all_classifier_predictions'] = self._get_ensemble_predictions(features_reshaped)

        return result

    def _get_ensemble_predictions(self, features):
        """Prédictions ensemble de tous les classificateurs"""
        ensemble_results = {}

        for name, clf_info in self.classifiers.items():
            model = clf_info['model']
            use_scaling = clf_info['use_scaling']

            features_input = self.scaler.transform(features) if use_scaling else features

            prediction_encoded = model.predict(features_input)[0]
            prediction = self.label_encoder.inverse_transform([prediction_encoded])[0]
            probabilities = model.predict_proba(features_input)[0]

            ensemble_results[name] = {
                'prediction': prediction,
                'confidence': max(probabilities),
                'probabilities': dict(zip(self.label_encoder.classes_, probabilities))
            }

        return ensemble_results

    def save_models(self):
        """Sauvegarde modèles et preprocessing"""
        print(f"\nSauvegarde modèles dans {self.classifier_dir}")

        # Sauvegarder chaque classificateur
        for name, clf_info in self.classifiers.items():
            model_path = self.classifier_dir / f"{name}_model.joblib"
            joblib.dump(clf_info['model'], model_path)

        # Sauvegarder preprocessing
        joblib.dump(self.scaler, self.classifier_dir / "scaler.joblib")
        joblib.dump(self.label_encoder, self.classifier_dir / "label_encoder.joblib")

        # Sauvegarder config
        config = {
            'best_classifier': self.best_classifier_name,
            'classifiers_config': {name: {'use_scaling': info['use_scaling']}
                                 for name, info in self.classifiers.items()},
            'feature_names': self.feature_extractor.get_feature_names()
        }

        with open(self.classifier_dir / "config.json", 'w') as f:
            json.dump(config, f, indent=2)

        print("✅ Modèles sauvegardés")

    def load_models(self):
        """Charge modèles sauvegardés"""
        config_path = self.classifier_dir / "config.json"

        if not config_path.exists():
            raise FileNotFoundError("Aucun modèle sauvegardé trouvé")

        print("Chargement modèles sauvegardés...")

        # Charger config
        with open(config_path, 'r') as f:
            config = json.load(f)

        self.best_classifier_name = config['best_classifier']

        # Charger preprocessing
        self.scaler = joblib.load(self.classifier_dir / "scaler.joblib")
        self.label_encoder = joblib.load(self.classifier_dir / "label_encoder.joblib")

        # Charger classificateurs
        self.classifiers = {}
        for name, clf_config in config['classifiers_config'].items():
            model_path = self.classifier_dir / f"{name}_model.joblib"
            if model_path.exists():
                model = joblib.load(model_path)
                self.classifiers[name] = {
                    'model': model,
                    'use_scaling': clf_config['use_scaling']
                }

        self.is_trained = True
        print("✅ Modèles chargés")

    def _extract_label_from_filename(self, filename):
        """Extrait label du nom de fichier"""
        filename_lower = filename.lower()

        if any(keyword in filename_lower for keyword in ['healthy', 'sain', 'normal']):
            return 'healthy'
        elif any(keyword in filename_lower for keyword in ['ebc', 'early', 'precoce']):
            return 'early'
        elif any(keyword in filename_lower for keyword in ['abc', 'advanced', 'avance']):
            return 'advanced'
        elif any(keyword in filename_lower for keyword in ['real']):
            if any(keyword in filename_lower for keyword in ['1', 'one', 'first']):
                return 'healthy'
            elif any(keyword in filename_lower for keyword in ['2', 'two', 'second']):
                return 'early'
            elif any(keyword in filename_lower for keyword in ['3', 'three', 'third']):
                return 'advanced'
            else:
                return 'unknown'
        else:
            return 'unknown'

class HybridDiagnosticVisualizer:
    """Visualiseur pour système hybride"""

    def __init__(self):
        self.results_dir = Path(CONFIG['results_dir'])
        self.results_dir.mkdir(exist_ok=True)

    def create_comprehensive_visualization(self, image_path, yolo_detections, ml_result):
        """Crée visualisation complète"""
        image = cv2.imread(str(image_path))

        fig = plt.figure(figsize=(24, 8))

        # Image avec détections YOLO
        ax1 = plt.subplot(1, 4, 1)
        ax1.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        ax1.set_title('Détections YOLO', fontsize=14, fontweight='bold')

        # Ajouter bounding boxes (si disponibles)
        # Cette partie nécessiterait les détections brutes de YOLO
        ax1.axis('off')

        # Graphique des probabilités ML
        ax2 = plt.subplot(1, 4, 2)
        probs = ml_result['probabilities']
        classes = list(probs.keys())
        prob_values = list(probs.values())

        colors = ['green' if cls == 'healthy' else 'orange' if cls == 'early' else 'red'
                 for cls in classes]

        bars = ax2.bar(classes, prob_values, color=colors, alpha=0.7)
        ax2.set_title('Probabilités ML', fontsize=14, fontweight='bold')
        ax2.set_ylabel('Probabilité')
        ax2.set_ylim(0, 1)

        # Ajouter valeurs sur barres
        for bar, prob in zip(bars, prob_values):
            height = bar.get_height()
            ax2.text(bar.get_x() + bar.get_width()/2., height + 0.01,
                    f'{prob:.3f}', ha='center', va='bottom', fontweight='bold')

        plt.setp(ax2.get_xticklabels(), rotation=45, ha='right')

        # Résultat final
        ax3 = plt.subplot(1, 4, 3)
        ax3.axis('off')

        prediction = ml_result['prediction']
        confidence = ml_result['confidence']
        classifier = ml_result['classifier_used']

        # Couleur selon prédiction
        pred_color = 'green' if prediction == 'healthy' else 'orange' if prediction == 'early' else 'red'

        ax3.text(0.5, 0.9, 'DIAGNOSTIC HYBRIDE', ha='center', fontsize=16,
                weight='bold', transform=ax3.transAxes)

        ax3.text(0.5, 0.7, f'Prédiction: {prediction.upper()}', ha='center',
                fontsize=14, color=pred_color, weight='bold', transform=ax3.transAxes)

        ax3.text(0.5, 0.5, f'Confiance: {confidence:.2%}', ha='center',
                fontsize=12, transform=ax3.transAxes)

        ax3.text(0.5, 0.3, f'Classificateur: {classifier}', ha='center',
                fontsize=10, transform=ax3.transAxes)

        # Comparaison si ensemble disponible
        ax4 = plt.subplot(1, 4, 4)
        if 'all_classifier_predictions' in ml_result:
            ensemble = ml_result['all_classifier_predictions']

            clf_names = list(ensemble.keys())
            clf_confidences = [ensemble[name]['confidence'] for name in clf_names]
            clf_predictions = [ensemble[name]['prediction'] for name in clf_names]

            # Couleurs selon prédictions
            bar_colors = ['green' if pred == 'healthy' else 'orange' if pred == 'early' else 'red'
                         for pred in clf_predictions]

            bars = ax4.barh(clf_names, clf_confidences, color=bar_colors, alpha=0.7)
            ax4.set_title('Ensemble Classifiers', fontsize=14, fontweight='bold')
            ax4.set_xlabel('Confiance')
            ax4.set_xlim(0, 1)

            # Ajouter prédictions
            for i, (bar, pred) in enumerate(zip(bars, clf_predictions)):
                ax4.text(bar.get_width() + 0.01, bar.get_y() + bar.get_height()/2,
                        pred, va='center', fontweight='bold')
        else:
            ax4.axis('off')
            ax4.text(0.5, 0.5, 'Ensemble non disponible', ha='center',
                    transform=ax4.transAxes)

        plt.tight_layout()

        # Sauvegarder
        save_path = self.results_dir / f"hybrid_diag_{Path(image_path).stem}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()

        return save_path

def main_hybrid():
    """Programme principal système hybride"""
    print("="*80)
    print("SYSTEME HYBRIDE YOLO + CLASSIFICATION ML - DIAGNOSTIC MTC")
    print("Détection YOLO + Machine Learning pour Cancer du Sein")
    print("SMAILI Maya & MORSLI Manel - UMMTO 2024/2025")
    print("="*80)

    # Configuration seeds
    random.seed(CONFIG['random_seed'])
    np.random.seed(CONFIG['random_seed'])

    while True:
        print("\nMENU SYSTEME HYBRIDE")
        print("-"*40)
        print("1. Entraîner système hybride")
        print("2. Diagnostic hybride (image unique)")
        print("3. Diagnostic hybride (lot)")
        print("4. Évaluation comparative")
        print("5. Analyser features importantes")
        print("6. Quitter")
        print("-"*40)

        choice = input("Choix (1-6): ").strip()

        if choice == '1':
            print("\n🔄 ENTRAINEMENT SYSTEME HYBRIDE")

            # Vérifier modèle YOLO
            yolo_model = input("Chemin modèle YOLO: ").strip()
            if not Path(yolo_model).exists():
                print("❌ Modèle YOLO non trouvé")
                continue

            # Dossier images d'entraînement
            train_folder = input("Dossier images d'entraînement: ").strip()
            if not Path(train_folder).exists():
                print("❌ Dossier non trouvé")
                continue

            try:
                # Initialiser système
                hybrid_system = HybridClassificationSystem(yolo_model)

                # Préparer données
                X, y = hybrid_system.prepare_training_data(train_folder)

                if len(X) == 0:
                    print("❌ Aucune donnée d'entraînement valide")
                    continue

                # Entraîner
                results = hybrid_system.train_classifiers(X, y)

                print("\n✅ ENTRAINEMENT TERMINE!")
                print(f"Modèles sauvegardés dans: {hybrid_system.classifier_dir}")

            except Exception as e:
                print(f"❌ ERREUR: {e}")
                import traceback
                traceback.print_exc()

        elif choice == '2':
            print("\n🔍 DIAGNOSTIC HYBRIDE")

            yolo_model = input("Chemin modèle YOLO: ").strip()
            if not Path(yolo_model).exists():
                print("❌ Modèle YOLO non trouvé")
                continue

            image_path = input("Chemin image: ").strip().strip('"')
            if not Path(image_path).exists():
                print("❌ Image non trouvée")
                continue

            try:
                # Initialiser système
                hybrid_system = HybridClassificationSystem(yolo_model)

                # Diagnostic
                result = hybrid_system.predict(image_path, return_probabilities=True)

                # Afficher résultats
                print("\n" + "="*60)
                print("DIAGNOSTIC HYBRIDE YOLO + ML")
                print("="*60)
                print(f"📸 Image: {Path(image_path).name}")
                print(f"🎯 Prédiction: {result['prediction'].upper()}")
                print(f"📊 Confiance: {result['confidence']:.2%}")
                print(f"🤖 Classificateur: {result['classifier_used']}")

                print("\n📈 Probabilités détaillées:")
                for class_name, prob in result['probabilities'].items():
                    print(f"  - {class_name}: {prob:.4f} ({prob*100:.2f}%)")

                if 'all_classifier_predictions' in result:
                    print("\n🔄 Prédictions ensemble:")
                    for clf_name, clf_result in result['all_classifier_predictions'].items():
                        print(f"  - {clf_name}: {clf_result['prediction']} ({clf_result['confidence']:.3f})")

                # Visualisation
                visualizer = HybridDiagnosticVisualizer()
                save_path = visualizer.create_comprehensive_visualization(
                    image_path, None, result
                )
                print(f"\n📊 Visualisation: {save_path}")

            except Exception as e:
                print(f"❌ ERREUR: {e}")

        elif choice == '3':
            print("\n📁 DIAGNOSTIC LOT HYBRIDE")

            yolo_model = input("Chemin modèle YOLO: ").strip()
            if not Path(yolo_model).exists():
                print("❌ Modèle YOLO non trouvé")
                continue

            folder_path = input("Dossier images: ").strip().strip('"')
            if not Path(folder_path).exists():
                print("❌ Dossier non trouvé")
                continue

            try:
                hybrid_system = HybridClassificationSystem(yolo_model)
                images = list(Path(folder_path).glob('*.jpg'))

                if not images:
                    print("❌ Aucune image .jpg trouvée")
                    continue

                print(f"\n🔄 Traitement de {len(images)} images...")

                results_summary = defaultdict(int)
                confidence_scores = defaultdict(list)
                detailed_results = []

                for i, img_path in enumerate(images):
                    print(f"  Progression: {i+1}/{len(images)} - {img_path.name}")

                    result = hybrid_system.predict(img_path)
                    prediction = result['prediction']
                    confidence = result['confidence']

                    results_summary[prediction] += 1
                    confidence_scores[prediction].append(confidence)
                    detailed_results.append({
                        'image': img_path.name,
                        'prediction': prediction,
                        'confidence': confidence
                    })

                # Afficher résumé
                print("\n" + "="*60)
                print("RESUME DIAGNOSTIC LOT HYBRIDE")
                print("="*60)

                total = len(images)
                for stage in ['healthy', 'early', 'advanced']:
                    count = results_summary.get(stage, 0)
                    avg_conf = np.mean(confidence_scores[stage]) if confidence_scores[stage] else 0
                    percentage = (count / total) * 100

                    print(f"{stage.upper()}: {count} images ({percentage:.1f}%)")
                    print(f"  Confiance moyenne: {avg_conf:.2%}")

                # Sauvegarder résultats détaillés
                df = pd.DataFrame(detailed_results)
                results_file = Path(CONFIG['results_dir']) / f"batch_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
                df.to_csv(results_file, index=False)
                print(f"\n📄 Résultats détaillés: {results_file}")

            except Exception as e:
                print(f"❌ ERREUR: {e}")

        elif choice == '4':
            print("\n📊 EVALUATION COMPARATIVE")
            print("Fonctionnalité à implémenter...")
            # Ici vous pouvez comparer performances système hybride vs règles expertes

        elif choice == '5':
            print("\n🎯 ANALYSE FEATURES IMPORTANTES")
            print("Fonctionnalité à implémenter...")
            # Ici vous pouvez analyser quelles features sont les plus importantes

        elif choice == '6':
            print("\n👋 Au revoir! Merci d'avoir utilisé le système hybride!")
            break

if __name__ == "__main__":
    try:
        main_hybrid()
    except KeyboardInterrupt:
        print("\n\n⚠️ Interruption utilisateur")
    except Exception as e:
        print(f"\n❌ ERREUR CRITIQUE: {e}")
        import traceback
        traceback.print_exc()

SYSTEME HYBRIDE YOLO + CLASSIFICATION ML - DIAGNOSTIC MTC
Détection YOLO + Machine Learning pour Cancer du Sein
SMAILI Maya & MORSLI Manel - UMMTO 2024/2025

MENU SYSTEME HYBRIDE
----------------------------------------
1. Entraîner système hybride
2. Diagnostic hybride (image unique)
3. Diagnostic hybride (lot)
4. Évaluation comparative
5. Analyser features importantes
6. Quitter
----------------------------------------
Choix (1-6): 1

🔄 ENTRAINEMENT SYSTEME HYBRIDE
Chemin modèle YOLO: /content/mon_modele.pt
Dossier images d'entraînement: /content/train/train/images
Installation d'ultralytics...
Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
PREPARATION DES DONNEES D'ENTRAINEMENT
Images trouvées: 80
Distribution des labels:
 

Traceback (most recent call last):
  File "/tmp/ipython-input-3-1953035634.py", line 820, in main_hybrid
    X, y = hybrid_system.prepare_training_data(train_folder)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-3-1953035634.py", line 376, in prepare_training_data
    print(f"Features par image: {X.shape[1]}")
                                 ~~~~~~~^^^
IndexError: tuple index out of range


PREPARATION DES DONNEES D'ENTRAINEMENT
Images trouvées: 0
Distribution des labels:

Extraction des features...

Features extraites: (0,)
❌ ERREUR: tuple index out of range

MENU SYSTEME HYBRIDE
----------------------------------------
1. Entraîner système hybride
2. Diagnostic hybride (image unique)
3. Diagnostic hybride (lot)
4. Évaluation comparative
5. Analyser features importantes
6. Quitter
----------------------------------------
Choix (1-6): 1

🔄 ENTRAINEMENT SYSTEME HYBRIDE
Chemin modèle YOLO: /content/mon_modele.pt
Dossier images d'entraînement: /content/train/train/images/healthy_2_png_segmented_jpg.rf.83df3613866fe49ec911801361fc0a63.jpg


Traceback (most recent call last):
  File "/tmp/ipython-input-3-1953035634.py", line 820, in main_hybrid
    X, y = hybrid_system.prepare_training_data(train_folder)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-3-1953035634.py", line 376, in prepare_training_data
    print(f"Features par image: {X.shape[1]}")
                                 ~~~~~~~^^^
IndexError: tuple index out of range


PREPARATION DES DONNEES D'ENTRAINEMENT
Images trouvées: 0
Distribution des labels:

Extraction des features...

Features extraites: (0,)
❌ ERREUR: tuple index out of range

MENU SYSTEME HYBRIDE
----------------------------------------
1. Entraîner système hybride
2. Diagnostic hybride (image unique)
3. Diagnostic hybride (lot)
4. Évaluation comparative
5. Analyser features importantes
6. Quitter
----------------------------------------
Choix (1-6): 2

🔍 DIAGNOSTIC HYBRIDE
Chemin modèle YOLO: /content/mon_modele.pt
Chemin image: /content/train/train/images/healthy_4_png_segmented_jpg.rf.72a89ed9986e658ad3fe57a5fb6e0e5a.jpg
Chargement modèles sauvegardés...
✅ Modèles chargés

DIAGNOSTIC HYBRIDE YOLO + ML
📸 Image: healthy_4_png_segmented_jpg.rf.72a89ed9986e658ad3fe57a5fb6e0e5a.jpg
🎯 Prédiction: HEALTHY
📊 Confiance: 92.00%
🤖 Classificateur: SVM

📈 Probabilités détaillées:
  - advanced: 0.0234 (2.34%)
  - early: 0.0566 (5.66%)
  - healthy: 0.9200 (92.00%)

🔄 Prédictions ensemble:
  - Random

In [4]:
import os
import sys
import subprocess
import shutil
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.patches import Polygon, Rectangle
from pathlib import Path
import json
import yaml
import random
from datetime import datetime
import pandas as pd
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

# ML Libraries
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.pipeline import Pipeline
import joblib
import xgboost as xgb

# Configuration système optimisée
CONFIG = {
    'base_dir': '.',
    'output_dir': 'mtc_output',
    'model_dir': 'mtc_models',
    'results_dir': 'mtc_results',
    'classifier_dir': 'mtc_classifiers',
    'train_split': 0.8,
    'epochs': 200,
    'batch_size': 16,
    'imgsz': 640,
    'patience': 50,
    'conf_threshold': 0.15,
    'iou_threshold': 0.3,
    'augmentation_factor': 8,
    'target_accuracy': 0.85,
    'random_seed': 42,
    'lr0': 0.0008,
    'weight_decay': 0.001,
    'mosaic': 0.8,
    'mixup': 0.1,
    'copy_paste': 0.1,
}

# Classes YOLO (16 classes)
CLASS_NAMES = [
    'Ecchymoses', 'Eduit_jaune_epais', 'Eduit_jaune_mince', 'Fissure',
    'Langue_normal', 'Langue_pale', 'Langue_petite', 'Langue_rose',
    'Langue_rouge', 'Langue_rouge_foncee', 'enduit_blanc_epais',
    'enduit_blanc_mince', 'langue_ganfelee', 'red_dot',
    'salive_humide', 'salive_normale'
]

# Zones MTC
TONGUE_ZONES = {
    'kidney': {
        'name': 'Rein',
        'coords': [(0.2, 0), (0.8, 0), (0.8, 0.15), (0.2, 0.15)],
        'color': (75, 0, 130)
    },
    'liver_gall_right': {
        'name': 'Foie-VB Droit',
        'coords': [(0, 0.15), (0.3, 0.15), (0.3, 0.65), (0, 0.65)],
        'color': (34, 139, 34)
    },
    'liver_gall_left': {
        'name': 'Foie-VB Gauche',
        'coords': [(0.7, 0.15), (1, 0.15), (1, 0.65), (0.7, 0.65)],
        'color': (50, 205, 50)
    },
    'spleen_stomach': {
        'name': 'Rate-Estomac',
        'coords': [(0.3, 0.15), (0.7, 0.15), (0.7, 0.65), (0.3, 0.65)],
        'color': (255, 215, 0)
    },
    'heart_lung': {
        'name': 'Coeur-Poumon',
        'coords': [(0.2, 0.65), (0.8, 0.65), (0.8, 1), (0.2, 1)],
        'color': (220, 20, 60)
    }
}

class YOLOFeatureExtractor:
    """Extracteur de features basé sur les détections YOLO"""

    def __init__(self, model_path):
        try:
            from ultralytics import YOLO
            self.model = YOLO(model_path)
        except ImportError:
            print("Installation d'ultralytics...")
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'ultralytics'])
            from ultralytics import YOLO
            self.model = YOLO(model_path)

        # Stocker dernières détections pour visualisation
        self.last_detections = []

    def extract_features(self, image_path, return_detections=False):
        """Extrait features complètes d'une image"""
        try:
            results = self.model(image_path, conf=CONFIG['conf_threshold'],
                               iou=CONFIG['iou_threshold'], verbose=False)

            image = cv2.imread(str(image_path))
            h, w = image.shape[:2]

            # Initialiser features
            features = self._initialize_features()

            # Traiter détections
            detections = []
            for r in results:
                if r.boxes is not None:
                    for box in r.boxes:
                        bbox = box.xyxy[0].cpu().numpy()
                        conf = float(box.conf)
                        cls = int(box.cls)

                        if cls < len(CLASS_NAMES):
                            detection = {
                                'bbox': bbox,
                                'confidence': conf,
                                'class': cls,
                                'class_name': CLASS_NAMES[cls]
                            }
                            detections.append(detection)

            # Stocker détections pour visualisation
            self.last_detections = detections

            # Extraire features des détections
            features = self._extract_detection_features(detections, features, w, h)
            features = self._extract_spatial_features(detections, features, w, h)
            features = self._extract_statistical_features(detections, features)
            features = self._extract_zone_features(detections, features, w, h)

            if return_detections:
                return np.array(list(features.values())), detections
            else:
                return np.array(list(features.values()))

        except Exception as e:
            print(f"Erreur extraction features: {e}")
            if return_detections:
                return np.zeros(self._get_feature_count()), []
            else:
                return np.zeros(self._get_feature_count())

    def get_last_detections(self):
        """Retourne dernières détections YOLO"""
        return self.last_detections

    def _initialize_features(self):
        """Initialise toutes les features à 0"""
        features = {}

        # Features par classe (confidence max et count)
        for class_name in CLASS_NAMES:
            features[f'{class_name}_max_conf'] = 0.0
            features[f'{class_name}_count'] = 0.0
            features[f'{class_name}_avg_conf'] = 0.0

        # Features spatiales
        for zone in TONGUE_ZONES.keys():
            features[f'{zone}_detection_count'] = 0.0
            features[f'{zone}_avg_confidence'] = 0.0

        # Features statistiques globales
        features['total_detections'] = 0.0
        features['avg_confidence_all'] = 0.0
        features['std_confidence'] = 0.0
        features['detection_density'] = 0.0

        # Features morphologiques
        features['tongue_area_ratio'] = 0.0
        features['detection_spread_x'] = 0.0
        features['detection_spread_y'] = 0.0

        # Features de co-occurrence
        features['pathological_combinations'] = 0.0
        features['healthy_indicators'] = 0.0

        return features

    def _extract_detection_features(self, detections, features, w, h):
        """Extrait features basées sur les détections"""
        class_detections = defaultdict(list)

        for det in detections:
            class_name = det['class_name']
            conf = det['confidence']
            class_detections[class_name].append(conf)

        # Features par classe
        for class_name in CLASS_NAMES:
            confs = class_detections[class_name]
            if confs:
                features[f'{class_name}_max_conf'] = max(confs)
                features[f'{class_name}_count'] = len(confs)
                features[f'{class_name}_avg_conf'] = np.mean(confs)
            else:
                features[f'{class_name}_max_conf'] = 0.0
                features[f'{class_name}_count'] = 0.0
                features[f'{class_name}_avg_conf'] = 0.0

        return features

    def _extract_spatial_features(self, detections, features, w, h):
        """Extrait features spatiales et de zones"""
        if not detections:
            return features

        # Calculer centroids normalisés
        centroids = []
        for det in detections:
            bbox = det['bbox']
            cx = ((bbox[0] + bbox[2]) / 2) / w
            cy = ((bbox[1] + bbox[3]) / 2) / h
            centroids.append((cx, cy))

        # Spread spatial
        if centroids:
            x_coords = [c[0] for c in centroids]
            y_coords = [c[1] for c in centroids]
            features['detection_spread_x'] = np.std(x_coords) if len(x_coords) > 1 else 0
            features['detection_spread_y'] = np.std(y_coords) if len(y_coords) > 1 else 0

        return features

    def _extract_statistical_features(self, detections, features):
        """Extrait features statistiques globales"""
        if not detections:
            return features

        confidences = [det['confidence'] for det in detections]

        features['total_detections'] = len(detections)
        features['avg_confidence_all'] = np.mean(confidences)
        features['std_confidence'] = np.std(confidences)
        features['detection_density'] = len(detections) / 100.0  # Normalisé

        return features

    def _extract_zone_features(self, detections, features, w, h):
        """Extrait features par zone MTC"""
        zone_detections = defaultdict(list)

        for det in detections:
            bbox = det['bbox']
            cx = ((bbox[0] + bbox[2]) / 2) / w
            cy = ((bbox[1] + bbox[3]) / 2) / h

            zone = self._find_zone(cx, cy)
            if zone:
                zone_detections[zone].append(det['confidence'])

        # Features par zone
        for zone in TONGUE_ZONES.keys():
            confs = zone_detections[zone]
            if confs:
                features[f'{zone}_detection_count'] = len(confs)
                features[f'{zone}_avg_confidence'] = np.mean(confs)
            else:
                features[f'{zone}_detection_count'] = 0.0
                features[f'{zone}_avg_confidence'] = 0.0

        return features

    def _find_zone(self, x, y):
        """Trouve zone pour coordonnées"""
        for zone_name, zone_info in TONGUE_ZONES.items():
            if self._point_in_polygon(x, y, zone_info['coords']):
                return zone_name
        return None

    def _point_in_polygon(self, x, y, coords):
        """Test point dans polygone"""
        n = len(coords)
        inside = False
        j = n - 1
        for i in range(n):
            xi, yi = coords[i]
            xj, yj = coords[j]
            if ((yi > y) != (yj > y)) and (x < (xj - xi) * (y - yi) / (yj - yi) + xi):
                inside = not inside
            j = i
        return inside

    def _get_feature_count(self):
        """Retourne le nombre total de features"""
        # 3 features par classe (48) + 10 zones x 2 (20) + 7 globales + 2 morpho + 2 co-occur
        return len(CLASS_NAMES) * 3 + len(TONGUE_ZONES) * 2 + 7 + 2 + 2

    def get_feature_names(self):
        """Retourne noms des features"""
        names = []

        # Features par classe
        for class_name in CLASS_NAMES:
            names.extend([
                f'{class_name}_max_conf',
                f'{class_name}_count',
                f'{class_name}_avg_conf'
            ])

        # Features par zone
        for zone in TONGUE_ZONES.keys():
            names.extend([
                f'{zone}_detection_count',
                f'{zone}_avg_confidence'
            ])

        # Features globales
        names.extend([
            'total_detections', 'avg_confidence_all', 'std_confidence',
            'detection_density', 'tongue_area_ratio', 'detection_spread_x',
            'detection_spread_y', 'pathological_combinations', 'healthy_indicators'
        ])

        return names

class HybridClassificationSystem:
    """Système hybride YOLO + Classification ML"""

    def __init__(self, yolo_model_path):
        self.feature_extractor = YOLOFeatureExtractor(yolo_model_path)
        self.classifiers = {}
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        self.is_trained = False

        # Créer dossier pour sauvegardes
        self.classifier_dir = Path(CONFIG['classifier_dir'])
        self.classifier_dir.mkdir(exist_ok=True)

    def prepare_training_data(self, image_folder, labels_file=None):
        """Prépare données d'entraînement"""
        print("PREPARATION DES DONNEES D'ENTRAINEMENT")
        print("="*60)

        image_paths = []
        labels = []

        # Collecter images et labels
        for img_path in Path(image_folder).glob('*.jpg'):
            image_paths.append(img_path)

            # Extraire label du nom de fichier (comme dans votre code original)
            label = self._extract_label_from_filename(img_path.name)
            labels.append(label)

        print(f"Images trouvées: {len(image_paths)}")

        # Distribution des labels
        label_counts = defaultdict(int)
        for label in labels:
            label_counts[label] += 1

        print("Distribution des labels:")
        for label, count in label_counts.items():
            print(f"  - {label}: {count}")

        # Extraire features
        print("\nExtraction des features...")
        features_list = []
        valid_labels = []

        for i, (img_path, label) in enumerate(zip(image_paths, labels)):
            if i % 10 == 0:
                print(f"  Progression: {i}/{len(image_paths)}")

            features = self.feature_extractor.extract_features(img_path)
            if features is not None and not np.all(features == 0):
                features_list.append(features)
                valid_labels.append(label)

        X = np.array(features_list)
        y = np.array(valid_labels)

        print(f"\nFeatures extraites: {X.shape}")
        print(f"Features par image: {X.shape[1]}")

        return X, y

    def train_classifiers(self, X, y):
        """Entraîne plusieurs classificateurs"""
        print("\nENTRAINEMENT DES CLASSIFICATEURS")
        print("="*60)

        # Encoder labels
        y_encoded = self.label_encoder.fit_transform(y)

        # Split train/test
        X_train, X_test, y_train, y_test = train_test_split(
            X, y_encoded, test_size=0.2, random_state=CONFIG['random_seed'],
            stratify=y_encoded
        )

        # Normaliser features
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        # Définir classificateurs
        classifiers_config = {
            'RandomForest': {
                'model': RandomForestClassifier(
                    n_estimators=200,
                    max_depth=15,
                    min_samples_split=5,
                    min_samples_leaf=2,
                    random_state=CONFIG['random_seed'],
                    n_jobs=-1
                ),
                'use_scaling': False
            },
            'XGBoost': {
                'model': xgb.XGBClassifier(
                    n_estimators=200,
                    max_depth=8,
                    learning_rate=0.1,
                    subsample=0.8,
                    colsample_bytree=0.8,
                    random_state=CONFIG['random_seed'],
                    n_jobs=-1
                ),
                'use_scaling': False
            },
            'SVM': {
                'model': SVC(
                    kernel='rbf',
                    C=10,
                    gamma='scale',
                    probability=True,
                    random_state=CONFIG['random_seed']
                ),
                'use_scaling': True
            },
            'MLP': {
                'model': MLPClassifier(
                    hidden_layer_sizes=(128, 64, 32),
                    activation='relu',
                    solver='adam',
                    alpha=0.001,
                    learning_rate='adaptive',
                    max_iter=500,
                    random_state=CONFIG['random_seed']
                ),
                'use_scaling': True
            },
            'GradientBoosting': {
                'model': GradientBoostingClassifier(
                    n_estimators=200,
                    learning_rate=0.1,
                    max_depth=8,
                    random_state=CONFIG['random_seed']
                ),
                'use_scaling': False
            }
        }

        # Entraîner et évaluer chaque classificateur
        results = {}

        for name, config in classifiers_config.items():
            print(f"\nEntraînement {name}...")

            model = config['model']

            if config['use_scaling']:
                X_train_input = X_train_scaled
                X_test_input = X_test_scaled
            else:
                X_train_input = X_train
                X_test_input = X_test

            # Entraîner
            model.fit(X_train_input, y_train)

            # Prédictions
            y_pred = model.predict(X_test_input)
            y_pred_proba = model.predict_proba(X_test_input)

            # Métriques
            accuracy = accuracy_score(y_test, y_pred)

            # Cross-validation
            cv_scores = cross_val_score(model, X_train_input, y_train, cv=5)

            results[name] = {
                'model': model,
                'accuracy': accuracy,
                'cv_mean': cv_scores.mean(),
                'cv_std': cv_scores.std(),
                'use_scaling': config['use_scaling']
            }

            print(f"  Accuracy: {accuracy:.4f}")
            print(f"  CV Score: {cv_scores.mean():.4f} (+/- {cv_scores.std()*2:.4f})")

            # Rapport détaillé pour le meilleur modèle
            if name == 'RandomForest':  # Afficher détails pour RF
                print(f"\nRapport détaillé {name}:")
                class_names = self.label_encoder.classes_
                print(classification_report(y_test, y_pred, target_names=class_names))

        # Sélectionner meilleur modèle
        best_name = max(results.keys(), key=lambda k: results[k]['cv_mean'])
        best_model_info = results[best_name]

        print(f"\n🏆 MEILLEUR MODELE: {best_name}")
        print(f"   Accuracy: {best_model_info['accuracy']:.4f}")
        print(f"   CV Score: {best_model_info['cv_mean']:.4f}")

        # Sauvegarder modèles
        self.classifiers = results
        self.best_classifier_name = best_name
        self.is_trained = True

        # Sauvegarder sur disque
        self.save_models()

        return results

    def predict(self, image_path, return_probabilities=False):
        """Prédiction hybride YOLO + Classification"""
        if not self.is_trained:
            self.load_models()

        # Extraire features avec YOLO et récupérer détections
        features, detections = self.feature_extractor.extract_features(image_path, return_detections=True)

        if features is None or np.all(features == 0):
            return {
                'prediction': 'unknown',
                'confidence': 0.0,
                'probabilities': {},
                'detections': [],
                'detected_features': {},
                'error': 'Aucune feature extraite'
            }

        # Utiliser le meilleur classificateur
        best_clf_info = self.classifiers[self.best_classifier_name]
        model = best_clf_info['model']
        use_scaling = best_clf_info['use_scaling']

        # Préparer features
        features_reshaped = features.reshape(1, -1)
        if use_scaling:
            features_reshaped = self.scaler.transform(features_reshaped)

        # Prédiction
        prediction_encoded = model.predict(features_reshaped)[0]
        prediction = self.label_encoder.inverse_transform([prediction_encoded])[0]

        # Probabilités
        probabilities = model.predict_proba(features_reshaped)[0]
        prob_dict = {}
        for i, class_name in enumerate(self.label_encoder.classes_):
            prob_dict[class_name] = probabilities[i]

        confidence = max(probabilities)

        # Extraire features significatives pour affichage
        feature_names = self.feature_extractor.get_feature_names()
        detected_features = {}
        for i, (name, value) in enumerate(zip(feature_names, features)):
            if value > 0 and 'count' not in name.lower():  # Exclure les counts pour lisibilité
                detected_features[name] = value

        # Trier par importance
        detected_features = dict(sorted(detected_features.items(),
                                      key=lambda x: x[1], reverse=True))

        result = {
            'prediction': prediction,
            'confidence': confidence,
            'probabilities': prob_dict,
            'classifier_used': self.best_classifier_name,
            'detections': detections,
            'detected_features': detected_features
        }

        if return_probabilities:
            result['all_classifier_predictions'] = self._get_ensemble_predictions(features_reshaped)

        return result

    def _get_ensemble_predictions(self, features):
        """Prédictions ensemble de tous les classificateurs"""
        ensemble_results = {}

        for name, clf_info in self.classifiers.items():
            model = clf_info['model']
            use_scaling = clf_info['use_scaling']

            features_input = self.scaler.transform(features) if use_scaling else features

            prediction_encoded = model.predict(features_input)[0]
            prediction = self.label_encoder.inverse_transform([prediction_encoded])[0]
            probabilities = model.predict_proba(features_input)[0]

            ensemble_results[name] = {
                'prediction': prediction,
                'confidence': max(probabilities),
                'probabilities': dict(zip(self.label_encoder.classes_, probabilities))
            }

        return ensemble_results

    def save_models(self):
        """Sauvegarde modèles et preprocessing"""
        print(f"\nSauvegarde modèles dans {self.classifier_dir}")

        # Sauvegarder chaque classificateur
        for name, clf_info in self.classifiers.items():
            model_path = self.classifier_dir / f"{name}_model.joblib"
            joblib.dump(clf_info['model'], model_path)

        # Sauvegarder preprocessing
        joblib.dump(self.scaler, self.classifier_dir / "scaler.joblib")
        joblib.dump(self.label_encoder, self.classifier_dir / "label_encoder.joblib")

        # Sauvegarder config
        config = {
            'best_classifier': self.best_classifier_name,
            'classifiers_config': {name: {'use_scaling': info['use_scaling']}
                                 for name, info in self.classifiers.items()},
            'feature_names': self.feature_extractor.get_feature_names()
        }

        with open(self.classifier_dir / "config.json", 'w') as f:
            json.dump(config, f, indent=2)

        print("✅ Modèles sauvegardés")

    def load_models(self):
        """Charge modèles sauvegardés"""
        config_path = self.classifier_dir / "config.json"

        if not config_path.exists():
            raise FileNotFoundError("Aucun modèle sauvegardé trouvé")

        print("Chargement modèles sauvegardés...")

        # Charger config
        with open(config_path, 'r') as f:
            config = json.load(f)

        self.best_classifier_name = config['best_classifier']

        # Charger preprocessing
        self.scaler = joblib.load(self.classifier_dir / "scaler.joblib")
        self.label_encoder = joblib.load(self.classifier_dir / "label_encoder.joblib")

        # Charger classificateurs
        self.classifiers = {}
        for name, clf_config in config['classifiers_config'].items():
            model_path = self.classifier_dir / f"{name}_model.joblib"
            if model_path.exists():
                model = joblib.load(model_path)
                self.classifiers[name] = {
                    'model': model,
                    'use_scaling': clf_config['use_scaling']
                }

        self.is_trained = True
        print("✅ Modèles chargés")

    def _extract_label_from_filename(self, filename):
        """Extrait label du nom de fichier"""
        filename_lower = filename.lower()

        if any(keyword in filename_lower for keyword in ['healthy', 'sain', 'normal']):
            return 'healthy'
        elif any(keyword in filename_lower for keyword in ['ebc', 'early', 'precoce']):
            return 'early'
        elif any(keyword in filename_lower for keyword in ['abc', 'advanced', 'avance']):
            return 'advanced'
        elif any(keyword in filename_lower for keyword in ['real']):
            if any(keyword in filename_lower for keyword in ['1', 'one', 'first']):
                return 'healthy'
            elif any(keyword in filename_lower for keyword in ['2', 'two', 'second']):
                return 'early'
            elif any(keyword in filename_lower for keyword in ['3', 'three', 'third']):
                return 'advanced'
            else:
                return 'unknown'
        else:
            return 'unknown'

class HybridDiagnosticVisualizer:
    """Visualiseur pour système hybride avec cartographie MTC"""

    def __init__(self):
        self.results_dir = Path(CONFIG['results_dir'])
        self.results_dir.mkdir(exist_ok=True)
        self.zones = TONGUE_ZONES

    def create_comprehensive_visualization(self, image_path, yolo_detections, ml_result, feature_extractor=None):
        """Crée visualisation complète avec cartographie MTC"""
        image = cv2.imread(str(image_path))

        # Récupérer les détections YOLO brutes si nécessaire
        if yolo_detections is None:
            yolo_detections = self._get_yolo_detections(image_path, feature_extractor)

        fig = plt.figure(figsize=(20, 10))

        # 1. Image avec détections YOLO et bounding boxes
        ax1 = plt.subplot(1, 3, 1)
        ax1.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        ax1.set_title('Détections YOLO', fontsize=14, fontweight='bold')

        # Ajouter bounding boxes avec labels
        for det in yolo_detections:
            bbox = det['bbox']
            x1, y1, x2, y2 = bbox

            color = self._get_color_for_class(det['class_name'])

            # Rectangle de détection
            rect = Rectangle((x1, y1), x2-x1, y2-y1,
                           linewidth=2, edgecolor=color, facecolor='none')
            ax1.add_patch(rect)

            # Label avec confiance
            label = f"{det['class_name']}: {det['confidence']:.2f}"
            ax1.text(x1, y1-5, label, color='white', backgroundcolor=color,
                    fontsize=8, weight='bold')

        ax1.axis('off')

        # 2. Cartographie MTC avec zones et détections
        ax2 = plt.subplot(1, 3, 2)
        ax2.set_xlim(0, 1)
        ax2.set_ylim(1, 0)  # Inverser Y pour correspondre à l'image
        ax2.set_aspect('equal')
        ax2.set_title('Cartographie MTC', fontsize=14, fontweight='bold')

        # Dessiner zones MTC
        zone_counts = defaultdict(int)
        for zone_name, zone_info in self.zones.items():
            coords = zone_info['coords']
            color = np.array(zone_info['color']) / 255.0

            # Polygone de zone
            polygon = Polygon(coords, facecolor=color, alpha=0.3,
                            edgecolor='black', linewidth=2)
            ax2.add_patch(polygon)

            # Nom de zone au centre
            cx = np.mean([c[0] for c in coords])
            cy = np.mean([c[1] for c in coords])
            ax2.text(cx, cy, zone_info['name'], ha='center', va='center',
                    fontsize=11, weight='bold',
                    bbox=dict(boxstyle="round,pad=0.3", facecolor='white', alpha=0.8))

        # Projeter détections sur cartographie
        h, w = image.shape[:2]
        for det in yolo_detections:
            bbox = det['bbox']
            # Calculer centre de la détection
            cx = ((bbox[0] + bbox[2]) / 2) / w
            cy = ((bbox[1] + bbox[3]) / 2) / h

            # Trouver zone correspondante
            zone = self._find_zone(cx, cy)
            if zone:
                zone_counts[zone] += 1

            # Afficher détection sur cartographie
            color = self._get_color_for_class(det['class_name'])
            ax2.scatter(cx, cy, s=150, c=color, marker='x', linewidths=4, alpha=0.8)

            # Ajouter label près du point
            ax2.text(cx+0.02, cy, det['class_name'][:8], fontsize=8,
                    color=color, weight='bold')

        ax2.set_xlabel('Gauche ← → Droite', fontsize=10)
        ax2.set_ylabel('Avant ← → Arrière', fontsize=10)
        ax2.grid(True, alpha=0.3)

        # 3. Diagnostic final avec détails
        ax3 = plt.subplot(1, 3, 3)
        ax3.axis('off')

        prediction = ml_result['prediction']
        confidence = ml_result['confidence']
        classifier = ml_result['classifier_used']

        # Couleurs et messages selon diagnostic
        if prediction == 'healthy':
            color = 'green'
            message = "LANGUE SAINE\nAucun signe pathologique"
        elif prediction == 'early':
            color = 'orange'
            message = "STADE PRECOCE (EBC)\nSignes initiaux détectés"
        else:
            color = 'red'
            message = "STADE AVANCE (ABC)\nSignes pathologiques importants"

        # Titre diagnostic
        ax3.text(0.5, 0.95, 'DIAGNOSTIC MTC HYBRIDE', ha='center', fontsize=16,
                weight='bold', transform=ax3.transAxes)

        # Résultat principal
        ax3.text(0.5, 0.85, message, ha='center', fontsize=12,
                color=color, weight='bold', transform=ax3.transAxes,
                bbox=dict(boxstyle="round,pad=0.5", facecolor=color, alpha=0.1))

        # Confiance
        ax3.text(0.5, 0.70, f'Confiance: {confidence:.1%}', ha='center',
                fontsize=12, weight='bold', transform=ax3.transAxes)

        # Classificateur utilisé
        ax3.text(0.5, 0.62, f'Classificateur: {classifier}', ha='center',
                fontsize=10, style='italic', transform=ax3.transAxes)

        # Scores détaillés
        y_pos = 0.52
        ax3.text(0.1, y_pos, 'Scores détaillés:', fontsize=11, weight='bold',
                transform=ax3.transAxes)

        for stage, prob in ml_result['probabilities'].items():
            y_pos -= 0.06
            stage_color = 'green' if stage == 'healthy' else 'orange' if stage == 'early' else 'red'
            ax3.text(0.15, y_pos, f'{stage}: {prob:.1%}', fontsize=10,
                    color=stage_color, transform=ax3.transAxes)

        # Localisation par zones
        if zone_counts:
            y_pos -= 0.08
            ax3.text(0.1, y_pos, 'Localisation MTC:', fontsize=11, weight='bold',
                    transform=ax3.transAxes)

            for zone, count in zone_counts.items():
                if count > 0:
                    y_pos -= 0.05
                    zone_name = self.zones[zone]['name']
                    ax3.text(0.15, y_pos, f'{zone_name}: {count}', fontsize=9,
                            transform=ax3.transAxes)

        # Caractéristiques détectées
        if 'detected_features' in ml_result:
            y_pos -= 0.08
            ax3.text(0.1, y_pos, 'Caractéristiques:', fontsize=11, weight='bold',
                    transform=ax3.transAxes)

            features = ml_result['detected_features']
            for feature, score in list(features.items())[:5]:  # Top 5
                if score > 0:
                    y_pos -= 0.04
                    ax3.text(0.15, y_pos, f'{feature}: {score:.2f}', fontsize=8,
                            transform=ax3.transAxes)

        plt.tight_layout()

        # Sauvegarder
        save_path = self.results_dir / f"hybrid_mtc_{Path(image_path).stem}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()

        return save_path, zone_counts

    def _get_yolo_detections(self, image_path, feature_extractor=None):
        """Récupère détections YOLO brutes si pas fournies"""
        if feature_extractor:
            _, detections = feature_extractor.extract_features(image_path, return_detections=True)
            return detections
        else:
            # Fallback : essayer de créer un extracteur temporaire
            try:
                from ultralytics import YOLO
                model = YOLO('yolov8s.pt')  # Modèle par défaut
                results = model(image_path, conf=CONFIG['conf_threshold'], verbose=False)

                detections = []
                for r in results:
                    if r.boxes is not None:
                        for box in r.boxes:
                            bbox = box.xyxy[0].cpu().numpy()
                            conf = float(box.conf)
                            cls = int(box.cls)

                            if cls < len(CLASS_NAMES):
                                detection = {
                                    'bbox': bbox,
                                    'confidence': conf,
                                    'class': cls,
                                    'class_name': CLASS_NAMES[cls]
                                }
                                detections.append(detection)
                return detections
            except:
                return []

    def _get_color_for_class(self, class_name):
        """Retourne couleur selon classe (même logique que votre code original)"""
        if any(x in class_name for x in ['normal', 'rose', 'salive_normale']):
            return 'green'
        elif any(x in class_name for x in ['pale', 'blanc_mince', 'jaune_mince']):
            return 'orange'
        elif any(x in class_name for x in ['rouge_foncee', 'jaune_epais', 'Ecchymoses']):
            return 'red'
        elif any(x in class_name for x in ['rouge', 'blanc_epais']):
            return 'darkorange'
        elif 'red_dot' in class_name:
            return 'crimson'
        elif 'Fissure' in class_name:
            return 'purple'
        else:
            return 'gray'

    def _find_zone(self, x, y):
        """Trouve zone MTC pour coordonnées (même logique que votre code)"""
        for zone_name, zone_info in self.zones.items():
            if self._point_in_polygon(x, y, zone_info['coords']):
                return zone_name
        return None

    def _point_in_polygon(self, x, y, coords):
        """Test point dans polygone (même algorithme que votre code)"""
        n = len(coords)
        inside = False
        j = n - 1
        for i in range(n):
            xi, yi = coords[i]
            xj, yj = coords[j]
            if ((yi > y) != (yj > y)) and (x < (xj - xi) * (y - yi) / (yj - yi) + xi):
                inside = not inside
            j = i
        return inside

def main_hybrid():
    """Programme principal système hybride"""
    print("="*80)
    print("SYSTEME HYBRIDE YOLO + CLASSIFICATION ML - DIAGNOSTIC MTC")
    print("Détection YOLO + Machine Learning pour Cancer du Sein")
    print("SMAILI Maya & MORSLI Manel - UMMTO 2024/2025")
    print("="*80)

    # Configuration seeds
    random.seed(CONFIG['random_seed'])
    np.random.seed(CONFIG['random_seed'])

    while True:
        print("\nMENU SYSTEME HYBRIDE")
        print("-"*40)
        print("1. Entraîner système hybride")
        print("2. Diagnostic hybride (image unique)")
        print("3. Diagnostic hybride (lot)")
        print("4. Évaluation comparative")
        print("5. Analyser features importantes")
        print("6. Quitter")
        print("-"*40)

        choice = input("Choix (1-6): ").strip()

        if choice == '1':
            print("\n🔄 ENTRAINEMENT SYSTEME HYBRIDE")

            # Vérifier modèle YOLO
            yolo_model = input("Chemin modèle YOLO: ").strip()
            if not Path(yolo_model).exists():
                print("❌ Modèle YOLO non trouvé")
                continue

            # Dossier images d'entraînement
            train_folder = input("Dossier images d'entraînement: ").strip()
            if not Path(train_folder).exists():
                print("❌ Dossier non trouvé")
                continue

            try:
                # Initialiser système
                hybrid_system = HybridClassificationSystem(yolo_model)

                # Préparer données
                X, y = hybrid_system.prepare_training_data(train_folder)

                if len(X) == 0:
                    print("❌ Aucune donnée d'entraînement valide")
                    continue

                # Entraîner
                results = hybrid_system.train_classifiers(X, y)

                print("\n✅ ENTRAINEMENT TERMINE!")
                print(f"Modèles sauvegardés dans: {hybrid_system.classifier_dir}")

            except Exception as e:
                print(f"❌ ERREUR: {e}")
                import traceback
                traceback.print_exc()

        elif choice == '2':
            print("\n🔍 DIAGNOSTIC HYBRIDE")

            yolo_model = input("Chemin modèle YOLO: ").strip()
            if not Path(yolo_model).exists():
                print("❌ Modèle YOLO non trouvé")
                continue

            image_path = input("Chemin image: ").strip().strip('"')
            if not Path(image_path).exists():
                print("❌ Image non trouvée")
                continue

            try:
                # Initialiser système
                hybrid_system = HybridClassificationSystem(yolo_model)

                # Diagnostic
                result = hybrid_system.predict(image_path, return_probabilities=True)

                # Afficher résultats
                print("\n" + "="*60)
                print("DIAGNOSTIC HYBRIDE YOLO + ML")
                print("="*60)
                print(f"📸 Image: {Path(image_path).name}")
                print(f"🎯 Prédiction: {result['prediction'].upper()}")
                print(f"📊 Confiance: {result['confidence']:.2%}")
                print(f"🤖 Classificateur: {result['classifier_used']}")

                print("\n📈 Probabilités détaillées:")
                for class_name, prob in result['probabilities'].items():
                    print(f"  - {class_name}: {prob:.4f} ({prob*100:.2f}%)")

                if 'all_classifier_predictions' in result:
                    print("\n🔄 Prédictions ensemble:")
                    for clf_name, clf_result in result['all_classifier_predictions'].items():
                        print(f"  - {clf_name}: {clf_result['prediction']} ({clf_result['confidence']:.3f})")

                # Visualisation avec cartographie MTC
                visualizer = HybridDiagnosticVisualizer()
                save_path, zone_counts = visualizer.create_comprehensive_visualization(
                    image_path, result['detections'], result, hybrid_system.feature_extractor
                )

                # Afficher informations zones
                if zone_counts:
                    print("\n🗺️ Localisation par zones MTC:")
                    for zone, count in zone_counts.items():
                        if count > 0:
                            zone_name = TONGUE_ZONES[zone]['name']
                            print(f"  - {zone_name}: {count} détection(s)")

                print(f"\n📊 Visualisation complète: {save_path}")

            except Exception as e:
                print(f"❌ ERREUR: {e}")

        elif choice == '3':
            print("\n📁 DIAGNOSTIC LOT HYBRIDE")

            yolo_model = input("Chemin modèle YOLO: ").strip()
            if not Path(yolo_model).exists():
                print("❌ Modèle YOLO non trouvé")
                continue

            folder_path = input("Dossier images: ").strip().strip('"')
            if not Path(folder_path).exists():
                print("❌ Dossier non trouvé")
                continue

            try:
                hybrid_system = HybridClassificationSystem(yolo_model)
                images = list(Path(folder_path).glob('*.jpg'))

                if not images:
                    print("❌ Aucune image .jpg trouvée")
                    continue

                print(f"\n🔄 Traitement de {len(images)} images...")

                results_summary = defaultdict(int)
                confidence_scores = defaultdict(list)
                detailed_results = []

                for i, img_path in enumerate(images):
                    print(f"  Progression: {i+1}/{len(images)} - {img_path.name}")

                    result = hybrid_system.predict(img_path)
                    prediction = result['prediction']
                    confidence = result['confidence']

                    results_summary[prediction] += 1
                    confidence_scores[prediction].append(confidence)
                    detailed_results.append({
                        'image': img_path.name,
                        'prediction': prediction,
                        'confidence': confidence
                    })

                # Afficher résumé
                print("\n" + "="*60)
                print("RESUME DIAGNOSTIC LOT HYBRIDE")
                print("="*60)

                total = len(images)
                for stage in ['healthy', 'early', 'advanced']:
                    count = results_summary.get(stage, 0)
                    avg_conf = np.mean(confidence_scores[stage]) if confidence_scores[stage] else 0
                    percentage = (count / total) * 100

                    print(f"{stage.upper()}: {count} images ({percentage:.1f}%)")
                    print(f"  Confiance moyenne: {avg_conf:.2%}")

                # Sauvegarder résultats détaillés
                df = pd.DataFrame(detailed_results)
                results_file = Path(CONFIG['results_dir']) / f"batch_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
                df.to_csv(results_file, index=False)
                print(f"\n📄 Résultats détaillés: {results_file}")

            except Exception as e:
                print(f"❌ ERREUR: {e}")

        elif choice == '4':
            print("\n📊 EVALUATION COMPARATIVE")
            print("Fonctionnalité à implémenter...")
            # Ici vous pouvez comparer performances système hybride vs règles expertes

        elif choice == '5':
            print("\n🎯 ANALYSE FEATURES IMPORTANTES")
            print("Fonctionnalité à implémenter...")
            # Ici vous pouvez analyser quelles features sont les plus importantes

        elif choice == '6':
            print("\n👋 Au revoir! Merci d'avoir utilisé le système hybride!")
            break

if __name__ == "__main__":
    try:
        main_hybrid()
    except KeyboardInterrupt:
        print("\n\n⚠️ Interruption utilisateur")
    except Exception as e:
        print(f"\n❌ ERREUR CRITIQUE: {e}")
        import traceback
        traceback.print_exc()

SYSTEME HYBRIDE YOLO + CLASSIFICATION ML - DIAGNOSTIC MTC
Détection YOLO + Machine Learning pour Cancer du Sein
SMAILI Maya & MORSLI Manel - UMMTO 2024/2025

MENU SYSTEME HYBRIDE
----------------------------------------
1. Entraîner système hybride
2. Diagnostic hybride (image unique)
3. Diagnostic hybride (lot)
4. Évaluation comparative
5. Analyser features importantes
6. Quitter
----------------------------------------

🔄 ENTRAINEMENT SYSTEME HYBRIDE


Traceback (most recent call last):
  File "/tmp/ipython-input-4-1113902059.py", line 991, in main_hybrid
    X, y = hybrid_system.prepare_training_data(train_folder)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-4-1113902059.py", line 392, in prepare_training_data
    print(f"Features par image: {X.shape[1]}")
                                 ~~~~~~~^^^
IndexError: tuple index out of range


PREPARATION DES DONNEES D'ENTRAINEMENT
Images trouvées: 0
Distribution des labels:

Extraction des features...

Features extraites: (0,)
❌ ERREUR: tuple index out of range

MENU SYSTEME HYBRIDE
----------------------------------------
1. Entraîner système hybride
2. Diagnostic hybride (image unique)
3. Diagnostic hybride (lot)
4. Évaluation comparative
5. Analyser features importantes
6. Quitter
----------------------------------------

MENU SYSTEME HYBRIDE
----------------------------------------
1. Entraîner système hybride
2. Diagnostic hybride (image unique)
3. Diagnostic hybride (lot)
4. Évaluation comparative
5. Analyser features importantes
6. Quitter
----------------------------------------

🔄 ENTRAINEMENT SYSTEME HYBRIDE


Traceback (most recent call last):
  File "/tmp/ipython-input-4-1113902059.py", line 991, in main_hybrid
    X, y = hybrid_system.prepare_training_data(train_folder)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-4-1113902059.py", line 392, in prepare_training_data
    print(f"Features par image: {X.shape[1]}")
                                 ~~~~~~~^^^
IndexError: tuple index out of range


PREPARATION DES DONNEES D'ENTRAINEMENT
Images trouvées: 0
Distribution des labels:

Extraction des features...

Features extraites: (0,)
❌ ERREUR: tuple index out of range

MENU SYSTEME HYBRIDE
----------------------------------------
1. Entraîner système hybride
2. Diagnostic hybride (image unique)
3. Diagnostic hybride (lot)
4. Évaluation comparative
5. Analyser features importantes
6. Quitter
----------------------------------------

🔄 ENTRAINEMENT SYSTEME HYBRIDE


Traceback (most recent call last):
  File "/tmp/ipython-input-4-1113902059.py", line 991, in main_hybrid
    X, y = hybrid_system.prepare_training_data(train_folder)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-4-1113902059.py", line 392, in prepare_training_data
    print(f"Features par image: {X.shape[1]}")
                                 ~~~~~~~^^^
IndexError: tuple index out of range


PREPARATION DES DONNEES D'ENTRAINEMENT
Images trouvées: 0
Distribution des labels:

Extraction des features...

Features extraites: (0,)
❌ ERREUR: tuple index out of range

MENU SYSTEME HYBRIDE
----------------------------------------
1. Entraîner système hybride
2. Diagnostic hybride (image unique)
3. Diagnostic hybride (lot)
4. Évaluation comparative
5. Analyser features importantes
6. Quitter
----------------------------------------

🔄 ENTRAINEMENT SYSTEME HYBRIDE
PREPARATION DES DONNEES D'ENTRAINEMENT
Images trouvées: 80
Distribution des labels:
  - healthy: 38
  - advanced: 19
  - early: 23

Extraction des features...
  Progression: 0/80
  Progression: 10/80
  Progression: 20/80
  Progression: 30/80
  Progression: 40/80
  Progression: 50/80
  Progression: 60/80
  Progression: 70/80

Features extraites: (80, 67)
Features par image: 67

ENTRAINEMENT DES CLASSIFICATEURS

Entraînement RandomForest...
  Accuracy: 0.9375
  CV Score: 0.7949 (+/- 0.1716)

Rapport détaillé RandomForest:
  

In [None]:
import os
import sys
import subprocess
import shutil
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.patches import Polygon, Rectangle
from pathlib import Path
import json
import yaml
import random
from datetime import datetime
import pandas as pd
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

# ML Libraries
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.pipeline import Pipeline
import joblib
import xgboost as xgb

# Configuration système optimisée
CONFIG = {
    'base_dir': '.',
    'output_dir': 'mtc_output',
    'model_dir': 'mtc_models',
    'results_dir': 'mtc_results',
    'classifier_dir': 'mtc_classifiers',
    'train_split': 0.8,
    'epochs': 200,
    'batch_size': 16,
    'imgsz': 640,
    'patience': 50,
    'conf_threshold': 0.15,
    'iou_threshold': 0.3,
    'augmentation_factor': 8,
    'target_accuracy': 0.85,
    'random_seed': 42,
    'lr0': 0.0008,
    'weight_decay': 0.001,
    'mosaic': 0.8,
    'mixup': 0.1,
    'copy_paste': 0.1,
    'tongue_conf_threshold': 0.5,  # Seuil pour détection langue
}

# Classes YOLO (16 classes)
CLASS_NAMES = [
    'Ecchymoses', 'Eduit_jaune_epais', 'Eduit_jaune_mince', 'Fissure',
    'Langue_normal', 'Langue_pale', 'Langue_petite', 'Langue_rose',
    'Langue_rouge', 'Langue_rouge_foncee', 'enduit_blanc_epais',
    'enduit_blanc_mince', 'langue_ganfelee', 'red_dot',
    'salive_humide', 'salive_normale'
]

# Zones MTC
TONGUE_ZONES = {
    'kidney': {
        'name': 'Rein',
        'coords': [(0.2, 0), (0.8, 0), (0.8, 0.15), (0.2, 0.15)],
        'color': (75, 0, 130)
    },
    'liver_gall_right': {
        'name': 'Foie-VB Droit',
        'coords': [(0, 0.15), (0.3, 0.15), (0.3, 0.65), (0, 0.65)],
        'color': (34, 139, 34)
    },
    'liver_gall_left': {
        'name': 'Foie-VB Gauche',
        'coords': [(0.7, 0.15), (1, 0.15), (1, 0.65), (0.7, 0.65)],
        'color': (50, 205, 50)
    },
    'spleen_stomach': {
        'name': 'Rate-Estomac',
        'coords': [(0.3, 0.15), (0.7, 0.15), (0.7, 0.65), (0.3, 0.65)],
        'color': (255, 215, 0)
    },
    'heart_lung': {
        'name': 'Coeur-Poumon',
        'coords': [(0.2, 0.65), (0.8, 0.65), (0.8, 1), (0.2, 1)],
        'color': (220, 20, 60)
    }
}

class TongueDetector:
    """Détecteur de langue pour prétraitement"""

    def __init__(self, tongue_model_path=None):
        """
        Initialise le détecteur de langue
        tongue_model_path: chemin vers un modèle YOLO entraîné pour détecter les langues
        """
        try:
            from ultralytics import YOLO
        except ImportError:
            print("Installation d'ultralytics...")
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'ultralytics'])
            from ultralytics import YOLO

        if tongue_model_path and Path(tongue_model_path).exists():
            self.model = YOLO(tongue_model_path)
        else:
            # Utiliser un modèle générique et filtrer par classe
            self.model = YOLO('yolov8m.pt')  # Modèle medium pour meilleure précision

        self.temp_dir = Path('temp_tongues')
        self.temp_dir.mkdir(exist_ok=True)

    def detect_and_extract_tongue(self, image_path, margin_percent=10):
        """
        Détecte et extrait la région de la langue
        margin_percent: pourcentage de marge à ajouter autour de la détection
        """
        try:
            # Charger image
            image = cv2.imread(str(image_path))
            if image is None:
                print(f"Impossible de charger l'image: {image_path}")
                return None, None

            h, w = image.shape[:2]

            # Détection YOLO
            results = self.model(image_path, conf=CONFIG['tongue_conf_threshold'], verbose=False)

            # Rechercher une détection de langue
            tongue_bbox = None
            best_conf = 0

            for r in results:
                if r.boxes is not None:
                    for box in r.boxes:
                        # Si on a un modèle spécifique langue, toute détection est valide
                        # Sinon, chercher des classes pertinentes (person, etc.)
                        conf = float(box.conf)
                        if conf > best_conf:
                            best_conf = conf
                            tongue_bbox = box.xyxy[0].cpu().numpy()

            # Si pas de détection, essayer avec segmentation couleur
            if tongue_bbox is None:
                print("Pas de détection YOLO, tentative de segmentation couleur...")
                tongue_bbox = self._segment_tongue_by_color(image)

            if tongue_bbox is None:
                print("Aucune langue détectée, utilisation de l'image complète")
                return str(image_path), None

            # Extraire région avec marge
            x1, y1, x2, y2 = tongue_bbox
            margin_x = int((x2 - x1) * margin_percent / 100)
            margin_y = int((y2 - y1) * margin_percent / 100)

            # Appliquer marges avec limites
            x1 = max(0, int(x1 - margin_x))
            y1 = max(0, int(y1 - margin_y))
            x2 = min(w, int(x2 + margin_x))
            y2 = min(h, int(y2 + margin_y))

            # Extraire région
            tongue_region = image[y1:y2, x1:x2]

            # Sauvegarder région extraite
            temp_path = self.temp_dir / f"tongue_{Path(image_path).stem}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.jpg"
            cv2.imwrite(str(temp_path), tongue_region)

            return str(temp_path), (x1, y1, x2, y2)

        except Exception as e:
            print(f"Erreur détection langue: {e}")
            return str(image_path), None

    def _segment_tongue_by_color(self, image):
        """
        Segmentation basée couleur pour détecter la langue
        """
        try:
            # Convertir en HSV
            hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

            # Plages de couleur pour langue (rose/rouge)
            lower_tongue = np.array([0, 30, 30])
            upper_tongue = np.array([20, 255, 255])

            # Masque pour tons roses/rouges
            mask1 = cv2.inRange(hsv, lower_tongue, upper_tongue)

            # Deuxième plage pour tons plus foncés
            lower_tongue2 = np.array([160, 30, 30])
            upper_tongue2 = np.array([180, 255, 255])
            mask2 = cv2.inRange(hsv, lower_tongue2, upper_tongue2)

            # Combiner masques
            mask = cv2.bitwise_or(mask1, mask2)

            # Morphologie pour nettoyer
            kernel = np.ones((15, 15), np.uint8)
            mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
            mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

            # Trouver contours
            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            if not contours:
                return None

            # Prendre le plus grand contour (probablement la langue)
            largest_contour = max(contours, key=cv2.contourArea)

            # Vérifier que c'est assez grand
            area = cv2.contourArea(largest_contour)
            image_area = image.shape[0] * image.shape[1]

            if area < 0.05 * image_area:  # Moins de 5% de l'image
                return None

            # Obtenir bounding box
            x, y, w, h = cv2.boundingRect(largest_contour)

            # Vérifier ratio largeur/hauteur (langue généralement plus large que haute)
            aspect_ratio = w / h
            if aspect_ratio < 0.5 or aspect_ratio > 2.5:
                return None

            return np.array([x, y, x + w, y + h])

        except Exception as e:
            print(f"Erreur segmentation couleur: {e}")
            return None

    def cleanup_temp_files(self):
        """Nettoie les fichiers temporaires"""
        if self.temp_dir.exists():
            shutil.rmtree(self.temp_dir)

class YOLOFeatureExtractor:
    """Extracteur de features basé sur les détections YOLO"""

    def __init__(self, model_path):
        try:
            from ultralytics import YOLO
            self.model = YOLO(model_path)
        except ImportError:
            print("Installation d'ultralytics...")
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'ultralytics'])
            from ultralytics import YOLO
            self.model = YOLO(model_path)

        # Stocker dernières détections pour visualisation
        self.last_detections = []

    def extract_features(self, image_path, return_detections=False):
        """Extrait features complètes d'une image"""
        try:
            results = self.model(image_path, conf=CONFIG['conf_threshold'],
                               iou=CONFIG['iou_threshold'], verbose=False)

            image = cv2.imread(str(image_path))
            h, w = image.shape[:2]

            # Initialiser features
            features = self._initialize_features()

            # Traiter détections
            detections = []
            for r in results:
                if r.boxes is not None:
                    for box in r.boxes:
                        bbox = box.xyxy[0].cpu().numpy()
                        conf = float(box.conf)
                        cls = int(box.cls)

                        if cls < len(CLASS_NAMES):
                            detection = {
                                'bbox': bbox,
                                'confidence': conf,
                                'class': cls,
                                'class_name': CLASS_NAMES[cls]
                            }
                            detections.append(detection)

            # Stocker détections pour visualisation
            self.last_detections = detections

            # Extraire features des détections
            features = self._extract_detection_features(detections, features, w, h)
            features = self._extract_spatial_features(detections, features, w, h)
            features = self._extract_statistical_features(detections, features)
            features = self._extract_zone_features(detections, features, w, h)

            if return_detections:
                return np.array(list(features.values())), detections
            else:
                return np.array(list(features.values()))

        except Exception as e:
            print(f"Erreur extraction features: {e}")
            if return_detections:
                return np.zeros(self._get_feature_count()), []
            else:
                return np.zeros(self._get_feature_count())

    def get_last_detections(self):
        """Retourne dernières détections YOLO"""
        return self.last_detections

    def _initialize_features(self):
        """Initialise toutes les features à 0"""
        features = {}

        # Features par classe (confidence max et count)
        for class_name in CLASS_NAMES:
            features[f'{class_name}_max_conf'] = 0.0
            features[f'{class_name}_count'] = 0.0
            features[f'{class_name}_avg_conf'] = 0.0

        # Features spatiales
        for zone in TONGUE_ZONES.keys():
            features[f'{zone}_detection_count'] = 0.0
            features[f'{zone}_avg_confidence'] = 0.0

        # Features statistiques globales
        features['total_detections'] = 0.0
        features['avg_confidence_all'] = 0.0
        features['std_confidence'] = 0.0
        features['detection_density'] = 0.0

        # Features morphologiques
        features['tongue_area_ratio'] = 0.0
        features['detection_spread_x'] = 0.0
        features['detection_spread_y'] = 0.0

        # Features de co-occurrence
        features['pathological_combinations'] = 0.0
        features['healthy_indicators'] = 0.0

        return features

    def _extract_detection_features(self, detections, features, w, h):
        """Extrait features basées sur les détections"""
        class_detections = defaultdict(list)

        for det in detections:
            class_name = det['class_name']
            conf = det['confidence']
            class_detections[class_name].append(conf)

        # Features par classe
        for class_name in CLASS_NAMES:
            confs = class_detections[class_name]
            if confs:
                features[f'{class_name}_max_conf'] = max(confs)
                features[f'{class_name}_count'] = len(confs)
                features[f'{class_name}_avg_conf'] = np.mean(confs)
            else:
                features[f'{class_name}_max_conf'] = 0.0
                features[f'{class_name}_count'] = 0.0
                features[f'{class_name}_avg_conf'] = 0.0

        return features

    def _extract_spatial_features(self, detections, features, w, h):
        """Extrait features spatiales et de zones"""
        if not detections:
            return features

        # Calculer centroids normalisés
        centroids = []
        for det in detections:
            bbox = det['bbox']
            cx = ((bbox[0] + bbox[2]) / 2) / w
            cy = ((bbox[1] + bbox[3]) / 2) / h
            centroids.append((cx, cy))

        # Spread spatial
        if centroids:
            x_coords = [c[0] for c in centroids]
            y_coords = [c[1] for c in centroids]
            features['detection_spread_x'] = np.std(x_coords) if len(x_coords) > 1 else 0
            features['detection_spread_y'] = np.std(y_coords) if len(y_coords) > 1 else 0

        return features

    def _extract_statistical_features(self, detections, features):
        """Extrait features statistiques globales"""
        if not detections:
            return features

        confidences = [det['confidence'] for det in detections]

        features['total_detections'] = len(detections)
        features['avg_confidence_all'] = np.mean(confidences)
        features['std_confidence'] = np.std(confidences)
        features['detection_density'] = len(detections) / 100.0  # Normalisé

        return features

    def _extract_zone_features(self, detections, features, w, h):
        """Extrait features par zone MTC"""
        zone_detections = defaultdict(list)

        for det in detections:
            bbox = det['bbox']
            cx = ((bbox[0] + bbox[2]) / 2) / w
            cy = ((bbox[1] + bbox[3]) / 2) / h

            zone = self._find_zone(cx, cy)
            if zone:
                zone_detections[zone].append(det['confidence'])

        # Features par zone
        for zone in TONGUE_ZONES.keys():
            confs = zone_detections[zone]
            if confs:
                features[f'{zone}_detection_count'] = len(confs)
                features[f'{zone}_avg_confidence'] = np.mean(confs)
            else:
                features[f'{zone}_detection_count'] = 0.0
                features[f'{zone}_avg_confidence'] = 0.0

        return features

    def _find_zone(self, x, y):
        """Trouve zone pour coordonnées"""
        for zone_name, zone_info in TONGUE_ZONES.items():
            if self._point_in_polygon(x, y, zone_info['coords']):
                return zone_name
        return None

    def _point_in_polygon(self, x, y, coords):
        """Test point dans polygone"""
        n = len(coords)
        inside = False
        j = n - 1
        for i in range(n):
            xi, yi = coords[i]
            xj, yj = coords[j]
            if ((yi > y) != (yj > y)) and (x < (xj - xi) * (y - yi) / (yj - yi) + xi):
                inside = not inside
            j = i
        return inside

    def _get_feature_count(self):
        """Retourne le nombre total de features"""
        # 3 features par classe (48) + 10 zones x 2 (20) + 7 globales + 2 morpho + 2 co-occur
        return len(CLASS_NAMES) * 3 + len(TONGUE_ZONES) * 2 + 7 + 2 + 2

    def get_feature_names(self):
        """Retourne noms des features"""
        names = []

        # Features par classe
        for class_name in CLASS_NAMES:
            names.extend([
                f'{class_name}_max_conf',
                f'{class_name}_count',
                f'{class_name}_avg_conf'
            ])

        # Features par zone
        for zone in TONGUE_ZONES.keys():
            names.extend([
                f'{zone}_detection_count',
                f'{zone}_avg_confidence'
            ])

        # Features globales
        names.extend([
            'total_detections', 'avg_confidence_all', 'std_confidence',
            'detection_density', 'tongue_area_ratio', 'detection_spread_x',
            'detection_spread_y', 'pathological_combinations', 'healthy_indicators'
        ])

        return names

class HybridClassificationSystem:
    """Système hybride YOLO + Classification ML avec détection de langue"""

    def __init__(self, yolo_model_path, tongue_detector_model_path=None):
        self.feature_extractor = YOLOFeatureExtractor(yolo_model_path)
        self.tongue_detector = TongueDetector(tongue_detector_model_path)
        self.classifiers = {}
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        self.is_trained = False

        # Créer dossier pour sauvegardes
        self.classifier_dir = Path(CONFIG['classifier_dir'])
        self.classifier_dir.mkdir(exist_ok=True)

    def prepare_training_data(self, image_folder, labels_file=None, extract_tongue=True):
        """
        Prépare données d'entraînement
        extract_tongue: si True, extrait d'abord la langue avant analyse
        """
        print("PREPARATION DES DONNEES D'ENTRAINEMENT")
        print("="*60)

        image_paths = []
        labels = []

        # Collecter images et labels
        for img_path in Path(image_folder).glob('*.jpg'):
            image_paths.append(img_path)

            # Extraire label du nom de fichier
            label = self._extract_label_from_filename(img_path.name)
            labels.append(label)

        print(f"Images trouvées: {len(image_paths)}")

        # Distribution des labels
        label_counts = defaultdict(int)
        for label in labels:
            label_counts[label] += 1

        print("Distribution des labels:")
        for label, count in label_counts.items():
            print(f"  - {label}: {count}")

        # Extraire features
        print("\nExtraction des features...")
        features_list = []
        valid_labels = []

        for i, (img_path, label) in enumerate(zip(image_paths, labels)):
            if i % 10 == 0:
                print(f"  Progression: {i}/{len(image_paths)}")

            # Extraire langue si demandé
            if extract_tongue:
                tongue_path, bbox = self.tongue_detector.detect_and_extract_tongue(img_path)
                if tongue_path:
                    features = self.feature_extractor.extract_features(tongue_path)
                else:
                    features = self.feature_extractor.extract_features(img_path)
            else:
                features = self.feature_extractor.extract_features(img_path)

            if features is not None and not np.all(features == 0):
                features_list.append(features)
                valid_labels.append(label)

        # Nettoyer fichiers temporaires
        if extract_tongue:
            self.tongue_detector.cleanup_temp_files()

        X = np.array(features_list)
        y = np.array(valid_labels)

        print(f"\nFeatures extraites: {X.shape}")
        print(f"Features par image: {X.shape[1]}")

        return X, y

    def train_classifiers(self, X, y):
        """Entraîne plusieurs classificateurs"""
        print("\nENTRAINEMENT DES CLASSIFICATEURS")
        print("="*60)

        # Encoder labels
        y_encoded = self.label_encoder.fit_transform(y)

        # Split train/test
        X_train, X_test, y_train, y_test = train_test_split(
            X, y_encoded, test_size=0.2, random_state=CONFIG['random_seed'],
            stratify=y_encoded
        )

        # Normaliser features
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        # Définir classificateurs
        classifiers_config = {
            'RandomForest': {
                'model': RandomForestClassifier(
                    n_estimators=200,
                    max_depth=15,
                    min_samples_split=5,
                    min_samples_leaf=2,
                    random_state=CONFIG['random_seed'],
                    n_jobs=-1
                ),
                'use_scaling': False
            },
            'XGBoost': {
                'model': xgb.XGBClassifier(
                    n_estimators=200,
                    max_depth=8,
                    learning_rate=0.1,
                    subsample=0.8,
                    colsample_bytree=0.8,
                    random_state=CONFIG['random_seed'],
                    n_jobs=-1
                ),
                'use_scaling': False
            },
            'SVM': {
                'model': SVC(
                    kernel='rbf',
                    C=10,
                    gamma='scale',
                    probability=True,
                    random_state=CONFIG['random_seed']
                ),
                'use_scaling': True
            },
            'MLP': {
                'model': MLPClassifier(
                    hidden_layer_sizes=(128, 64, 32),
                    activation='relu',
                    solver='adam',
                    alpha=0.001,
                    learning_rate='adaptive',
                    max_iter=500,
                    random_state=CONFIG['random_seed']
                ),
                'use_scaling': True
            },
            'GradientBoosting': {
                'model': GradientBoostingClassifier(
                    n_estimators=200,
                    learning_rate=0.1,
                    max_depth=8,
                    random_state=CONFIG['random_seed']
                ),
                'use_scaling': False
            }
        }

        # Entraîner et évaluer chaque classificateur
        results = {}

        for name, config in classifiers_config.items():
            print(f"\nEntraînement {name}...")

            model = config['model']

            if config['use_scaling']:
                X_train_input = X_train_scaled
                X_test_input = X_test_scaled
            else:
                X_train_input = X_train
                X_test_input = X_test

            # Entraîner
            model.fit(X_train_input, y_train)

            # Prédictions
            y_pred = model.predict(X_test_input)
            y_pred_proba = model.predict_proba(X_test_input)

            # Métriques
            accuracy = accuracy_score(y_test, y_pred)

            # Cross-validation
            cv_scores = cross_val_score(model, X_train_input, y_train, cv=5)

            results[name] = {
                'model': model,
                'accuracy': accuracy,
                'cv_mean': cv_scores.mean(),
                'cv_std': cv_scores.std(),
                'use_scaling': config['use_scaling']
            }

            print(f"  Accuracy: {accuracy:.4f}")
            print(f"  CV Score: {cv_scores.mean():.4f} (+/- {cv_scores.std()*2:.4f})")

            # Rapport détaillé pour le meilleur modèle
            if name == 'RandomForest':  # Afficher détails pour RF
                print(f"\nRapport détaillé {name}:")
                class_names = self.label_encoder.classes_
                print(classification_report(y_test, y_pred, target_names=class_names))

        # Sélectionner meilleur modèle
        best_name = max(results.keys(), key=lambda k: results[k]['cv_mean'])
        best_model_info = results[best_name]

        print(f"\n🏆 MEILLEUR MODELE: {best_name}")
        print(f"   Accuracy: {best_model_info['accuracy']:.4f}")
        print(f"   CV Score: {best_model_info['cv_mean']:.4f}")

        # Sauvegarder modèles
        self.classifiers = results
        self.best_classifier_name = best_name
        self.is_trained = True

        # Sauvegarder sur disque
        self.save_models()

        return results

    def predict(self, image_path, return_probabilities=False, extract_tongue=True):
        """
        Prédiction hybride YOLO + Classification
        extract_tongue: si True, extrait d'abord la langue
        """
        if not self.is_trained:
            self.load_models()

        original_image_path = image_path
        tongue_bbox = None

        # Extraire langue si demandé
        if extract_tongue:
            print("🔍 Détection de la langue...")
            tongue_path, tongue_bbox = self.tongue_detector.detect_and_extract_tongue(image_path)
            if tongue_path:
                print("✅ Langue détectée et extraite")
                image_path = tongue_path
            else:
                print("⚠️ Pas de langue détectée, analyse de l'image complète")

        # Extraire features avec YOLO et récupérer détections
        features, detections = self.feature_extractor.extract_features(image_path, return_detections=True)

        if features is None or np.all(features == 0):
            return {
                'prediction': 'unknown',
                'confidence': 0.0,
                'probabilities': {},
                'detections': [],
                'detected_features': {},
                'error': 'Aucune feature extraite',
                'tongue_detected': tongue_bbox is not None,
                'original_image': str(original_image_path),
                'analyzed_image': str(image_path)
            }

        # Utiliser le meilleur classificateur
        best_clf_info = self.classifiers[self.best_classifier_name]
        model = best_clf_info['model']
        use_scaling = best_clf_info['use_scaling']

        # Préparer features
        features_reshaped = features.reshape(1, -1)
        if use_scaling:
            features_reshaped = self.scaler.transform(features_reshaped)

        # Prédiction
        prediction_encoded = model.predict(features_reshaped)[0]
        prediction = self.label_encoder.inverse_transform([prediction_encoded])[0]

        # Probabilités
        probabilities = model.predict_proba(features_reshaped)[0]
        prob_dict = {}
        for i, class_name in enumerate(self.label_encoder.classes_):
            prob_dict[class_name] = probabilities[i]

        confidence = max(probabilities)

        # Extraire features significatives pour affichage
        feature_names = self.feature_extractor.get_feature_names()
        detected_features = {}
        for i, (name, value) in enumerate(zip(feature_names, features)):
            if value > 0 and 'count' not in name.lower():  # Exclure les counts pour lisibilité
                detected_features[name] = value

        # Trier par importance
        detected_features = dict(sorted(detected_features.items(),
                                      key=lambda x: x[1], reverse=True))

        result = {
            'prediction': prediction,
            'confidence': confidence,
            'probabilities': prob_dict,
            'classifier_used': self.best_classifier_name,
            'detections': detections,
            'detected_features': detected_features,
            'tongue_detected': tongue_bbox is not None,
            'tongue_bbox': tongue_bbox,
            'original_image': str(original_image_path),
            'analyzed_image': str(image_path)
        }

        if return_probabilities:
            result['all_classifier_predictions'] = self._get_ensemble_predictions(features_reshaped)

        # Nettoyer fichiers temporaires si extraction langue
        if extract_tongue and tongue_bbox is not None:
            # Garder le fichier temporaire pour visualisation
            pass

        return result

    def _get_ensemble_predictions(self, features):
        """Prédictions ensemble de tous les classificateurs"""
        ensemble_results = {}

        for name, clf_info in self.classifiers.items():
            model = clf_info['model']
            use_scaling = clf_info['use_scaling']

            features_input = self.scaler.transform(features) if use_scaling else features

            prediction_encoded = model.predict(features_input)[0]
            prediction = self.label_encoder.inverse_transform([prediction_encoded])[0]
            probabilities = model.predict_proba(features_input)[0]

            ensemble_results[name] = {
                'prediction': prediction,
                'confidence': max(probabilities),
                'probabilities': dict(zip(self.label_encoder.classes_, probabilities))
            }

        return ensemble_results

    def save_models(self):
        """Sauvegarde modèles et preprocessing"""
        print(f"\nSauvegarde modèles dans {self.classifier_dir}")

        # Sauvegarder chaque classificateur
        for name, clf_info in self.classifiers.items():
            model_path = self.classifier_dir / f"{name}_model.joblib"
            joblib.dump(clf_info['model'], model_path)

        # Sauvegarder preprocessing
        joblib.dump(self.scaler, self.classifier_dir / "scaler.joblib")
        joblib.dump(self.label_encoder, self.classifier_dir / "label_encoder.joblib")

        # Sauvegarder config
        config = {
            'best_classifier': self.best_classifier_name,
            'classifiers_config': {name: {'use_scaling': info['use_scaling']}
                                 for name, info in self.classifiers.items()},
            'feature_names': self.feature_extractor.get_feature_names()
        }

        with open(self.classifier_dir / "config.json", 'w') as f:
            json.dump(config, f, indent=2)

        print("✅ Modèles sauvegardés")

    def load_models(self):
        """Charge modèles sauvegardés"""
        config_path = self.classifier_dir / "config.json"

        if not config_path.exists():
            raise FileNotFoundError("Aucun modèle sauvegardé trouvé")

        print("Chargement modèles sauvegardés...")

        # Charger config
        with open(config_path, 'r') as f:
            config = json.load(f)

        self.best_classifier_name = config['best_classifier']

        # Charger preprocessing
        self.scaler = joblib.load(self.classifier_dir / "scaler.joblib")
        self.label_encoder = joblib.load(self.classifier_dir / "label_encoder.joblib")

        # Charger classificateurs
        self.classifiers = {}
        for name, clf_config in config['classifiers_config'].items():
            model_path = self.classifier_dir / f"{name}_model.joblib"
            if model_path.exists():
                model = joblib.load(model_path)
                self.classifiers[name] = {
                    'model': model,
                    'use_scaling': clf_config['use_scaling']
                }

        self.is_trained = True
        print("✅ Modèles chargés")

    def _extract_label_from_filename(self, filename):
        """Extrait label du nom de fichier"""
        filename_lower = filename.lower()

        if any(keyword in filename_lower for keyword in ['healthy', 'sain', 'normal']):
            return 'healthy'
        elif any(keyword in filename_lower for keyword in ['ebc', 'early', 'precoce']):
            return 'early'
        elif any(keyword in filename_lower for keyword in ['abc', 'advanced', 'avance']):
            return 'advanced'
        elif any(keyword in filename_lower for keyword in ['real']):
            if any(keyword in filename_lower for keyword in ['1', 'one', 'first']):
                return 'healthy'
            elif any(keyword in filename_lower for keyword in ['2', 'two', 'second']):
                return 'early'
            elif any(keyword in filename_lower for keyword in ['3', 'three', 'third']):
                return 'advanced'
            else:
                return 'unknown'
        else:
            return 'unknown'

class HybridDiagnosticVisualizer:
    """Visualiseur pour système hybride avec cartographie MTC"""

    def __init__(self):
        self.results_dir = Path(CONFIG['results_dir'])
        self.results_dir.mkdir(exist_ok=True)
        self.zones = TONGUE_ZONES

    def create_comprehensive_visualization(self, image_path, yolo_detections, ml_result, feature_extractor=None):
        """Crée visualisation complète avec cartographie MTC"""
        # Charger images
        original_image = cv2.imread(str(ml_result.get('original_image', image_path)))
        analyzed_image = cv2.imread(str(ml_result.get('analyzed_image', image_path)))

        if analyzed_image is None:
            analyzed_image = original_image

        # Récupérer les détections YOLO brutes si nécessaire
        if yolo_detections is None:
            yolo_detections = self._get_yolo_detections(ml_result['analyzed_image'], feature_extractor)

        # Créer figure avec 4 subplots si langue détectée, sinon 3
        if ml_result.get('tongue_detected', False):
            fig = plt.figure(figsize=(24, 10))
            num_cols = 4
        else:
            fig = plt.figure(figsize=(20, 10))
            num_cols = 3

        current_col = 1

        # 1. Image originale avec détection langue (si applicable)
        if ml_result.get('tongue_detected', False):
            ax0 = plt.subplot(1, num_cols, current_col)
            ax0.imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
            ax0.set_title('Image Originale + Détection Langue', fontsize=14, fontweight='bold')

            # Afficher bbox de la langue détectée
            if ml_result.get('tongue_bbox'):
                x1, y1, x2, y2 = ml_result['tongue_bbox']
                rect = Rectangle((x1, y1), x2-x1, y2-y1,
                               linewidth=3, edgecolor='lime', facecolor='none')
                ax0.add_patch(rect)
                ax0.text(x1, y1-10, "Langue Détectée", color='white',
                        backgroundcolor='lime', fontsize=10, weight='bold')

            ax0.axis('off')
            current_col += 1

        # 2. Image analysée avec détections YOLO et bounding boxes
        ax1 = plt.subplot(1, num_cols, current_col)
        ax1.imshow(cv2.cvtColor(analyzed_image, cv2.COLOR_BGR2RGB))
        ax1.set_title('Détections YOLO sur Langue', fontsize=14, fontweight='bold')

        # Ajouter bounding boxes avec labels
        for det in yolo_detections:
            bbox = det['bbox']
            x1, y1, x2, y2 = bbox

            color = self._get_color_for_class(det['class_name'])

            # Rectangle de détection
            rect = Rectangle((x1, y1), x2-x1, y2-y1,
                           linewidth=2, edgecolor=color, facecolor='none')
            ax1.add_patch(rect)

            # Label avec confiance
            label = f"{det['class_name']}: {det['confidence']:.2f}"
            ax1.text(x1, y1-5, label, color='white', backgroundcolor=color,
                    fontsize=8, weight='bold')

        ax1.axis('off')
        current_col += 1

        # 3. Cartographie MTC avec zones et détections
        ax2 = plt.subplot(1, num_cols, current_col)
        ax2.set_xlim(0, 1)
        ax2.set_ylim(1, 0)  # Inverser Y pour correspondre à l'image
        ax2.set_aspect('equal')
        ax2.set_title('Cartographie MTC', fontsize=14, fontweight='bold')

        # Dessiner zones MTC
        zone_counts = defaultdict(int)
        for zone_name, zone_info in self.zones.items():
            coords = zone_info['coords']
            color = np.array(zone_info['color']) / 255.0

            # Polygone de zone
            polygon = Polygon(coords, facecolor=color, alpha=0.3,
                            edgecolor='black', linewidth=2)
            ax2.add_patch(polygon)

            # Nom de zone au centre
            cx = np.mean([c[0] for c in coords])
            cy = np.mean([c[1] for c in coords])
            ax2.text(cx, cy, zone_info['name'], ha='center', va='center',
                    fontsize=11, weight='bold',
                    bbox=dict(boxstyle="round,pad=0.3", facecolor='white', alpha=0.8))

        # Projeter détections sur cartographie
        h, w = analyzed_image.shape[:2]
        for det in yolo_detections:
            bbox = det['bbox']
            # Calculer centre de la détection
            cx = ((bbox[0] + bbox[2]) / 2) / w
            cy = ((bbox[1] + bbox[3]) / 2) / h

            # Trouver zone correspondante
            zone = self._find_zone(cx, cy)
            if zone:
                zone_counts[zone] += 1

            # Afficher détection sur cartographie
            color = self._get_color_for_class(det['class_name'])
            ax2.scatter(cx, cy, s=150, c=color, marker='x', linewidths=4, alpha=0.8)

            # Ajouter label près du point
            ax2.text(cx+0.02, cy, det['class_name'][:8], fontsize=8,
                    color=color, weight='bold')

        ax2.set_xlabel('Gauche ← → Droite', fontsize=10)
        ax2.set_ylabel('Avant ← → Arrière', fontsize=10)
        ax2.grid(True, alpha=0.3)
        current_col += 1

        # 4. Diagnostic final avec détails
        ax3 = plt.subplot(1, num_cols, current_col)
        ax3.axis('off')

        prediction = ml_result['prediction']
        confidence = ml_result['confidence']
        classifier = ml_result['classifier_used']

        # Couleurs et messages selon diagnostic
        if prediction == 'healthy':
            color = 'green'
            message = "LANGUE SAINE\nAucun signe pathologique"
        elif prediction == 'early':
            color = 'orange'
            message = "STADE PRECOCE (EBC)\nSignes initiaux détectés"
        else:
            color = 'red'
            message = "STADE AVANCE (ABC)\nSignes pathologiques importants"

        # Titre diagnostic
        ax3.text(0.5, 0.95, 'DIAGNOSTIC MTC HYBRIDE', ha='center', fontsize=16,
                weight='bold', transform=ax3.transAxes)

        # Statut détection langue
        if ml_result.get('tongue_detected'):
            ax3.text(0.5, 0.88, '✅ Langue détectée et extraite', ha='center',
                    fontsize=10, color='green', transform=ax3.transAxes)
        else:
            ax3.text(0.5, 0.88, '⚠️ Analyse sur image complète', ha='center',
                    fontsize=10, color='orange', transform=ax3.transAxes)

        # Résultat principal
        ax3.text(0.5, 0.78, message, ha='center', fontsize=12,
                color=color, weight='bold', transform=ax3.transAxes,
                bbox=dict(boxstyle="round,pad=0.5", facecolor=color, alpha=0.1))

        # Confiance
        ax3.text(0.5, 0.63, f'Confiance: {confidence:.1%}', ha='center',
                fontsize=12, weight='bold', transform=ax3.transAxes)

        # Classificateur utilisé
        ax3.text(0.5, 0.55, f'Classificateur: {classifier}', ha='center',
                fontsize=10, style='italic', transform=ax3.transAxes)

        # Scores détaillés
        y_pos = 0.45
        ax3.text(0.1, y_pos, 'Scores détaillés:', fontsize=11, weight='bold',
                transform=ax3.transAxes)

        for stage, prob in ml_result['probabilities'].items():
            y_pos -= 0.06
            stage_color = 'green' if stage == 'healthy' else 'orange' if stage == 'early' else 'red'
            ax3.text(0.15, y_pos, f'{stage}: {prob:.1%}', fontsize=10,
                    color=stage_color, transform=ax3.transAxes)

        # Localisation par zones
        if zone_counts:
            y_pos -= 0.08
            ax3.text(0.1, y_pos, 'Localisation MTC:', fontsize=11, weight='bold',
                    transform=ax3.transAxes)

            for zone, count in zone_counts.items():
                if count > 0:
                    y_pos -= 0.05
                    zone_name = self.zones[zone]['name']
                    ax3.text(0.15, y_pos, f'{zone_name}: {count}', fontsize=9,
                            transform=ax3.transAxes)

        # Caractéristiques détectées
        if 'detected_features' in ml_result and y_pos > 0.1:
            y_pos -= 0.08
            ax3.text(0.1, y_pos, 'Caractéristiques:', fontsize=11, weight='bold',
                    transform=ax3.transAxes)

            features = ml_result['detected_features']
            for feature, score in list(features.items())[:4]:  # Top 4
                if score > 0 and y_pos > 0.05:
                    y_pos -= 0.04
                    ax3.text(0.15, y_pos, f'{feature}: {score:.2f}', fontsize=8,
                            transform=ax3.transAxes)

        plt.tight_layout()

        # Sauvegarder
        save_path = self.results_dir / f"hybrid_mtc_{Path(image_path).stem}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()

        return save_path, zone_counts

    def _get_yolo_detections(self, image_path, feature_extractor=None):
        """Récupère détections YOLO brutes si pas fournies"""
        if feature_extractor:
            _, detections = feature_extractor.extract_features(image_path, return_detections=True)
            return detections
        else:
            # Fallback : essayer de créer un extracteur temporaire
            try:
                from ultralytics import YOLO
                model = YOLO('yolov8s.pt')  # Modèle par défaut
                results = model(image_path, conf=CONFIG['conf_threshold'], verbose=False)

                detections = []
                for r in results:
                    if r.boxes is not None:
                        for box in r.boxes:
                            bbox = box.xyxy[0].cpu().numpy()
                            conf = float(box.conf)
                            cls = int(box.cls)

                            if cls < len(CLASS_NAMES):
                                detection = {
                                    'bbox': bbox,
                                    'confidence': conf,
                                    'class': cls,
                                    'class_name': CLASS_NAMES[cls]
                                }
                                detections.append(detection)
                return detections
            except:
                return []

    def _get_color_for_class(self, class_name):
        """Retourne couleur selon classe"""
        if any(x in class_name for x in ['normal', 'rose', 'salive_normale']):
            return 'green'
        elif any(x in class_name for x in ['pale', 'blanc_mince', 'jaune_mince']):
            return 'orange'
        elif any(x in class_name for x in ['rouge_foncee', 'jaune_epais', 'Ecchymoses']):
            return 'red'
        elif any(x in class_name for x in ['rouge', 'blanc_epais']):
            return 'darkorange'
        elif 'red_dot' in class_name:
            return 'crimson'
        elif 'Fissure' in class_name:
            return 'purple'
        else:
            return 'gray'

    def _find_zone(self, x, y):
        """Trouve zone MTC pour coordonnées"""
        for zone_name, zone_info in self.zones.items():
            if self._point_in_polygon(x, y, zone_info['coords']):
                return zone_name
        return None

    def _point_in_polygon(self, x, y, coords):
        """Test point dans polygone"""
        n = len(coords)
        inside = False
        j = n - 1
        for i in range(n):
            xi, yi = coords[i]
            xj, yj = coords[j]
            if ((yi > y) != (yj > y)) and (x < (xj - xi) * (y - yi) / (yj - yi) + xi):
                inside = not inside
            j = i
        return inside

def main_hybrid():
    """Programme principal système hybride avec détection de langue"""
    print("="*80)
    print("SYSTEME HYBRIDE YOLO + CLASSIFICATION ML - DIAGNOSTIC MTC")
    print("Avec Détection Automatique de Langue")
    print("SMAILI Maya & MORSLI Manel - UMMTO 2024/2025")
    print("="*80)

    # Configuration seeds
    random.seed(CONFIG['random_seed'])
    np.random.seed(CONFIG['random_seed'])

    while True:
        print("\nMENU SYSTEME HYBRIDE")
        print("-"*40)
        print("1. Entraîner système hybride")
        print("2. Diagnostic hybride (image unique)")
        print("3. Diagnostic hybride (lot)")
        print("4. Évaluation comparative")
        print("5. Analyser features importantes")
        print("6. Quitter")
        print("-"*40)

        choice = input("Choix (1-6): ").strip()

        if choice == '1':
            print("\n🔄 ENTRAINEMENT SYSTEME HYBRIDE")

            # Vérifier modèle YOLO
            yolo_model = input("Chemin modèle YOLO pathologies: ").strip()
            if not Path(yolo_model).exists():
                print("❌ Modèle YOLO non trouvé")
                continue

            # Modèle détection langue (optionnel)
            tongue_model = input("Chemin modèle YOLO langue (Enter pour segmentation couleur): ").strip()
            if tongue_model and not Path(tongue_model).exists():
                print("⚠️ Modèle langue non trouvé, utilisation segmentation couleur")
                tongue_model = None

            # Dossier images d'entraînement
            train_folder = input("Dossier images d'entraînement: ").strip()
            if not Path(train_folder).exists():
                print("❌ Dossier non trouvé")
                continue

            # Demander si extraction langue
            extract_tongue = input("Extraire langue avant analyse? (o/n) [o]: ").strip().lower()
            extract_tongue = extract_tongue != 'n'

            try:
                # Initialiser système
                hybrid_system = HybridClassificationSystem(yolo_model, tongue_model)

                # Préparer données
                X, y = hybrid_system.prepare_training_data(train_folder, extract_tongue=extract_tongue)

                if len(X) == 0:
                    print("❌ Aucune donnée d'entraînement valide")
                    continue

                # Entraîner
                results = hybrid_system.train_classifiers(X, y)

                print("\n✅ ENTRAINEMENT TERMINE!")
                print(f"Modèles sauvegardés dans: {hybrid_system.classifier_dir}")

            except Exception as e:
                print(f"❌ ERREUR: {e}")
                import traceback
                traceback.print_exc()

        elif choice == '2':
            print("\n🔍 DIAGNOSTIC HYBRIDE")

            yolo_model = input("Chemin modèle YOLO pathologies: ").strip()
            if not Path(yolo_model).exists():
                print("❌ Modèle YOLO non trouvé")
                continue

            # Modèle détection langue (optionnel)
            tongue_model = input("Chemin modèle YOLO langue (Enter pour segmentation couleur): ").strip()
            if tongue_model and not Path(tongue_model).exists():
                print("⚠️ Modèle langue non trouvé, utilisation segmentation couleur")
                tongue_model = None

            image_path = input("Chemin image: ").strip().strip('"')
            if not Path(image_path).exists():
                print("❌ Image non trouvée")
                continue

            # Demander si extraction langue
            extract_tongue = input("Extraire langue avant analyse? (o/n) [o]: ").strip().lower()
            extract_tongue = extract_tongue != 'n'

            try:
                # Initialiser système
                hybrid_system = HybridClassificationSystem(yolo_model, tongue_model)

                # Diagnostic
                result = hybrid_system.predict(image_path, return_probabilities=True,
                                             extract_tongue=extract_tongue)

                # Afficher résultats
                print("\n" + "="*60)
                print("DIAGNOSTIC HYBRIDE YOLO + ML")
                print("="*60)
                print(f"📸 Image originale: {Path(result['original_image']).name}")
                if result['tongue_detected']:
                    print(f"✅ Langue détectée et extraite")
                    print(f"📸 Image analysée: {Path(result['analyzed_image']).name}")
                else:
                    print(f"⚠️ Pas de langue détectée, analyse sur image complète")

                print(f"\n🎯 Prédiction: {result['prediction'].upper()}")
                print(f"📊 Confiance: {result['confidence']:.2%}")
                print(f"🤖 Classificateur: {result['classifier_used']}")

                print("\n📈 Probabilités détaillées:")
                for class_name, prob in result['probabilities'].items():
                    print(f"  - {class_name}: {prob:.4f} ({prob*100:.2f}%)")

                if 'all_classifier_predictions' in result:
                    print("\n🔄 Prédictions ensemble:")
                    for clf_name, clf_result in result['all_classifier_predictions'].items():
                        print(f"  - {clf_name}: {clf_result['prediction']} ({clf_result['confidence']:.3f})")

                # Visualisation avec cartographie MTC
                visualizer = HybridDiagnosticVisualizer()
                save_path, zone_counts = visualizer.create_comprehensive_visualization(
                    image_path, result['detections'], result, hybrid_system.feature_extractor
                )

                # Afficher informations zones
                if zone_counts:
                    print("\n🗺️ Localisation par zones MTC:")
                    for zone, count in zone_counts.items():
                        if count > 0:
                            zone_name = TONGUE_ZONES[zone]['name']
                            print(f"  - {zone_name}: {count} détection(s)")

                print(f"\n📊 Visualisation complète: {save_path}")

                # Nettoyer fichiers temporaires
                if extract_tongue:
                    cleanup = input("\nNettoyer fichiers temporaires? (o/n) [n]: ").strip().lower()
                    if cleanup == 'o':
                        hybrid_system.tongue_detector.cleanup_temp_files()

            except Exception as e:
                print(f"❌ ERREUR: {e}")
                import traceback
                traceback.print_exc()

        elif choice == '3':
            print("\n📁 DIAGNOSTIC LOT HYBRIDE")

            yolo_model = input("Chemin modèle YOLO pathologies: ").strip()
            if not Path(yolo_model).exists():
                print("❌ Modèle YOLO non trouvé")
                continue

            # Modèle détection langue (optionnel)
            tongue_model = input("Chemin modèle YOLO langue (Enter pour segmentation couleur): ").strip()
            if tongue_model and not Path(tongue_model).exists():
                print("⚠️ Modèle langue non trouvé, utilisation segmentation couleur")
                tongue_model = None

            folder_path = input("Dossier images: ").strip().strip('"')
            if not Path(folder_path).exists():
                print("❌ Dossier non trouvé")
                continue

            # Demander si extraction langue
            extract_tongue = input("Extraire langue avant analyse? (o/n) [o]: ").strip().lower()
            extract_tongue = extract_tongue != 'n'

            try:
                hybrid_system = HybridClassificationSystem(yolo_model, tongue_model)
                images = list(Path(folder_path).glob('*.jpg'))

                if not images:
                    print("❌ Aucune image .jpg trouvée")
                    continue

                print(f"\n🔄 Traitement de {len(images)} images...")

                results_summary = defaultdict(int)
                confidence_scores = defaultdict(list)
                detailed_results = []
                tongue_detection_stats = {'detected': 0, 'not_detected': 0}

                for i, img_path in enumerate(images):
                    print(f"  Progression: {i+1}/{len(images)} - {img_path.name}")

                    result = hybrid_system.predict(img_path, extract_tongue=extract_tongue)
                    prediction = result['prediction']
                    confidence = result['confidence']

                    results_summary[prediction] += 1
                    confidence_scores[prediction].append(confidence)

                    if result['tongue_detected']:
                        tongue_detection_stats['detected'] += 1
                    else:
                        tongue_detection_stats['not_detected'] += 1

                    detailed_results.append({
                        'image': img_path.name,
                        'prediction': prediction,
                        'confidence': confidence,
                        'tongue_detected': result['tongue_detected']
                    })

                # Afficher résumé
                print("\n" + "="*60)
                print("RESUME DIAGNOSTIC LOT HYBRIDE")
                print("="*60)

                if extract_tongue:
                    print(f"\nDétection langue:")
                    print(f"  - Langues détectées: {tongue_detection_stats['detected']}")
                    print(f"  - Non détectées: {tongue_detection_stats['not_detected']}")

                total = len(images)
                print("\nRésultats diagnostic:")
                for stage in ['healthy', 'early', 'advanced']:
                    count = results_summary.get(stage, 0)
                    avg_conf = np.mean(confidence_scores[stage]) if confidence_scores[stage] else 0
                    percentage = (count / total) * 100

                    print(f"{stage.upper()}: {count} images ({percentage:.1f}%)")
                    print(f"  Confiance moyenne: {avg_conf:.2%}")

                # Sauvegarder résultats détaillés
                df = pd.DataFrame(detailed_results)
                results_file = Path(CONFIG['results_dir']) / f"batch_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
                df.to_csv(results_file, index=False)
                print(f"\n📄 Résultats détaillés: {results_file}")

                # Nettoyer fichiers temporaires
                if extract_tongue:
                    cleanup = input("\nNettoyer fichiers temporaires? (o/n) [o]: ").strip().lower()
                    if cleanup != 'n':
                        hybrid_system.tongue_detector.cleanup_temp_files()

            except Exception as e:
                print(f"❌ ERREUR: {e}")

        elif choice == '4':
            print("\n📊 EVALUATION COMPARATIVE")
            print("Fonctionnalité à implémenter...")
            # Ici vous pouvez comparer performances avec/sans extraction langue

        elif choice == '5':
            print("\n🎯 ANALYSE FEATURES IMPORTANTES")
            print("Fonctionnalité à implémenter...")
            # Ici vous pouvez analyser impact de l'extraction langue sur features

        elif choice == '6':
            print("\n👋 Au revoir! Merci d'avoir utilisé le système hybride!")
            break

if __name__ == "__main__":
    try:
        main_hybrid()
    except KeyboardInterrupt:
        print("\n\n⚠️ Interruption utilisateur")
    except Exception as e:
        print(f"\n❌ ERREUR CRITIQUE: {e}")
        import traceback
        traceback.print_exc()

SYSTEME HYBRIDE YOLO + CLASSIFICATION ML - DIAGNOSTIC MTC
Avec Détection Automatique de Langue
SMAILI Maya & MORSLI Manel - UMMTO 2024/2025

MENU SYSTEME HYBRIDE
----------------------------------------
1. Entraîner système hybride
2. Diagnostic hybride (image unique)
3. Diagnostic hybride (lot)
4. Évaluation comparative
5. Analyser features importantes
6. Quitter
----------------------------------------
Choix (1-6): 1

🔄 ENTRAINEMENT SYSTEME HYBRIDE
Chemin modèle YOLO pathologies: /content/mon_modele.pt
Chemin modèle YOLO langue (Enter pour segmentation couleur): /content/bestYolo8.pt
Dossier images d'entraînement: /content/train/train/images
Extraire langue avant analyse? (o/n) [o]: o
PREPARATION DES DONNEES D'ENTRAINEMENT
Images trouvées: 80
Distribution des labels:
  - healthy: 38
  - advanced: 19
  - early: 23

Extraction des features...
  Progression: 0/80
  Progression: 10/80
  Progression: 20/80
  Progression: 30/80
  Progression: 40/80
  Progression: 50/80
  Progression: 60/80