## üì¶ Imports et Configuration

In [1]:
try:
    import nltk
    nltk.download('punkt', quiet=True)
    nltk.download('stopwords', quiet=True)
    nltk.download('punkt_tab', quiet=True)
    print("‚úÖ NLTK configur√©")
except Exception as e:
    print(f"‚ö†Ô∏è NLTK setup warning: {e}")

‚úÖ NLTK configur√©


In [1]:
import os
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import pickle
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

In [2]:
from deepchecks.nlp import TextData
from deepchecks.nlp.suites import (
    data_integrity,
    train_test_validation,
    model_evaluation
)
from deepchecks.nlp.checks import (
    TextPropertyOutliers,
    UnknownTokens,
    ConflictingLabels,
    LabelDrift,
    PropertyDrift,
    PredictionDrift,
    TextDuplicates
)


In [3]:
# ML
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    classification_report, confusion_matrix
)
from sklearn.model_selection import train_test_split
import joblib

In [4]:
# Configuration des chemins
BASE_DIR = Path.cwd().parent if Path.cwd().name == 'testing' else Path.cwd()
PROCESSOR_DIR = BASE_DIR / 'processors'
MODELS_DIR = BASE_DIR / 'models'
TESTING_DIR = BASE_DIR / 'testing'
TESTING_DIR.mkdir(parents=True, exist_ok=True)

print("="*80)
print("üîç DEEPCHECKS NLP - VALIDATION MLOps Election")
print("="*80)
print(f"üìÖ Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"üìÅ Base: {BASE_DIR}")
print()

üîç DEEPCHECKS NLP - VALIDATION MLOps Election
üìÖ Date: 2025-12-16 17:53:49
üìÅ Base: c:\Users\user\Downloads\Notre_Mlops



---

## üì• Chargement des Donn√©es

In [5]:
def load_preprocessed_data():
    """Charge les donn√©es preprocess√©es"""
    print("üì¶ Chargement des donn√©es")
    print("-" * 80)
    
    data_path = PROCESSOR_DIR / 'preprocessed_data.pkl'
    if not data_path.exists():
        raise FileNotFoundError(
            f"Donn√©es non trouv√©es: {data_path}\n"
            "Ex√©cutez: python scripts/preprocess.py"
        )
    
    with open(data_path, 'rb') as f:
        data = pickle.load(f)
    
    print(f"‚úÖ Donn√©es charg√©es:")
    print(f"   Train: {data['X_train'].shape}")
    print(f"   Val:   {data['X_val'].shape}")
    print(f"   Test:  {data['X_test'].shape}")
    print()
    
    return data

In [6]:
def load_cleaned_texts():
    """Charge les textes nettoy√©s"""
    texts_path = PROCESSOR_DIR / 'cleaned_texts.pkl'
    if not texts_path.exists():
        raise FileNotFoundError(f"Textes non trouv√©s: {texts_path}")
    
    with open(texts_path, 'rb') as f:
        data = pickle.load(f)
    
    return data['cleaned'], data['labels']

In [7]:
def load_best_model():
    """Charge le meilleur mod√®le ML"""
    print("ü§ñ Chargement du meilleur mod√®le")
    print("-" * 80)
    
    # Essayer de trouver le meilleur mod√®le
    model_files = list(MODELS_DIR.glob('*.pkl'))
    
    if not model_files:
        print("‚ö†Ô∏è  Aucun mod√®le trouv√© - chargement du LogisticRegression par d√©faut")
        model_path = MODELS_DIR / 'model_lr.pkl'
    else:
        # Pour l'exemple, on prend Logistic Regression
        model_path = MODELS_DIR / 'model_lr.pkl'
        if not model_path.exists():
            model_path = model_files[0]
    
    if model_path.exists():
        model = joblib.load(model_path)
        print(f"‚úÖ Mod√®le charg√©: {model_path.name}")
        print(f"   Type: {type(model).__name__}")
        print()
        return model
    else:
        raise FileNotFoundError("Aucun mod√®le disponible")

In [8]:
# Charger les donn√©es
data = load_preprocessed_data()
texts, labels = load_cleaned_texts()
model = load_best_model()

üì¶ Chargement des donn√©es
--------------------------------------------------------------------------------
‚úÖ Donn√©es charg√©es:
   Train: (2403, 5000)
   Val:   (515, 5000)
   Test:  (516, 5000)

ü§ñ Chargement du meilleur mod√®le
--------------------------------------------------------------------------------
‚úÖ Mod√®le charg√©: model_gradient_boosting.pkl
   Type: GradientBoostingClassifier



---

## üìù Cr√©ation des TextData pour Deepchecks NLP

In [9]:
def create_text_data(texts_list, labels_list, split_name='train'):
    """
    Cr√©e un TextData Deepchecks NLP √† partir de textes et labels
    """
    print(f"üìù Cr√©ation TextData NLP ({split_name})")
    print("-" * 80)
    
    # Cr√©er le TextData Deepchecks
    text_data = TextData(
        raw_text=texts_list,
        label=labels_list,
        task_type='text_classification',
        name=f'{split_name}_dataset'
    )
    
    print(f"‚úÖ TextData cr√©√©:")
    print(f"   Nombre de textes: {len(texts_list)}")
    print(f"   Distribution labels: {pd.Series(labels_list).value_counts().to_dict()}")
    print()
    
    return text_data

In [10]:
# Recr√©er le m√™me split que preprocess.py
df = pd.DataFrame({'texts': texts, 'labels': labels})
train_df, temp_df = train_test_split(
    df, test_size=0.30, random_state=42, stratify=df['labels']
)
val_df, test_df = train_test_split(
    temp_df, test_size=0.5, random_state=42, stratify=temp_df['labels']
)

print(f"Split effectu√©:")
print(f"  Train: {len(train_df)} ({len(train_df)/len(df)*100:.1f}%)")
print(f"  Val:   {len(val_df)} ({len(val_df)/len(df)*100:.1f}%)")
print(f"  Test:  {len(test_df)} ({len(test_df)/len(df)*100:.1f}%)")
print()

Split effectu√©:
  Train: 2403 (70.0%)
  Val:   515 (15.0%)
  Test:  516 (15.0%)



In [12]:
# Cr√©er les TextData NLP
train_text_data = create_text_data(
    train_df['texts'].tolist(), 
    train_df['labels'].tolist(), 
    'train'
)
test_text_data = create_text_data(
    test_df['texts'].tolist(), 
    test_df['labels'].tolist(), 
    'test'
)

üìù Cr√©ation TextData NLP (train)
--------------------------------------------------------------------------------
‚úÖ TextData cr√©√©:
   Nombre de textes: 2403
   Distribution labels: {0: 1233, 1: 1170}

üìù Cr√©ation TextData NLP (test)
--------------------------------------------------------------------------------
‚úÖ TextData cr√©√©:
   Nombre de textes: 516
   Distribution labels: {0: 265, 1: 251}



In [13]:
# Aper√ßu des TextData
print("üìä Aper√ßu des TextData:")
print(f"Train: {train_text_data}")
print(f"Test: {test_text_data}")

üìä Aper√ßu des TextData:
Train: <deepchecks.nlp.text_data.TextData object at 0x0000026E6F406770>
Test: <deepchecks.nlp.text_data.TextData object at 0x0000026E0EB21FF0>


---

## üìä NIVEAU 1 : TEXT DATA INTEGRITY

### V√©rifications d'int√©grit√© des donn√©es textuelles NLP natives

In [14]:
def run_text_integrity_checks(train_data, test_data):
    """
    NIVEAU 1: V√©rifications d'int√©grit√© des donn√©es textuelles (NLP natif)
    """
    print("\n" + "="*80)
    print("üìä NIVEAU 1: TEXT DATA INTEGRITY (NLP Natif)")
    print("="*80)
    
    # Suite d'int√©grit√© NLP
    integrity_suite = data_integrity()
    
    print("\nüîç Checks NLP ex√©cut√©s:")
    print("   1. Text Property Outliers (longueur, mots rares, etc.)")
    print("   2. Unknown Tokens (tokens jamais vus)")
    print("   3. Text Duplicates (textes dupliqu√©s)")
    print("   4. Conflicting Labels (m√™me texte, labels diff√©rents)")
    print("   5. Property Label Correlation")
    
    # Ex√©cuter la suite
    print("\n‚è≥ Ex√©cution des checks d'int√©grit√© NLP...")
    result = integrity_suite.run(train_data, test_data)
    
    # Sauvegarder le rapport
    integrity_report_path = TESTING_DIR / 'deepchecks_nlp_integrity_report.html'
    result.save_as_html(str(integrity_report_path), as_widget=False)
    
    print(f"‚úÖ Rapport d'int√©grit√© NLP sauvegard√©: {integrity_report_path.name}")
    
    # R√©sum√© des r√©sultats
    print("\nüìà R√©sum√© Int√©grit√© NLP:")
    passed = 0
    total = 0
    for check_result in result.results:
        # Skip CheckFailure objects
        if hasattr(check_result, 'passed_conditions'):
            total += 1
            if check_result.passed_conditions():
                passed += 1
    
    if total > 0:
        print(f"   Checks r√©ussies: {passed}/{total}")
    else:
        print(f"   Checks ex√©cut√©s: {len(result.results)}")
    
    # Statistiques texte
    train_texts = train_data.text
    test_texts = test_data.text
    
    print("\nüìù Statistiques Texte:")
    print(f"   Train - Longueur moyenne: {np.mean([len(t) for t in train_texts]):.1f} caract√®res")
    print(f"   Test  - Longueur moyenne: {np.mean([len(t) for t in test_texts]):.1f} caract√®res")
    print(f"   Train - Mots moyens: {np.mean([len(t.split()) for t in train_texts]):.1f}")
    print(f"   Test  - Mots moyens: {np.mean([len(t.split()) for t in test_texts]):.1f}")
    
    return result


In [15]:
# Ex√©cuter les checks d'int√©grit√© NLP
integrity_result = run_text_integrity_checks(train_text_data, test_text_data)


üìä NIVEAU 1: TEXT DATA INTEGRITY (NLP Natif)

üîç Checks NLP ex√©cut√©s:
   1. Text Property Outliers (longueur, mots rares, etc.)
   2. Unknown Tokens (tokens jamais vus)
   3. Text Duplicates (textes dupliqu√©s)
   4. Conflicting Labels (m√™me texte, labels diff√©rents)
   5. Property Label Correlation

‚è≥ Ex√©cution des checks d'int√©grit√© NLP...


‚úÖ Rapport d'int√©grit√© NLP sauvegard√©: deepchecks_nlp_integrity_report.html

üìà R√©sum√© Int√©grit√© NLP:
   Checks r√©ussies: 9/10

üìù Statistiques Texte:
   Train - Longueur moyenne: 82.3 caract√®res
   Test  - Longueur moyenne: 83.1 caract√®res
   Train - Mots moyens: 15.3
   Test  - Mots moyens: 15.4


In [16]:
# Afficher le r√©sum√© interactif
integrity_result

Accordion(children=(VBox(children=(HTML(value='\n<h1 id="summary_AAMSDME7R1GOP6HAVVG0CIGZI">Data Integrity Sui‚Ä¶

---

## üìà NIVEAU 2 : TRAIN-TEST DRIFT

### D√©tection de drift s√©mantique et de distribution

In [17]:
def run_nlp_drift_checks(train_data, test_data):
    """
    NIVEAU 2: D√©tection de drift NLP (distribution, propri√©t√©s, labels)
    """
    print("\n" + "="*80)
    print("üìä NIVEAU 2: NLP TRAIN-TEST DRIFT")
    print("="*80)
    
    # Calculer les propri√©t√©s built-in pour les checks
    print("\n‚è≥ Calcul des propri√©t√©s textuelles...")
    train_data.calculate_builtin_properties()
    test_data.calculate_builtin_properties()
    print("‚úÖ Propri√©t√©s calcul√©es")
    
    # Suite de validation train-test NLP
    drift_suite = train_test_validation()
    
    print("\nüîç Checks de Drift NLP (suite compl√®te):")
    print("   1. Label Drift (distribution des labels)")
    print("   2. Property Drift (longueur texte, vocabulaire)")
    print("   3. Text Embeddings Drift")
    print("   4. Train Test Samples Mix")
    
    # Ex√©cuter la suite
    print("\n‚è≥ Ex√©cution des checks de drift NLP...")
    result = drift_suite.run(train_data, test_data)
    
    # Sauvegarder le rapport
    drift_report_path = TESTING_DIR / 'deepchecks_nlp_drift_report.html'
    result.save_as_html(str(drift_report_path), as_widget=False)
    
    print(f"‚úÖ Rapport de drift NLP sauvegard√©: {drift_report_path.name}")
    
    # R√©sum√© des r√©sultats
    print("\nüìà R√©sum√© Drift NLP:")
    passed = 0
    total = 0
    for check_result in result.results:
        if hasattr(check_result, 'passed_conditions'):
            total += 1
            if check_result.passed_conditions():
                passed += 1
    
    if total > 0:
        print(f"   Checks r√©ussies: {passed}/{total}")
    else:
        print(f"   Checks ex√©cut√©s: {len(result.results)}")
    
    # Statistiques de distribution
    print("\nüìä Distribution des Labels:")
    train_labels = train_data.label
    test_labels = test_data.label
    
    print("   Train:")
    print(pd.Series(train_labels).value_counts(normalize=True).to_string())
    print("   Test:")
    print(pd.Series(test_labels).value_counts(normalize=True).to_string())
    
    return result


In [18]:
# Ex√©cuter les checks de drift NLP
drift_result = run_nlp_drift_checks(train_text_data, test_text_data)


üìä NIVEAU 2: NLP TRAIN-TEST DRIFT

‚è≥ Calcul des propri√©t√©s textuelles...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 151/151 [00:01<00:00, 120.78it/s]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 33/33 [00:00<00:00, 133.61it/s]

‚úÖ Propri√©t√©s calcul√©es

üîç Checks de Drift NLP (suite compl√®te):
   1. Label Drift (distribution des labels)
   2. Property Drift (longueur texte, vocabulaire)
   3. Text Embeddings Drift
   4. Train Test Samples Mix

‚è≥ Ex√©cution des checks de drift NLP...







‚úÖ Rapport de drift NLP sauvegard√©: deepchecks_nlp_drift_report.html

üìà R√©sum√© Drift NLP:
   Checks r√©ussies: 3/3

üìä Distribution des Labels:
   Train:
0    0.513109
1    0.486891
   Test:
0    0.513566
1    0.486434


In [19]:
# Afficher le r√©sum√© interactif du drift
drift_result

Accordion(children=(VBox(children=(HTML(value='\n<h1 id="summary_92PAEAYRM6PEO8FB8NMJY548J">Train Test Validat‚Ä¶

---

## üèÜ NIVEAU 3 : MODEL PERFORMANCE NLP

### √âvaluation de la performance du mod√®le avec m√©triques NLP

In [20]:
def run_nlp_model_performance(model, train_data, test_data, X_train, X_test):
    """
    NIVEAU 3: √âvaluation de la performance du mod√®le NLP
    """
    print("\n" + "="*80)
    print("üìä NIVEAU 3: MODEL PERFORMANCE NLP")
    print("="*80)
    
    # Faire les pr√©dictions
    print("\nüîÆ G√©n√©ration des pr√©dictions...")
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    
    # Calculer les probabilit√©s si possible
    try:
        y_train_proba = model.predict_proba(X_train)
        y_test_proba = model.predict_proba(X_test)
        has_proba = True
    except:
        y_train_proba = None
        y_test_proba = None
        has_proba = False

    # Normaliser y_true et y_pred en numpy arrays
    y_train_true = np.array(train_data.label)
    y_test_true = np.array(test_data.label)

    # Align prediction types with true labels
    def _align_preds_to_labels(preds, labels):
        preds_arr = np.array(preds).ravel()
        if len(labels) == 0:
            return preds_arr
        sample = labels[0]
        # If labels are strings, cast preds to str
        if isinstance(sample, str):
            return np.array([str(p) for p in preds_arr])
        # If labels are ints, try to cast preds to int
        if isinstance(sample, (int, np.integer)):
            try:
                return np.array([int(p) for p in preds_arr])
            except Exception:
                return preds_arr
        return preds_arr

    y_train_pred = _align_preds_to_labels(y_train_pred, y_train_true)
    y_test_pred = _align_preds_to_labels(y_test_pred, y_test_true)

    # Cr√©er TextData pour les checks de performance
    train_data_with_pred = TextData(
        raw_text=train_data.text,
        label=train_data.label,
        task_type='text_classification',
        name='train_with_predictions'
    )
    test_data_with_pred = TextData(
        raw_text=test_data.text,
        label=test_data.label,
        task_type='text_classification',
        name='test_with_predictions'
    )
    
    print("\n‚úÖ Pr√©dictions g√©n√©r√©es")
    print(f"   Train predictions shape: {y_train_pred.shape}")
    print(f"   Test predictions shape: {y_test_pred.shape}")
    
    # Calculer les propri√©t√©s pour les checks de performance
    print("\n‚è≥ Calcul des propri√©t√©s textuelles pour l'√©valuation...")
    train_data_with_pred.calculate_builtin_properties()
    test_data_with_pred.calculate_builtin_properties()
    print("‚úÖ Propri√©t√©s calcul√©es")
    
    # Suite d'√©valuation du mod√®le NLP
    performance_suite = model_evaluation()
    
    print("\nüîç Checks de Performance (suite compl√®te):")
    print("   1. Prediction Drift")
    print("   2. Train Test Performance")
    print("   3. Property Segments Performance")
    print("   4. Metrics sklearn (int√©gr√©s)")
    
    # Ex√©cuter la suite avec pr√©dictions pass√©es en arguments
    print("\n‚è≥ Ex√©cution de la suite d'√©valuation...")
    try:
        # Pass predictions directly to the run() method!
        performance_result = performance_suite.run(
            train_dataset=train_data_with_pred, 
            test_dataset=test_data_with_pred,
            train_predictions=list(y_train_pred),
            test_predictions=list(y_test_pred),
            train_probabilities=y_train_proba if has_proba else None,
            test_probabilities=y_test_proba if has_proba else None
        )
        print("‚úÖ Suite d'√©valuation ex√©cut√©e")
    except Exception as e:
        print(f"   ‚ö†Ô∏è  Suite d'√©valuation erreur: {str(e)[:150]}")
        print("   ‚ÑπÔ∏è  Les m√©triques de performance sont calcul√©es ci-dessous")
        performance_result = None
    
    # M√©triques custom
    print("\nüèÜ M√©triques du Mod√®le:")
    
    # Determine pos_label based on label type
    sample_label = train_data.label[0]
    if isinstance(sample_label, str):
        pos_label = '1'
    else:
        pos_label = 1
    
    # Train metrics
    train_acc = accuracy_score(train_data.label, y_train_pred)
    train_f1 = f1_score(train_data.label, y_train_pred, average='binary', pos_label=pos_label)
    
    # Test metrics
    test_acc = accuracy_score(test_data.label, y_test_pred)
    test_f1 = f1_score(test_data.label, y_test_pred, average='binary', pos_label=pos_label)
    test_precision = precision_score(test_data.label, y_test_pred, average='binary', pos_label=pos_label)
    test_recall = recall_score(test_data.label, y_test_pred, average='binary', pos_label=pos_label)
    
    print(f"   Train Accuracy: {train_acc:.4f}")
    print(f"   Train F1:       {train_f1:.4f}")
    print(f"   Test Accuracy:  {test_acc:.4f}")
    print(f"   Test F1:        {test_f1:.4f}")
    print(f"   Test Precision: {test_precision:.4f}")
    print(f"   Test Recall:    {test_recall:.4f}")
    
    # Overfitting check
    overfit_gap = train_acc - test_acc
    print(f"\n‚ö†Ô∏è  √âcart Train/Test: {overfit_gap:.4f}")
    if overfit_gap > 0.1:
        print("   ‚ö†Ô∏è  ATTENTION: Possible overfitting d√©tect√©!")
    else:
        print("   ‚úÖ Pas d'overfitting majeur")
    
    # Confusion matrix
    cm = confusion_matrix(test_data.label, y_test_pred)
    print(f"\nüìä Matrice de Confusion (Test):")
    print(cm)
    
    # Classification report
    print(f"\nüìã Classification Report (Test):")
    print(classification_report(test_data.label, y_test_pred, 
                                target_names=['Classe 0', 'Classe 1'], 
                                digits=4))
    
    # Sauvegarder le rapport
    performance_report_path = TESTING_DIR / 'deepchecks_nlp_performance_report.html'
    if performance_result:
        performance_result.save_as_html(str(performance_report_path), as_widget=False)
        print(f"\n‚úÖ Rapport de performance NLP sauvegard√©: {performance_report_path.name}")
    
    return performance_result, {
        'train_acc': train_acc,
        'test_acc': test_acc,
        'test_f1': test_f1,
        'test_precision': test_precision,
        'test_recall': test_recall,
        'overfit_gap': overfit_gap
    }, train_data_with_pred, test_data_with_pred


In [21]:
# Ex√©cuter les checks de performance NLP
performance_result, metrics, train_data_with_pred, test_data_with_pred = run_nlp_model_performance(
    model, train_text_data, test_text_data,
    data['X_train'], data['X_test']
)


üìä NIVEAU 3: MODEL PERFORMANCE NLP

üîÆ G√©n√©ration des pr√©dictions...

‚úÖ Pr√©dictions g√©n√©r√©es
   Train predictions shape: (2403,)
   Test predictions shape: (516,)

‚è≥ Calcul des propri√©t√©s textuelles pour l'√©valuation...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 151/151 [00:01<00:00, 130.36it/s]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 33/33 [00:00<00:00, 145.16it/s]


‚úÖ Propri√©t√©s calcul√©es

üîç Checks de Performance (suite compl√®te):
   1. Prediction Drift
   2. Train Test Performance
   3. Property Segments Performance
   4. Metrics sklearn (int√©gr√©s)

‚è≥ Ex√©cution de la suite d'√©valuation...


‚úÖ Suite d'√©valuation ex√©cut√©e

üèÜ M√©triques du Mod√®le:
   Train Accuracy: 0.8556
   Train F1:       0.8436
   Test Accuracy:  0.7345
   Test F1:        0.7079
   Test Precision: 0.7615
   Test Recall:    0.6614

‚ö†Ô∏è  √âcart Train/Test: 0.1211
   ‚ö†Ô∏è  ATTENTION: Possible overfitting d√©tect√©!

üìä Matrice de Confusion (Test):
[[213  52]
 [ 85 166]]

üìã Classification Report (Test):
              precision    recall  f1-score   support

    Classe 0     0.7148    0.8038    0.7567       265
    Classe 1     0.7615    0.6614    0.7079       251

    accuracy                         0.7345       516
   macro avg     0.7381    0.7326    0.7323       516
weighted avg     0.7375    0.7345    0.7329       516


‚úÖ Rapport de performance NLP sauvegard√©: deepchecks_nlp_performance_report.html


In [22]:
# Debug: Check predictions and performance results
print("\nüîç DEBUG: Checking model performance results...")
print(f"Performance result type: {type(performance_result)}")
print(f"Train data with predictions type: {type(train_data_with_pred)}")
print(f"Test data with predictions type: {type(test_data_with_pred)}")

# Check if predictions are accessible
print(f"\n‚úÖ Train data attributes with 'pred': {[attr for attr in dir(train_data_with_pred) if 'pred' in attr.lower()]}")
print(f"‚úÖ Test data attributes with 'pred': {[attr for attr in dir(test_data_with_pred) if 'pred' in attr.lower()]}")

# Display performance result
print("\nüìä Performance Result:")
performance_result



üîç DEBUG: Checking model performance results...
Performance result type: <class 'deepchecks.core.suite.SuiteResult'>
Train data with predictions type: <class 'deepchecks.nlp.text_data.TextData'>
Test data with predictions type: <class 'deepchecks.nlp.text_data.TextData'>

‚úÖ Train data attributes with 'pred': []
‚úÖ Test data attributes with 'pred': []

üìä Performance Result:


Accordion(children=(VBox(children=(HTML(value='\n<h1 id="summary_1PNT2SBEX0WRQ38CZ14M23YW1">Model Evaluation S‚Ä¶

---

## üìä R√©sum√© Final

In [23]:
print("\n" + "="*80)
print("‚úÖ VALIDATION DEEPCHECKS NLP TERMIN√âE")
print("="*80)

print("\nüìÇ Rapports NLP g√©n√©r√©s:")
print(f"   1. {TESTING_DIR / 'deepchecks_nlp_integrity_report.html'}")
print(f"   2. {TESTING_DIR / 'deepchecks_nlp_drift_report.html'} (checks individuels)")
print(f"   3. {TESTING_DIR / 'deepchecks_nlp_performance_report.html'}")

print("\nüèÜ M√©triques Finales:")
for key, value in metrics.items():
    print(f"   {key}: {value:.4f}")

print("\nüéØ Type de validation: DEEPCHECKS NLP NATIF")
print("   ‚úÖ TextData utilis√© (pas DataFrame tabular)")
print("   ‚úÖ Analyse s√©mantique du texte brut")
print("   ‚úÖ Drift de propri√©t√©s textuelles")
print("   ‚úÖ D√©tection outliers NLP")


‚úÖ VALIDATION DEEPCHECKS NLP TERMIN√âE

üìÇ Rapports NLP g√©n√©r√©s:
   1. c:\Users\user\Downloads\Notre_Mlops\testing\deepchecks_nlp_integrity_report.html
   2. c:\Users\user\Downloads\Notre_Mlops\testing\deepchecks_nlp_drift_report.html (checks individuels)
   3. c:\Users\user\Downloads\Notre_Mlops\testing\deepchecks_nlp_performance_report.html

üèÜ M√©triques Finales:
   train_acc: 0.8556
   test_acc: 0.7345
   test_f1: 0.7079
   test_precision: 0.7615
   test_recall: 0.6614
   overfit_gap: 0.1211

üéØ Type de validation: DEEPCHECKS NLP NATIF
   ‚úÖ TextData utilis√© (pas DataFrame tabular)
   ‚úÖ Analyse s√©mantique du texte brut
   ‚úÖ Drift de propri√©t√©s textuelles
   ‚úÖ D√©tection outliers NLP


### üí° Recommandations

In [24]:
print("üí° Recommandations:")

if metrics['overfit_gap'] > 0.1:
    print("   ‚ö†Ô∏è  Overfitting d√©tect√© - envisager:")
    print("      ‚Ä¢ Augmentation des donn√©es")
    print("      ‚Ä¢ R√©gularisation plus forte")
    print("      ‚Ä¢ R√©duction de la complexit√© du mod√®le")
    print()

if metrics['test_f1'] < 0.7:
    print("   ‚ö†Ô∏è  F1-Score faible - envisager:")
    print("      ‚Ä¢ Features NLP suppl√©mentaires (n-grams, embeddings)")
    print("      ‚Ä¢ Fine-tuning TunBERT")
    print("      ‚Ä¢ Nettoyage des donn√©es")
    print()

if metrics['overfit_gap'] <= 0.1 and metrics['test_f1'] >= 0.7:
    print("   ‚úÖ Mod√®le performant et bien g√©n√©ralis√©!")
    print("      ‚Ä¢ Pr√™t pour la production")
    print("      ‚Ä¢ Consid√©rer le d√©ploiement")
    print()

print("\nüîó Ouvrez les rapports HTML pour visualiser les d√©tails")
print("="*80)

üí° Recommandations:
   ‚ö†Ô∏è  Overfitting d√©tect√© - envisager:
      ‚Ä¢ Augmentation des donn√©es
      ‚Ä¢ R√©gularisation plus forte
      ‚Ä¢ R√©duction de la complexit√© du mod√®le


üîó Ouvrez les rapports HTML pour visualiser les d√©tails


---

## üìö Documentation

Pour plus de d√©tails sur les validations NLP, consultez :
- [DEEPCHECKS_VALIDATIONS.md](DEEPCHECKS_VALIDATIONS.md)
- [Deepchecks NLP Documentation](https://docs.deepchecks.com/stable/nlp/auto_checks/index.html)

### üÜï Diff√©rences avec l'approche Tabular

| Aspect | Tabular (ancienne) | NLP (nouvelle) |
|--------|-------------------|----------------|
| **Import** | `deepchecks.tabular` | `deepchecks.nlp` |
| **Donn√©es** | `Dataset(df)` | `TextData(raw_text=...)` |
| **Input** | Features num√©riques extraites | **Texte brut** |
| **Checks** | `data_integrity()` | `text_data_integrity()` |
| **Analyse** | Statistiques colonnes | **Analyse s√©mantique** |
| **Drift** | Feature drift | **Property drift + vocabulaire** |