In [None]:
!pip install pytest



In [None]:
# Si pas encore monté
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Make the project's custom preprocessing modules importable
import sys

PREPROCESSING_DIR = '/content/drive/MyDrive/projet-datascience/src/preprocessing'

# Notebook cells get re-run; only append when the entry is missing so that
# sys.path does not accumulate duplicates of the same directory.
if PREPROCESSING_DIR not in sys.path:
    sys.path.append(PREPROCESSING_DIR)

In [None]:
import os

# Expected location of the custom transformers module under /content/drive
file_path = '/content/drive/MyDrive/projet-datascience/src/preprocessing/custom_transformers.py'

# Report whether that path is currently reachable from this runtime
file_exists = os.path.exists(file_path)
print(f"\n\nLe fichier existe ? {file_exists}")



Le fichier existe ? True


In [None]:
# Import des transformers personnalisés
from custom_transformers import (
    DataCleaningTransformer,
    FeatureEngineeringTransformer,
    SmartImputerTransformer,
    CustomEncodingTransformer,
    MultiStrategyScaler
)

In [None]:
import warnings

# Silence FutureWarning messages whose issuing module matches 'sklearn.pipeline'
warnings.filterwarnings(
    action="ignore",
    category=FutureWarning,
    module='sklearn.pipeline',
)

In [None]:
"""
Tests unitaires pour les transformers personnalisés du projet.

Usage:
    pytest test_transformers.py -v
"""

import pytest
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline


# ============================================
# FIXTURES (données de test)
# ============================================

@pytest.fixture
def sample_data_raw():
    """Return a four-row raw sample frame whose Age column has one missing value."""
    columns = {
        'Age': [25, 30, np.nan, 40],
        'Salaire_Annuel': [50000, 60000, 55000, 70000],
        'Epargne_Totale': [5000, 12000, 8000, 15000],
        'Score_Credit_Externe': [50, 75, 60, 80],
        'Niveau_Etude': ['Licence', 'Master', 'Bac', 'Doctorat'],
        'Ville': ['Paris', 'Lyon', 'Marseille', 'Paris'],
    }
    return pd.DataFrame(columns)


@pytest.fixture
def sample_data_with_intrus():
    """Return a four-row frame polluted with placeholder strings ('-', '?', 'null', 'none', '--', '')."""
    columns = {
        'Age': [25, '-', '?', 40],
        'Salaire_Annuel': [50000, 60000, 'null', 70000],
        'Score_Credit_Externe': [50, 75, 'none', 80],
        'Niveau_Etude': ['Licence', 'Master', '--', 'Doctorat'],
        'Ville': ['Paris', 'Lyon', '', 'Paris'],
    }
    return pd.DataFrame(columns)


@pytest.fixture
def config_ordinale():
    """Return the ordinal-variable configuration: column name mapped to its list of levels."""
    # Levels are listed in lowercase, lowest first (bac < licence < master < doctorat).
    niveau_etude_levels = ['bac', 'licence', 'master', 'doctorat']
    return {'Niveau_Etude': niveau_etude_levels}


# ============================================
# TEST 1 : DataCleaningTransformer
# ============================================

class TestDataCleaningTransformer:
    """Tests for the data-cleaning transformer."""

    def test_cleaning_replaces_intrus_with_nan(self, sample_data_with_intrus):
        """Placeholder values in the polluted fixture must come back as NaN."""
        # The project module is custom_transformers.py; a bare `transformers`
        # import would not resolve to it.
        from custom_transformers import DataCleaningTransformer

        cleaner = DataCleaningTransformer()
        result = cleaner.fit_transform(sample_data_with_intrus)

        # One placeholder per column: '-', 'null', 'none', '--' and ''.
        assert pd.isna(result['Age'].iloc[1])
        assert pd.isna(result['Salaire_Annuel'].iloc[2])
        assert pd.isna(result['Score_Credit_Externe'].iloc[2])
        assert pd.isna(result['Niveau_Etude'].iloc[2])
        assert pd.isna(result['Ville'].iloc[2])

    def test_cleaning_preserves_valid_values(self, sample_data_raw):
        """Legitimate values must pass through cleaning unchanged."""
        from custom_transformers import DataCleaningTransformer

        cleaner = DataCleaningTransformer()
        result = cleaner.fit_transform(sample_data_raw)

        assert result['Age'].iloc[0] == 25
        assert result['Salaire_Annuel'].iloc[1] == 60000

    def test_cleaning_is_stateless(self, sample_data_with_intrus):
        """fit() must not influence transform(): fitted and unfitted instances agree."""
        from custom_transformers import DataCleaningTransformer

        # Path 1: fit, then transform.
        fitted_cleaner = DataCleaningTransformer()
        fitted_cleaner.fit(sample_data_with_intrus)
        result_after_fit = fitted_cleaner.transform(sample_data_with_intrus)

        # Path 2: transform on a separate instance that never saw fit(), so the
        # comparison really exercises the "no fit" case instead of re-running
        # the already-fitted object.
        unfitted_cleaner = DataCleaningTransformer()
        result_without_fit = unfitted_cleaner.transform(sample_data_with_intrus)

        pd.testing.assert_frame_equal(result_after_fit, result_without_fit)


# ============================================
# TEST 2 : FeatureEngineeringTransformer
# ============================================

class TestFeatureEngineeringTransformer:
    """Tests for the feature-engineering transformer."""

    def test_creates_ratio_column(self, sample_data_raw):
        """The savings/salary ratio column must be added."""
        # The project module is custom_transformers.py; a bare `transformers`
        # import would not resolve to it.
        from custom_transformers import FeatureEngineeringTransformer

        fe = FeatureEngineeringTransformer()
        result = fe.fit_transform(sample_data_raw)

        assert 'Ratio_Epargne_Salaire' in result.columns

    def test_ratio_calculation_correct(self, sample_data_raw):
        """The ratio of the first row must equal Epargne_Totale / Salaire_Annuel."""
        from custom_transformers import FeatureEngineeringTransformer

        fe = FeatureEngineeringTransformer()
        result = fe.fit_transform(sample_data_raw)

        # First fixture row: 5000 of savings for a 50000 salary.
        expected_ratio = 5000 / 50000
        assert np.isclose(result['Ratio_Epargne_Salaire'].iloc[0], expected_ratio)

    def test_removes_epargne_totale(self, sample_data_raw):
        """The Epargne_Totale source column must be dropped from the output."""
        from custom_transformers import FeatureEngineeringTransformer

        fe = FeatureEngineeringTransformer()
        result = fe.fit_transform(sample_data_raw)

        assert 'Epargne_Totale' not in result.columns

    def test_handles_infinite_values(self):
        """A zero salary (division by zero) must yield NaN, not inf."""
        from custom_transformers import FeatureEngineeringTransformer

        # Salary of 0 in the first row makes the raw ratio infinite.
        data = pd.DataFrame({
            'Salaire_Annuel': [0, 50000],
            'Epargne_Totale': [1000, 5000]
        })

        fe = FeatureEngineeringTransformer()
        result = fe.fit_transform(data)

        assert pd.isna(result['Ratio_Epargne_Salaire'].iloc[0])

    def test_transform_without_fit_works(self, sample_data_raw):
        """transform() must work on an instance that was never fitted."""
        from custom_transformers import FeatureEngineeringTransformer

        fe = FeatureEngineeringTransformer()

        # Deliberately no fit() call here.
        result = fe.transform(sample_data_raw)

        assert 'Ratio_Epargne_Salaire' in result.columns


# ============================================
# TEST 3 : SmartImputerTransformer
# ============================================

class TestSmartImputerTransformer:
    """Tests for the smart imputation transformer."""

    def test_imputes_missing_values(self, sample_data_raw, config_ordinale):
        """The missing Age value in the raw fixture must be filled in."""
        # The project module is custom_transformers.py; a bare `transformers`
        # import would not resolve to it.
        from custom_transformers import SmartImputerTransformer

        imputer = SmartImputerTransformer(dict_ordinals=config_ordinale)
        imputer.fit(sample_data_raw)
        result = imputer.transform(sample_data_raw)

        assert result['Age'].notna().all()

    def test_uses_median_for_skewed_data(self):
        """For a heavily skewed column the imputed value must sit near the median."""
        from custom_transformers import SmartImputerTransformer

        # 100 is an outlier: the median of the observed values is 3 while the
        # mean is pulled far above it.
        data = pd.DataFrame({
            'Valeur': [1, 2, 2, 3, 3, 3, 100, np.nan]
        })

        imputer = SmartImputerTransformer()
        imputer.fit(data)
        result = imputer.transform(data)

        # The last row is the only missing one; accept a small band around 3.
        imputed_value = result['Valeur'].iloc[-1]
        assert 2 <= imputed_value <= 4

    def test_preserves_learned_values_on_new_data(self, sample_data_raw, config_ordinale):
        """Values learned during fit must be used to fill an all-NaN new row."""
        from custom_transformers import SmartImputerTransformer

        # Fit on the training-like fixture.
        imputer = SmartImputerTransformer(dict_ordinals=config_ordinale)
        imputer.fit(sample_data_raw)

        # A single unseen row where every column is missing.
        new_data = pd.DataFrame({
            'Age': [np.nan],
            'Salaire_Annuel': [np.nan],
            'Epargne_Totale': [np.nan],
            'Score_Credit_Externe': [np.nan],
            'Niveau_Etude': [np.nan],
            'Ville': [np.nan]
        })

        result = imputer.transform(new_data)

        # Every cell must have been imputed.
        assert result.notna().all().all()


# ============================================
# TEST 4 : CustomEncodingTransformer
# ============================================

class TestCustomEncodingTransformer:
    """Tests for the custom encoding transformer."""

    def test_encodes_niveau_etude_ordinally(self, sample_data_raw):
        """Niveau_Etude must be replaced by a Niveau_Etude_encoded column."""
        # The project module is custom_transformers.py; a bare `transformers`
        # import would not resolve to it.
        from custom_transformers import CustomEncodingTransformer

        encoder = CustomEncodingTransformer()
        encoder.fit(sample_data_raw)
        result = encoder.transform(sample_data_raw)

        assert 'Niveau_Etude_encoded' in result.columns
        assert 'Niveau_Etude' not in result.columns  # original column removed

    def test_ordinal_encoding_respects_order(self, sample_data_raw):
        """Encoded values must follow bac < licence < master < doctorat."""
        from custom_transformers import CustomEncodingTransformer

        encoder = CustomEncodingTransformer(
            ordre_etudes=['bac', 'licence', 'master', 'doctorat']
        )
        encoder.fit(sample_data_raw)
        result = encoder.transform(sample_data_raw)

        # Row labels in the fixture: 0=Licence, 1=Master, 2=Bac, 3=Doctorat.
        encoded = result['Niveau_Etude_encoded']
        bac_idx = encoded.loc[2]
        licence_idx = encoded.loc[0]
        master_idx = encoded.loc[1]
        doctorat_idx = encoded.loc[3]

        assert bac_idx < licence_idx < master_idx < doctorat_idx

    def test_onehot_encodes_ville(self, sample_data_raw):
        """Ville must be replaced by one or more Ville_* indicator columns."""
        from custom_transformers import CustomEncodingTransformer

        encoder = CustomEncodingTransformer()
        encoder.fit(sample_data_raw)
        result = encoder.transform(sample_data_raw)

        ville_cols = [col for col in result.columns if col.startswith('Ville_')]
        assert len(ville_cols) > 0
        assert 'Ville' not in result.columns  # original column removed

    def test_handles_unknown_ville_as_autre(self):
        """A city unseen during fit must be handled as 'autre' (or as all-zero indicators)."""
        from custom_transformers import CustomEncodingTransformer

        # Fit only on Paris and Lyon.
        train_data = pd.DataFrame({
            'Niveau_Etude': ['Licence'] * 10,
            'Ville': ['Paris'] * 5 + ['Lyon'] * 5
        })

        encoder = CustomEncodingTransformer()
        encoder.fit(train_data)

        # Marseille was never seen during fit.
        test_data = pd.DataFrame({
            'Niveau_Etude': ['Licence'],
            'Ville': ['Marseille']
        })

        result = encoder.transform(test_data)

        # Either an explicit 'autre' bucket exists, or no known-city indicator fires.
        assert 'Ville_autre' in result.columns or result.filter(like='Ville_').sum(axis=1).iloc[0] == 0


# ============================================
# TEST 5 : MultiStrategyScaler
# ============================================

class TestMultiStrategyScaler:
    """Tests for the multi-strategy scaler."""

    def test_applies_different_scalers_to_different_features(self, sample_data_raw):
        """Scaling must change the mean of Age and Salaire_Annuel."""
        # The project module is custom_transformers.py; a bare `transformers`
        # import would not resolve to it.
        from custom_transformers import MultiStrategyScaler, FeatureEngineeringTransformer

        # The ratio column is created first, as in the full pipeline order.
        fe = FeatureEngineeringTransformer()
        data_with_ratio = fe.fit_transform(sample_data_raw)

        scaler = MultiStrategyScaler()
        scaler.fit(data_with_ratio)
        result = scaler.transform(data_with_ratio)

        assert result['Age'].mean() != sample_data_raw['Age'].mean()
        assert result['Salaire_Annuel'].mean() != sample_data_raw['Salaire_Annuel'].mean()

    def test_scaling_is_reversible_in_theory(self, sample_data_raw):
        """Scaled Age values must differ from the raw Age values."""
        from custom_transformers import MultiStrategyScaler, FeatureEngineeringTransformer

        fe = FeatureEngineeringTransformer()
        data_with_ratio = fe.fit_transform(sample_data_raw)

        scaler = MultiStrategyScaler()
        scaler.fit(data_with_ratio)
        result = scaler.transform(data_with_ratio)

        # Compare only the rows where the raw Age is present, position by
        # position. Dropping NaN on one side only would leave arrays of
        # different lengths, which np.allclose cannot broadcast.
        # Assumes the scaler keeps one output row per input row.
        observed = sample_data_raw['Age'].notna().to_numpy()
        scaled_age = result['Age'].to_numpy()[observed]
        raw_age = sample_data_raw['Age'].to_numpy()[observed]

        assert not np.allclose(scaled_age, raw_age)


# ============================================
# TEST 6 : Pipeline Complet
# ============================================

class TestCompletePipeline:
    """Integration tests for the complete preprocessing pipeline."""

    @staticmethod
    def _build_pipeline(config_ordinale):
        """Assemble the five-step pipeline shared by every test in this class.

        Step order: cleaning, feature engineering, imputation, encoding, scaling.
        The leading underscore keeps pytest from collecting this helper as a test.
        """
        # The project module is custom_transformers.py; a bare `transformers`
        # import would not resolve to it.
        from custom_transformers import (
            DataCleaningTransformer,
            FeatureEngineeringTransformer,
            SmartImputerTransformer,
            CustomEncodingTransformer,
            MultiStrategyScaler
        )

        return Pipeline([
            ('cleaning', DataCleaningTransformer()),
            ('feature_eng', FeatureEngineeringTransformer()),
            ('imputation', SmartImputerTransformer(dict_ordinals=config_ordinale)),
            ('encoding', CustomEncodingTransformer()),
            ('scaling', MultiStrategyScaler())
        ])

    def test_full_pipeline_runs_without_error(self, sample_data_raw, config_ordinale):
        """The full pipeline must fit and transform the raw fixture without raising."""
        pipeline = self._build_pipeline(config_ordinale)

        pipeline.fit(sample_data_raw)
        result = pipeline.transform(sample_data_raw)

        assert result is not None
        assert len(result) == len(sample_data_raw)

    def test_pipeline_produces_no_nan_after_imputation(self, sample_data_raw, config_ordinale):
        """The pipeline output must not contain any missing value."""
        pipeline = self._build_pipeline(config_ordinale)

        pipeline.fit(sample_data_raw)
        result = pipeline.transform(sample_data_raw)

        assert result.notna().all().all()

    def test_pipeline_transform_preserves_row_count(self, sample_data_raw, config_ordinale):
        """The pipeline must return exactly one output row per input row."""
        pipeline = self._build_pipeline(config_ordinale)

        pipeline.fit(sample_data_raw)
        result = pipeline.transform(sample_data_raw)

        assert len(result) == len(sample_data_raw)


# ============================================
# TEST 7 : Edge Cases
# ============================================

class TestEdgeCases:
    """Tests for boundary inputs."""

    def test_handles_empty_dataframe(self):
        """Cleaning an empty DataFrame must return an empty result."""
        # The project module is custom_transformers.py; a bare `transformers`
        # import would not resolve to it.
        from custom_transformers import DataCleaningTransformer

        empty_df = pd.DataFrame()
        cleaner = DataCleaningTransformer()

        result = cleaner.fit_transform(empty_df)
        assert len(result) == 0

    def test_handles_single_row(self, config_ordinale):
        """Imputation must cope with a single-row frame and keep that row."""
        from custom_transformers import SmartImputerTransformer

        single_row = pd.DataFrame({
            'Age': [25],
            'Salaire_Annuel': [50000]
        })

        imputer = SmartImputerTransformer(dict_ordinals=config_ordinale)
        imputer.fit(single_row)
        result = imputer.transform(single_row)

        assert len(result) == 1

    def test_handles_all_nan_column(self, config_ordinale):
        """A column that is entirely NaN must still receive some value."""
        from custom_transformers import SmartImputerTransformer

        all_nan = pd.DataFrame({
            'Age': [np.nan, np.nan, np.nan],
            'Salaire_Annuel': [50000, 60000, 70000]
        })

        imputer = SmartImputerTransformer(dict_ordinals=config_ordinale)
        imputer.fit(all_nan)
        result = imputer.transform(all_nan)

        # Age must hold a value afterwards, even an arbitrary one.
        assert result['Age'].notna().any()


# ============================================
# RUN TESTS
# ============================================

# if __name__ == "__main__":
#     pytest.main([__file__, "-v", "--tb=short"])

In [None]:
# ==============================================================================
# RUNNING THE TESTS INSIDE THE NOTEBOOK
# ==============================================================================
# Each check below is a plain function that raises (usually AssertionError) on
# failure. A single loop runs them group by group, prints one OK/FAIL line per
# check and keeps the running totals. The transformer classes are expected to
# be in the notebook namespace already (imported from custom_transformers in
# an earlier cell).

import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline

# Test data shared by the checks
sample_data_raw = pd.DataFrame({
    'Age': [25, 30, np.nan, 40],
    'Salaire_Annuel': [50000, 60000, 55000, 70000],
    'Epargne_Totale': [5000, 12000, 8000, 15000],
    'Score_Credit_Externe': [50, 75, 60, 80],
    'Niveau_Etude': ['Licence', 'Master', 'Bac', 'Doctorat'],
    'Ville': ['Paris', 'Lyon', 'Marseille', 'Paris']
})

sample_data_with_intrus = pd.DataFrame({
    'Age': [25, '-', '?', 40],
    'Salaire_Annuel': [50000, 60000, 'null', 70000],
    'Score_Credit_Externe': [50, 75, 'none', 80],
    'Niveau_Etude': ['Licence', 'Master', '--', 'Doctorat'],
    'Ville': ['Paris', 'Lyon', '', 'Paris']
})

config_ordinale = {'Niveau_Etude': ['bac', 'licence', 'master', 'doctorat']}


def _build_full_pipeline():
    """Return a fresh five-step pipeline: cleaning, features, imputation, encoding, scaling."""
    return Pipeline([
        ('cleaning', DataCleaningTransformer()),
        ('feature_eng', FeatureEngineeringTransformer()),
        ('imputation', SmartImputerTransformer(dict_ordinals=config_ordinale)),
        ('encoding', CustomEncodingTransformer()),
        ('scaling', MultiStrategyScaler())
    ])


def _fit_transform_full_pipeline():
    """Fit a fresh full pipeline on the raw sample and return its transformed output."""
    pipeline = _build_full_pipeline()
    pipeline.fit(sample_data_raw)
    return pipeline.transform(sample_data_raw)


# ------------------------------------------------------------------------------
# Group 1: DataCleaningTransformer
# ------------------------------------------------------------------------------

def _check_cleaning_replaces_intrus_with_nan():
    """Placeholder strings in Age and Salaire_Annuel become NaN."""
    result = DataCleaningTransformer().fit_transform(sample_data_with_intrus)
    assert pd.isna(result['Age'].iloc[1])
    assert pd.isna(result['Salaire_Annuel'].iloc[2])


def _check_cleaning_preserves_valid_values():
    """Legitimate values survive cleaning unchanged."""
    result = DataCleaningTransformer().fit_transform(sample_data_raw)
    assert result['Age'].iloc[0] == 25
    assert result['Salaire_Annuel'].iloc[1] == 60000


def _check_cleaning_is_stateless():
    """Two transform() calls on the same fitted cleaner give identical frames."""
    cleaner = DataCleaningTransformer()
    cleaner.fit(sample_data_with_intrus)
    first_pass = cleaner.transform(sample_data_with_intrus)
    second_pass = cleaner.transform(sample_data_with_intrus)
    pd.testing.assert_frame_equal(first_pass, second_pass)


# ------------------------------------------------------------------------------
# Group 2: FeatureEngineeringTransformer
# ------------------------------------------------------------------------------

def _check_creates_ratio_column():
    """The savings/salary ratio column is created."""
    result = FeatureEngineeringTransformer().fit_transform(sample_data_raw)
    assert 'Ratio_Epargne_Salaire' in result.columns


def _check_ratio_calculation_correct():
    """The first-row ratio equals 5000 / 50000."""
    result = FeatureEngineeringTransformer().fit_transform(sample_data_raw)
    expected_ratio = 5000 / 50000
    assert np.isclose(result['Ratio_Epargne_Salaire'].iloc[0], expected_ratio)


def _check_removes_epargne_totale():
    """Epargne_Totale is dropped from the output."""
    result = FeatureEngineeringTransformer().fit_transform(sample_data_raw)
    assert 'Epargne_Totale' not in result.columns


def _check_handles_infinite_values():
    """A zero salary yields NaN for the ratio instead of inf."""
    data_with_zero = pd.DataFrame({
        'Salaire_Annuel': [0, 50000],
        'Epargne_Totale': [1000, 5000]
    })
    result = FeatureEngineeringTransformer().fit_transform(data_with_zero)
    assert pd.isna(result['Ratio_Epargne_Salaire'].iloc[0])


# ------------------------------------------------------------------------------
# Group 3: SmartImputerTransformer
# ------------------------------------------------------------------------------

def _check_imputes_missing_values():
    """The missing Age value is filled in."""
    imputer = SmartImputerTransformer(dict_ordinals=config_ordinale)
    imputer.fit(sample_data_raw)
    result = imputer.transform(sample_data_raw)
    assert result['Age'].notna().all()


def _check_uses_median_for_skewed_data():
    """With an outlier of 100, the imputed value stays near the median (3)."""
    data_skewed = pd.DataFrame({'Valeur': [1, 2, 2, 3, 3, 3, 100, np.nan]})
    imputer = SmartImputerTransformer()
    imputer.fit(data_skewed)
    result = imputer.transform(data_skewed)
    imputed_value = result['Valeur'].iloc[-1]
    assert 2 <= imputed_value <= 4


# ------------------------------------------------------------------------------
# Group 4: CustomEncodingTransformer
# ------------------------------------------------------------------------------

def _check_encodes_niveau_etude_ordinally():
    """Niveau_Etude is replaced by Niveau_Etude_encoded."""
    encoder = CustomEncodingTransformer()
    encoder.fit(sample_data_raw)
    result = encoder.transform(sample_data_raw)
    assert 'Niveau_Etude_encoded' in result.columns
    assert 'Niveau_Etude' not in result.columns


def _check_onehot_encodes_ville():
    """Ville is replaced by one or more Ville_* columns."""
    encoder = CustomEncodingTransformer()
    encoder.fit(sample_data_raw)
    result = encoder.transform(sample_data_raw)
    ville_cols = [col for col in result.columns if col.startswith('Ville_')]
    assert len(ville_cols) > 0
    assert 'Ville' not in result.columns


# ------------------------------------------------------------------------------
# Group 5: MultiStrategyScaler
# ------------------------------------------------------------------------------

def _check_applies_different_scalers():
    """Scaling changes the mean of the Age column."""
    data_with_ratio = FeatureEngineeringTransformer().fit_transform(sample_data_raw)
    scaler = MultiStrategyScaler()
    scaler.fit(data_with_ratio)
    result = scaler.transform(data_with_ratio)
    assert result['Age'].mean() != sample_data_raw['Age'].mean()


# ------------------------------------------------------------------------------
# Group 6: complete pipeline
# ------------------------------------------------------------------------------

def _check_full_pipeline_runs_without_error():
    """The full pipeline runs and returns one row per input row."""
    result = _fit_transform_full_pipeline()
    assert result is not None
    assert len(result) == len(sample_data_raw)


def _check_pipeline_produces_no_nan():
    """The full pipeline output contains no missing value."""
    result = _fit_transform_full_pipeline()
    assert result.notna().all().all()


def _check_pipeline_preserves_row_count():
    """The full pipeline keeps the number of rows."""
    result = _fit_transform_full_pipeline()
    assert len(result) == len(sample_data_raw)


# ------------------------------------------------------------------------------
# Group 7: edge cases
# ------------------------------------------------------------------------------

def _check_handles_empty_dataframe():
    """Cleaning an empty DataFrame returns an empty result."""
    result = DataCleaningTransformer().fit_transform(pd.DataFrame())
    assert len(result) == 0


def _check_handles_single_row():
    """Imputation keeps the single input row."""
    single_row = pd.DataFrame({'Age': [25], 'Salaire_Annuel': [50000]})
    imputer = SmartImputerTransformer(dict_ordinals=config_ordinale)
    imputer.fit(single_row)
    result = imputer.transform(single_row)
    assert len(result) == 1


# (group heading, [(reported test name, check function), ...]) in run order
_TEST_GROUPS = [
    ("TestDataCleaningTransformer", [
        ("test_cleaning_replaces_intrus_with_nan", _check_cleaning_replaces_intrus_with_nan),
        ("test_cleaning_preserves_valid_values", _check_cleaning_preserves_valid_values),
        ("test_cleaning_is_stateless", _check_cleaning_is_stateless),
    ]),
    ("TestFeatureEngineeringTransformer", [
        ("test_creates_ratio_column", _check_creates_ratio_column),
        ("test_ratio_calculation_correct", _check_ratio_calculation_correct),
        ("test_removes_epargne_totale", _check_removes_epargne_totale),
        ("test_handles_infinite_values", _check_handles_infinite_values),
    ]),
    ("TestSmartImputerTransformer", [
        ("test_imputes_missing_values", _check_imputes_missing_values),
        ("test_uses_median_for_skewed_data", _check_uses_median_for_skewed_data),
    ]),
    ("TestCustomEncodingTransformer", [
        ("test_encodes_niveau_etude_ordinally", _check_encodes_niveau_etude_ordinally),
        ("test_onehot_encodes_ville", _check_onehot_encodes_ville),
    ]),
    ("TestMultiStrategyScaler", [
        ("test_applies_different_scalers", _check_applies_different_scalers),
    ]),
    ("TestCompletePipeline", [
        ("test_full_pipeline_runs_without_error", _check_full_pipeline_runs_without_error),
        ("test_pipeline_produces_no_nan", _check_pipeline_produces_no_nan),
        ("test_pipeline_preserves_row_count", _check_pipeline_preserves_row_count),
    ]),
    ("TestEdgeCases", [
        ("test_handles_empty_dataframe", _check_handles_empty_dataframe),
        ("test_handles_single_row", _check_handles_single_row),
    ]),
]

print("="*80)
print("EXÉCUTION DES TESTS")
print("="*80)

total_tests = 0
passed_tests = 0
failed_tests = 0

for group_name, group_checks in _TEST_GROUPS:
    print(f"\n{group_name}")
    print("-"*80)

    for test_name, check in group_checks:
        total_tests += 1
        try:
            check()
        except Exception as e:
            # A bare `assert` raises AssertionError with an empty message, so
            # the exception type is printed too; otherwise the FAIL line
            # would carry no diagnostic at all.
            print(f"  FAIL {test_name} : {type(e).__name__}: {e}")
            failed_tests += 1
        else:
            print(f"  OK {test_name}")
            passed_tests += 1

# ==============================================================================
# FINAL SUMMARY
# ==============================================================================
print("\n" + "="*80)
print("RÉSUMÉ")
print("="*80)
print(f"Total : {total_tests} tests")
print(f"Réussis : {passed_tests}")
print(f"Échoués : {failed_tests}")
print(f"Taux de réussite : {passed_tests/total_tests*100:.1f}%")

if failed_tests == 0:
    print("\nTOUS LES TESTS SONT PASSÉS !")
else:
    print(f"\n{failed_tests} test(s) ont échoué")

EXÉCUTION DES TESTS

TestDataCleaningTransformer
--------------------------------------------------------------------------------
  OK test_cleaning_replaces_intrus_with_nan
  OK test_cleaning_preserves_valid_values
  OK test_cleaning_is_stateless

TestFeatureEngineeringTransformer
--------------------------------------------------------------------------------
  OK test_creates_ratio_column
  OK test_ratio_calculation_correct
  OK test_removes_epargne_totale
  OK test_handles_infinite_values

TestSmartImputerTransformer
--------------------------------------------------------------------------------
  OK test_imputes_missing_values
  OK test_uses_median_for_skewed_data

TestCustomEncodingTransformer
--------------------------------------------------------------------------------
  OK test_encodes_niveau_etude_ordinally
  OK test_onehot_encodes_ville

TestMultiStrategyScaler
--------------------------------------------------------------------------------
  OK test_applies_different_sca

  X_copy[col] = X_copy[col].replace(self.generic_missing_values, np.nan)


  OK test_full_pipeline_runs_without_error
  OK test_pipeline_produces_no_nan
  OK test_pipeline_preserves_row_count

TestEdgeCases
--------------------------------------------------------------------------------
  OK test_handles_empty_dataframe
  OK test_handles_single_row

RÉSUMÉ
Total : 17 tests
Réussis : 17
Échoués : 0
Taux de réussite : 100.0%

TOUS LES TESTS SONT PASSÉS !
