In [779]:
# Cellule 1 - Imports et configuration
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import joblib
import os
import pandas as pd
import numpy as np
import logging

# Logging: configure the root logger at INFO level and create a logger
# named after the current module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Directory that holds the persisted artifacts; created up front if missing.
MODEL_DIR = "models"
os.makedirs(MODEL_DIR, exist_ok=True)

# Paths of the serialized model and scaler inside MODEL_DIR.
MODEL_PATH, SCALER_PATH = (
    os.path.join(MODEL_DIR, artifact)
    for artifact in ("isolation_forest_v1.joblib", "scaler_v1.joblib")
)

In [791]:
import pandas as pd
import numpy as np

# Synthetic training set: 25 transactions, five consecutive rows per debtor.
# Columns that hold one value per debtor are expanded with a nested
# comprehension (each value repeated 5 times in a row); columns that cycle
# through a 5-item pattern are tiled with `* 5`.
train_data = {
    "transaction_id": ["TX{:04d}".format(i) for i in range(1, 26)],
    "debtor_iban": [
        iban
        for iban in (
            "FR7630001007941234567890185",
            "MA64123456789012345678901234",
            "DE50100701000000000001",
            "ES9121000418450200051332",
            "GB29NWBK60161331926819",
        )
        for _ in range(5)
    ],
    "debtor_country": [
        country for country in ("FR", "MA", "DE", "ES", "GB") for _ in range(5)
    ],
    "creditor_country": ["MA", "FR", "TR", "US", "MA"] * 5,
    "intrbk_sttlm_amt": [
        200, 50000, 100000, 3000, 15000,
        2000000, 750000, 500, 1200000, 250,
        400000, 600000, 5000000, 10000, 90000,
        300000, 800000, 70000, 1500000, 950000,
        10000, 200, 7500000, 50000, 2000,
    ],
    "distance_km": [
        2000, 100, 5001, 8000, 300,
        100, 150, 6000, 1200, 50,
        1200, 4500, 8000, 100, 600,
        150, 4500, 8000, 5000, 1200,
        2000, 100, 12000, 1000, 500,
    ],
    "debtor_city": [
        city
        for city in ("Paris", "Casablanca", "Berlin", "Madrid", "Londres")
        for _ in range(5)
    ],
    "creditor_city": [
        "Casablanca", "Paris", "Istanbul", "New York", "Casablanca",
        "Casablanca", "Paris", "New York", "Paris", "Casablanca",
        "Casablanca", "Casablanca", "Pékin", "Berlin", "Casablanca",
        "Casablanca", "Casablanca", "Casablanca", "Casablanca", "Casablanca",
        "Casablanca", "Londres", "Tanger", "Casablanca", "Casablanca",
    ],
    "debtor_postcode": [
        postcode
        for postcode in ("75001", "20000", "10115", "28001", "SW1A 0AA")
        for _ in range(5)
    ],
    "debtor_name": [
        bank
        for bank in (
            "BNP PARIBAS",
            "ATTIJARIWAFA BANK",
            "DEUTSCHE BANK",
            "SANTANDER",
            "HSBC",
        )
        for _ in range(5)
    ],
    "creditor_name": [
        "BANQUE POPULAIRE", "BNP PARIBAS", "FAKE BANK", "Citi Bank", "BANQUE POPULAIRE",
    ] * 5,
}

# Build the frame and derive the engineered features in one chain:
#   - is_international: debtor and creditor countries differ
#   - intrbk_sttlm_amt_log: log(1 + amount)
train_df = pd.DataFrame(train_data).assign(
    is_international=lambda frame: frame["debtor_country"].ne(frame["creditor_country"]),
    intrbk_sttlm_amt_log=lambda frame: np.log1p(frame["intrbk_sttlm_amt"]),
)

In [792]:
from app.ml_model import FraudModel  # Import de votre classe

fraud_model = FraudModel()

# The scaler and the model are treated as a pair: the model is always fitted on
# features transformed by the scaler currently attached to `fraud_model`.
# NOTE(review): presumably `detect_anomalies` applies `fraud_model.scaler`
# before scoring — confirm in app.ml_model.
scaler_missing = fraud_model.scaler is None
model_untrained = (
    fraud_model.model is None
    or not hasattr(fraud_model.model, 'estimators_')
)

if scaler_missing or model_untrained:
    # Select the training features only when something actually has to be
    # fitted, so this cell does no feature lookup when both components were
    # already loaded.
    features = train_df[fraud_model.expected_features]

    # Create, fit and persist a scaler when none is available.
    if scaler_missing:
        print("Création d'un nouveau scaler...")
        fraud_model.scaler = StandardScaler()
        fraud_model.scaler.fit(features)
        joblib.dump(fraud_model.scaler, SCALER_PATH)
        print(f"Scaler enregistré à {SCALER_PATH}")

    # Train and persist the model. This also runs when the scaler was just
    # re-fitted, even if a trained model had been loaded: keeping a model that
    # was fitted against a different scaling would make its scores
    # inconsistent with the new scaler.
    print("Entraînement du modèle...")
    scaled_features = fraud_model.scaler.transform(features)

    fraud_model.model = IsolationForest(contamination=0.03, random_state=42)
    fraud_model.model.fit(scaled_features)
    joblib.dump(fraud_model.model, MODEL_PATH)
    print(f"Modèle enregistré à {MODEL_PATH}")

# Final status report
print("\nÉtat des composants:")
print(f"- Scaler chargé: {fraud_model.scaler is not None}")
print(f"- Modèle chargé: {fraud_model.model is not None}")
if fraud_model.model is not None:
    print(f"- Modèle entraîné: {hasattr(fraud_model.model, 'estimators_')}")


État des composants:
- Scaler chargé: True
- Modèle chargé: True
- Modèle entraîné: True


In [793]:
def prepare_for_detection(df, model):
    """Prepare a frame for anomaly detection and validate its columns.

    The frame is first passed through ``model.standardize_amount_column``;
    the result must then expose every name listed in
    ``model.expected_features`` among its columns.

    Args:
        df: Input data; forwarded as-is to ``model.standardize_amount_column``.
        model: Object providing ``standardize_amount_column(df)`` and an
            ``expected_features`` collection of column names.

    Returns:
        The object returned by ``model.standardize_amount_column(df)``.

    Raises:
        ValueError: If at least one expected feature is absent. The message
            lists the missing, the available and the expected features.
    """
    prepared = model.standardize_amount_column(df)

    # Expected features that the prepared frame does not provide.
    missing = [name for name in model.expected_features if name not in prepared.columns]
    if not missing:
        return prepared

    # Expected features that are present, reported to ease debugging.
    available = [col for col in prepared.columns if col in model.expected_features]
    message = (
        f"Features manquantes: {missing}\n"
        f"Features disponibles: {available}\n"
        f"Features attendues: {model.expected_features}"
    )
    raise ValueError(message)

try:
    # Data preparation (validates that every expected feature is present)
    train_df = prepare_for_detection(train_df, fraud_model)

    # Anomaly detection
    results = fraud_model.detect_anomalies(train_df)
    print("\n✅ Détection terminée avec succès")
    display(results.head())

except Exception as e:
    # Best-effort diagnostics: report the error and the state of the model
    # components instead of letting the cell fail with a bare traceback.
    print("\n❌ Erreur:", str(e))
    print("Détails de débogage:")
    print(f"- Scaler disponible: {fraud_model.scaler is not None}")
    print(f"- Modèle disponible: {fraud_model.model is not None}")
    # Explicit None check rather than truth-testing the estimator: scikit-learn
    # ensembles implement __len__ by reading `estimators_`, so `if model:` on an
    # unfitted ensemble raises AttributeError here and would mask the original
    # error being reported.
    if fraud_model.model is not None:
        print(f"- Modèle entraîné: {hasattr(fraud_model.model, 'estimators_')}")


✅ Détection terminée avec succès


Unnamed: 0,transaction_id,debtor_iban,debtor_country,creditor_country,intrbk_sttlm_amt,distance_km,debtor_city,creditor_city,debtor_postcode,debtor_name,...,amount_high,postal_incoherence,bank_country_mismatch,ai_score,ai_anomaly,ai_score_normalized,rule_score_norm,ai_score_norm,combined_score,is_anomaly
0,TX0001,FR7630001007941234567890185,FR,MA,200,2000,Paris,Casablanca,75001,BNP PARIBAS,...,False,False,False,0.138112,0,0.0,0.285714,0.430944,0.329283,True
1,TX0002,FR7630001007941234567890185,FR,FR,50000,100,Paris,Paris,75001,BNP PARIBAS,...,False,False,False,0.091405,0,1.0,0.285714,0.454297,0.336289,True
2,TX0003,FR7630001007941234567890185,FR,TR,100000,5001,Paris,Istanbul,75001,BNP PARIBAS,...,False,False,False,0.138112,0,0.0,0.285714,0.430944,0.329283,True
3,TX0004,FR7630001007941234567890185,FR,US,3000,8000,Paris,New York,75001,BNP PARIBAS,...,False,False,False,0.138112,0,0.0,0.285714,0.430944,0.329283,True
4,TX0005,FR7630001007941234567890185,FR,MA,15000,300,Paris,Casablanca,75001,BNP PARIBAS,...,False,False,False,0.138112,0,0.0,0.0,0.430944,0.129283,False


In [794]:
# Keep only the rows flagged as anomalous.
anomalies = results.loc[results['is_anomaly'].eq(1)]
print(f"\nNombre d'anomalies détectées : {len(anomalies)}")

# Show the flagged transactions restricted to the most informative columns.
display(
    anomalies.loc[
        :,
        [
            'transaction_id',
            'intrbk_sttlm_amt',
            'debtor_country',
            'creditor_country',
            'is_international',
            'rule_based_score',
            'ai_score',
            'combined_score',
        ],
    ]
)


Nombre d'anomalies détectées : 11


Unnamed: 0,transaction_id,intrbk_sttlm_amt,debtor_country,creditor_country,is_international,rule_based_score,ai_score,combined_score
0,TX0001,200,FR,MA,True,1.0,0.138112,0.329283
1,TX0002,50000,FR,FR,False,1.0,0.091405,0.336289
2,TX0003,100000,FR,TR,True,1.0,0.138112,0.329283
3,TX0004,3000,FR,US,True,1.0,0.138112,0.329283
7,TX0008,500,MA,TR,True,1.0,0.138112,0.329283
11,TX0012,600000,DE,FR,True,1.0,0.138112,0.329283
12,TX0013,5000000,DE,TR,True,1.0,0.138112,0.329283
16,TX0017,800000,ES,FR,True,1.0,0.138112,0.329283
17,TX0018,70000,ES,TR,True,1.0,0.138112,0.329283
18,TX0019,1500000,ES,US,True,1.0,0.138112,0.329283
