# Test de l'API Credit Scoring

Ce notebook permet de tester de manière interactive l'API de prédiction du score de crédit.

## Objectifs
- Tester tous les endpoints de l'API
- Valider les prédictions
- Analyser les SHAP values
- Benchmarker les performances

In [None]:
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List
import time

# Configuration
# Base URL of the API under test. Set the CREDIT_API_URL environment variable
# to target another deployment (for example a Cloud Run URL such as
# "https://your-cloud-run-url.run.app") without editing the notebook; when the
# variable is unset the local development server is used.
import os

# A trailing slash is stripped so that f"{API_URL}/path" never yields "//path".
API_URL = os.environ.get("CREDIT_API_URL", "http://localhost:8080").rstrip("/")

print(f"API URL: {API_URL}")

## 1. Health Check

In [None]:
def check_health(timeout: float = 10.0):
    """Query the API's ``/health`` endpoint and return the decoded JSON body.

    Args:
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, which would leave the notebook hanging if the
            API is unreachable.

    Returns:
        The parsed JSON payload of the response.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout, or (as ``HTTPError``) when the body is not JSON and the
            status code signals an error.
        ValueError: When the body is not valid JSON although the status code
            does not signal an error.
    """
    response = requests.get(f"{API_URL}/health", timeout=timeout)
    try:
        return response.json()
    except ValueError:
        # A non-JSON body usually means an error page (proxy, 5xx, ...):
        # report the HTTP status if there is one, otherwise re-raise the
        # decoding error unchanged.
        response.raise_for_status()
        raise

# Fetch the health payload once and show it in full.
health = check_health()
print(json.dumps(health, indent=2))

# .get() makes a missing key fail with the assertion message below instead of
# a bare KeyError; `is True` requires a real JSON boolean for the model flag.
assert health.get("status") == "healthy", "API is not healthy!"
assert health.get("model_loaded") is True, "Model is not loaded!"
print("\n✅ API is healthy and ready!")

## 2. Single Prediction Test

In [None]:
# Test payload: one client described by its feature values, plus an
# identifier (the response's `client_id` field is printed further down).
test_client = {
    "features": {
        "EXT_SOURCE_2": 0.5,
        "EXT_SOURCE_3": 0.6,
        "DAYS_BIRTH": -15000,
        "AMT_CREDIT": 500000.0,
        "AMT_ANNUITY": 25000.0,
        "AMT_GOODS_PRICE": 450000.0,
        "DAYS_EMPLOYED": -3000,
        "DAYS_ID_PUBLISH": -2000,
        "REGION_POPULATION_RELATIVE": 0.02,
        "DAYS_LAST_PHONE_CHANGE": -1000
    },
    "client_id": "TEST_001"
}

# Request the prediction. The timeout keeps the cell from hanging forever if
# the API does not answer.
response = requests.post(f"{API_URL}/predict", json=test_client, timeout=30)
# Stop here with the HTTP status when the API rejects the request, rather
# than with a KeyError on the first field access below.
response.raise_for_status()
prediction = response.json()

print("Prediction Result:")
print(json.dumps(prediction, indent=2))

# Human-readable summary of the fields returned by the API.
separator = '=' * 50
print(f"\n{separator}")
print(f"Client ID: {prediction['client_id']}")
print(f"Decision: {prediction['decision']}")
print(f"Probability of Default: {prediction['probability_default']:.2%}")
print(f"Probability of No Default: {prediction['probability_no_default']:.2%}")
print(f"Threshold Used: {prediction['threshold_used']:.2%}")
print(separator)

## 3. Feature Importance (SHAP)

In [None]:
# Request the SHAP explanation for the client defined in `test_client`.
response = requests.post(f"{API_URL}/feature-importance", json=test_client, timeout=30)
# Surface an HTTP error directly instead of a KeyError on the fields below.
response.raise_for_status()
shap_data = response.json()

print("SHAP Analysis:")
print(f"Base Value: {shap_data['base_value']:.4f}")
print(f"Prediction Value: {shap_data['prediction_value']:.4f}")

# NOTE(review): the headings assume positive SHAP values push towards
# approval and negative ones towards rejection; confirm against the sign
# convention of the model output explained by the API.
sections = (
    ("\nTop Positive Features (contribute to approval):", 'top_positive_features'),
    ("\nTop Negative Features (contribute to rejection):", 'top_negative_features'),
)
for heading, key in sections:
    print(heading)
    # Show at most the five first entries of each list.
    for feat in shap_data[key][:5]:
        print(f"  {feat['feature']}: {feat['value']:.4f}")

In [None]:
# Bar chart of the SHAP values: up to ten entries from each list, ordered by
# decreasing absolute value.
top_features = sorted(
    shap_data['top_positive_features'][:10] + shap_data['top_negative_features'][:10],
    key=lambda item: abs(item['value']),
    reverse=True,
)

features = [f['feature'] for f in top_features]
values = [f['value'] for f in top_features]
# Green for strictly positive contributions, red otherwise.
colors = ['green' if v > 0 else 'red' for v in values]

plt.figure(figsize=(12, 8))
plt.barh(features, values, color=colors)
# barh places the first category at the bottom of the axis; flip it so the
# largest contribution (first after sorting) is displayed at the top.
plt.gca().invert_yaxis()
plt.xlabel('SHAP Value')
plt.title('Feature Importance (SHAP Values)')
# Thin reference line separating positive from negative contributions.
plt.axvline(x=0, color='black', linestyle='--', linewidth=0.5)
plt.tight_layout()
plt.show()

## 4. Batch Predictions Test

In [None]:
# Build several test clients: each one reuses the features of `test_client`
# and overrides a single value.
client_overrides = [
    ("BATCH_001", {"EXT_SOURCE_2": 0.3}),
    ("BATCH_002", {"EXT_SOURCE_2": 0.5}),
    ("BATCH_003", {"EXT_SOURCE_2": 0.7}),
    ("BATCH_004", {"EXT_SOURCE_3": 0.8}),
    ("BATCH_005", {"EXT_SOURCE_3": 0.4}),
]
batch_request = {
    "clients": [
        {
            "features": {**test_client["features"], **overrides},
            "client_id": client_id,
        }
        for client_id, overrides in client_overrides
    ]
}

# Send the batch. The timeout keeps the cell from hanging forever if the API
# does not answer.
response = requests.post(f"{API_URL}/predict/batch", json=batch_request, timeout=30)
# Surface an HTTP error directly instead of a KeyError on the fields below.
response.raise_for_status()
batch_results = response.json()

print(f"Total Clients: {batch_results['total_clients']}")
print(f"Approved: {batch_results['approved_count']}")
print(f"Rejected: {batch_results['rejected_count']}")

# Load the individual predictions into a DataFrame for analysis.
df_results = pd.DataFrame(batch_results['predictions'])
print("\nResults:")
print(df_results[['client_id', 'decision', 'probability_default', 'probability_no_default']])

In [None]:
# Visualise the batch results: decision shares (left) and the distribution of
# default probabilities (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Decision distribution.
decision_counts = df_results['decision'].value_counts()
# value_counts() orders labels by frequency, so a fixed ['green', 'red'] list
# would colour whichever decision is most frequent in green. Choose the colour
# from the label itself instead.
# NOTE(review): the exact decision strings returned by the API are not visible
# in this notebook; the prefixes below are assumptions to confirm. Labels that
# match neither group are drawn in gray.
approved_prefixes = ("APPROV", "ACCEPT", "ACCORD", "GRANT")
rejected_prefixes = ("REJECT", "REFUS", "DENI", "DECLIN")
pie_colors = []
for label in decision_counts.index:
    normalized = str(label).upper()
    if normalized.startswith(approved_prefixes):
        pie_colors.append('green')
    elif normalized.startswith(rejected_prefixes):
        pie_colors.append('red')
    else:
        pie_colors.append('gray')
axes[0].pie(decision_counts.values, labels=decision_counts.index, autopct='%1.1f%%',
            colors=pie_colors)
axes[0].set_title('Distribution des Décisions')

# Probability distribution.
axes[1].hist(df_results['probability_default'], bins=10, edgecolor='black')
# Draw the decision threshold only when the predictions carry one; the value
# of the first row is used for the line.
if 'threshold_used' in df_results.columns and not df_results.empty:
    axes[1].axvline(x=df_results['threshold_used'].iloc[0], color='red',
                    linestyle='--', label='Threshold')
    axes[1].legend()
axes[1].set_xlabel('Probability of Default')
axes[1].set_ylabel('Count')
axes[1].set_title('Distribution des Probabilités de Défaut')

plt.tight_layout()
plt.show()

## 5. Performance Benchmarking

In [None]:
# Latency benchmark: send the same prediction request repeatedly and report
# summary statistics in milliseconds.
import statistics

n_requests = 100
latencies = []
failed_requests = 0

print(f"Running {n_requests} prediction requests...")

for i in range(n_requests):
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump when the system clock
    # is adjusted.
    start = time.perf_counter()
    response = requests.post(f"{API_URL}/predict", json=test_client, timeout=30)
    latency = (time.perf_counter() - start) * 1000  # Convert to ms
    latencies.append(latency)

    # Keep the timing but remember error responses, so the statistics are not
    # silently describing failed calls.
    if not response.ok:
        failed_requests += 1

    if (i + 1) % 20 == 0:
        print(f"  Completed {i + 1}/{n_requests}")

# Statistics. Sort once and reuse the result for every order statistic.
ordered = sorted(latencies)
sample_count = len(ordered)
separator = '=' * 50

print(f"\n{separator}")
print("Performance Statistics:")
print(f"  Mean Latency: {sum(latencies)/sample_count:.2f} ms")
# statistics.median averages the two middle values for an even sample size.
print(f"  Median Latency: {statistics.median(ordered):.2f} ms")
print(f"  Min Latency: {ordered[0]:.2f} ms")
print(f"  Max Latency: {ordered[-1]:.2f} ms")
print(f"  P95 Latency: {ordered[int(sample_count*0.95)]:.2f} ms")
print(f"  P99 Latency: {ordered[int(sample_count*0.99)]:.2f} ms")
if failed_requests:
    print(f"  ⚠️ {failed_requests}/{n_requests} requests returned an error status")
print(separator)

In [None]:
# Plot the measured latencies: per-request trace on the left, histogram with
# the mean marked on the right.
fig, (trace_ax, hist_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: latency of each request in the order it was sent.
trace_ax.plot(latencies)
trace_ax.set_xlabel('Request Number')
trace_ax.set_ylabel('Latency (ms)')
trace_ax.set_title('Latency Over Time')
trace_ax.grid(True, alpha=0.3)

# Right panel: distribution of the latencies.
hist_ax.hist(latencies, bins=30, edgecolor='black')
hist_ax.set_xlabel('Latency (ms)')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('Latency Distribution')
mean_latency = sum(latencies) / len(latencies)
hist_ax.axvline(x=mean_latency, color='red', linestyle='--', label='Mean')
hist_ax.legend()

fig.tight_layout()
plt.show()

## 6. Error Handling Tests

In [None]:
# Send deliberately invalid requests and report how the API answers.
print("Testing error handling...\n")


def _probe(label, method, path, payload=None, show_body=True):
    """Send one request and print the status (and optionally the body).

    Args:
        label: Name of the scenario, used as the prefix of the printed lines.
        method: HTTP method name passed to ``requests.request``.
        path: Path appended to ``API_URL``.
        payload: JSON body to send, or ``None`` for no body.
        show_body: Whether to print the response body after the status.

    Returns:
        The HTTP status code, or ``None`` when the request could not be sent
        or timed out.
    """
    try:
        response = requests.request(method, f"{API_URL}{path}", json=payload, timeout=30)
    except requests.exceptions.RequestException as e:
        # Transport-level problems only (connection refused, timeout, ...).
        print(f"{label} error: {e}\n")
        return None

    print(f"{label}: Status {response.status_code}")
    if show_body:
        try:
            body = response.json()
        except ValueError:
            # Error pages are not always JSON; show the raw text instead of
            # reporting the scenario as failed.
            body = response.text
        print(f"Response: {body}\n")
    return response.status_code


# Test 1: Empty features
_probe("Empty features", "POST", "/predict", {"features": {}})

# Test 2: Missing features field
_probe("Missing features", "POST", "/predict", {"client_id": "TEST"})

# Test 3: Invalid endpoint (only the status is reported)
_probe("Invalid endpoint", "GET", "/invalid-endpoint", show_body=False)

print("✅ Error handling tests completed")

## 7. Summary Report

In [None]:
# Final report built from the results gathered by the previous cells
# (`health`, `latencies`, `batch_results`).
banner = "=" * 60

# Guard the computed figures so that an empty benchmark or an empty batch
# produces "n/a" instead of a ZeroDivisionError / IndexError.
if latencies:
    average_latency = f"{sum(latencies)/len(latencies):.2f} ms"
    p95_latency = f"{sorted(latencies)[int(len(latencies)*0.95)]:.2f} ms"
else:
    average_latency = p95_latency = "n/a"

total_clients = batch_results['total_clients']
if total_clients:
    approval_rate = f"{batch_results['approved_count']/total_clients*100:.1f}%"
else:
    approval_rate = "n/a"

print(banner)
print("API TEST SUMMARY REPORT")
print(banner)
print(f"\nAPI URL: {API_URL}")
print(f"API Status: {health['status']}")
print(f"Model Loaded: {health['model_loaded']}")
# The presence of 'version' is not checked anywhere earlier in the notebook,
# so fall back to a placeholder rather than failing the whole report.
print(f"API Version: {health.get('version', 'unknown')}")
print("\nPerformance:")
print(f"  Average Latency: {average_latency}")
print(f"  P95 Latency: {p95_latency}")
print("\nBatch Test Results:")
print(f"  Total Clients: {total_clients}")
print(f"  Approved: {batch_results['approved_count']}")
print(f"  Rejected: {batch_results['rejected_count']}")
print(f"  Approval Rate: {approval_rate}")
print("\n" + banner)
print("✅ All tests completed successfully!")
print(banner)