# CatBoost Formal Feature Attribution Analysis

This notebook processes CatBoost JSON model files to perform formal feature attribution analysis, including:

- **Model Loading**: Load CatBoost models from JSON format
- **Rule Extraction**: Extract decision rules from tree structures
- **Anchored Explanations (AXP)**: Generate explanations for model predictions
- **Feature Importance**: Calculate feature importance from explanations
- **Causal Analysis**: Measure causal responsibility of features
- **Visualization**: Create comprehensive visualizations and reports

**📖 Documentation**: See [`README_ffa_analysis.md`](README_ffa_analysis.md) for detailed documentation.


## 1. Setup and Configuration


In [None]:
import sys
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Any
from collections import defaultdict, Counter
import warnings
warnings.filterwarnings('ignore')

# CatBoost and ML libraries
from catboost import CatBoostClassifier
from sklearn.metrics import (
    roc_auc_score, accuracy_score, precision_score, recall_score,
    f1_score, log_loss, confusion_matrix, roc_curve, precision_recall_curve
)

# FFA Analysis modules
# Optional dependencies that failed to import are collected here so the final
# status line reflects what actually happened instead of always claiming success.
_missing_optional = []

try:
    from ffa_analysis import (
        validate_explainer_structure,
        analyze_ctr_hash_maps,
        print_json_key_structure
    )
    from catboost_axp_explainer import CatBoostAXPExplainer, PathConfig, AnalysisConfig
except ImportError as e:
    _missing_optional.append('FFA modules')
    print(f"Warning: Could not import FFA modules: {e}")
    print("Some functionality may be limited.")

# AWS S3 (if needed)
try:
    import boto3
except ImportError:
    _missing_optional.append('boto3')
    print("Warning: boto3 not available. S3 functionality disabled.")

# Set plotting style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

if _missing_optional:
    print(f"Warning: continuing without optional libraries: {', '.join(_missing_optional)}")
else:
    print("âœ“ All libraries imported successfully")


In [None]:
# ============================================================
# CONFIGURATION
# ============================================================

# Model artifacts (update these to your actual paths).
# Only the JSON model is loaded unconditionally; the CBM file, the metadata
# JSON and the importance CSV are used by later cells only if they exist.
MODEL_CONFIG = dict(
    # Local or S3 path
    model_json_path='catboost_models/opioid_ed/25_44_2016/catboost_model.json',
    # Optional: CBM format
    model_cbm_path='catboost_models/opioid_ed/25_44_2016/model.cbm',
    # Optional: metadata
    model_info_json='catboost_models/opioid_ed/25_44_2016/model_info.json',
    # Optional
    feature_importance_csv='catboost_models/opioid_ed/25_44_2016/feature_importance.csv',
)

# Data locations (needed for explanations and causal analysis)
DATA_CONFIG = dict(
    train_data_path=None,   # Optional: path to training data
    test_data_path=None,    # Optional: path to test data
    use_s3=False,           # Set to True if using S3 paths
    s3_bucket='pgxdatalake',
    s3_prefix='catboost_models/opioid_ed/age_band=25-44/event_year=2016',
)

# Analysis knobs
ANALYSIS_CONFIG = dict(
    target_class=1,         # Class to explain (1 for positive predictions)
    target_threshold=0.5,   # Probability threshold for positive predictions
    top_k_features=20,      # Number of top features to display
    min_coverage=0.01,      # Minimum rule coverage
    n_permutations=100,     # Number of permutations for causal analysis
    random_seed=1997,
)

# Where and how results are written
OUTPUT_CONFIG = dict(
    output_dir='ffa_results',
    save_plots=True,
    save_results=True,
    plot_format='png',
    plot_dpi=300,
)

# Make sure the output directory exists before any cell tries to write into it
os.makedirs(OUTPUT_CONFIG['output_dir'], exist_ok=True)

print("âœ“ Configuration loaded")


## 2. Load CatBoost Model from JSON


In [None]:
def load_catboost_json(model_json_path: str) -> Dict[str, Any]:
    """
    Load a CatBoost model from a JSON file and check its top-level structure.

    The source is selected by the module-level ``DATA_CONFIG``: when
    ``DATA_CONFIG['use_s3']`` is truthy the object is downloaded from
    ``DATA_CONFIG['s3_bucket']``, otherwise ``model_json_path`` is read from
    the local filesystem.

    Args:
        model_json_path: Path to the CatBoost JSON model file. In S3 mode a
            leading ``s3://<bucket>/`` is stripped to obtain the object key;
            a path without that prefix is used as the key unchanged.

    Returns:
        Dictionary containing model structure

    Raises:
        ImportError: S3 mode is enabled but boto3 is not installed.
        FileNotFoundError: Local mode and the file does not exist.
        ValueError: The JSON root is not an object, or a required key
            ('oblivious_trees', 'features_info') is missing.
    """
    print(f"Loading CatBoost model from: {model_json_path}")

    if DATA_CONFIG['use_s3']:
        import io
        try:
            # Imported here so a missing boto3 surfaces as a clear ImportError
            # instead of a NameError from the notebook's optional import.
            import boto3
        except ImportError as exc:
            raise ImportError(
                "boto3 is required to load models from S3 (DATA_CONFIG['use_s3'] is True)"
            ) from exc

        bucket = DATA_CONFIG['s3_bucket']
        key = model_json_path.replace(f"s3://{bucket}/", "")

        # download_fileobj writes bytes, so the target must be a binary
        # file-like object. An in-memory buffer also avoids leaving a temp
        # file behind when parsing fails.
        buffer = io.BytesIO()
        boto3.client('s3').download_fileobj(bucket, key, buffer)
        model_json = json.loads(buffer.getvalue())
    else:
        # Load from local file
        if not os.path.exists(model_json_path):
            raise FileNotFoundError(f"Model file not found: {model_json_path}")
        with open(model_json_path, 'r', encoding='utf-8') as f:
            model_json = json.load(f)

    # Everything below indexes the model by key, so reject other JSON roots early.
    if not isinstance(model_json, dict):
        raise ValueError(
            f"Expected a JSON object at the top level, got {type(model_json).__name__}"
        )

    print(f"âœ“ Model loaded successfully")
    print(f"  - Keys: {list(model_json.keys())}")

    # Validate structure
    required_keys = ['oblivious_trees', 'features_info']
    for key in required_keys:
        if key not in model_json:
            raise ValueError(f"Missing required key: {key}")

    print(f"  - Number of trees: {len(model_json.get('oblivious_trees', []))}")
    print(f"  - Features info present: {bool(model_json.get('features_info'))}")
    print(f"  - CTR data present: {bool(model_json.get('ctr_data'))}")

    return model_json

# Load the model once at notebook scope; later cells read the parsed dict
# through the `model_json` global.
model_json = load_catboost_json(MODEL_CONFIG['model_json_path'])


In [None]:
# Inspect model structure
def inspect_model_structure(model_json: Dict[str, Any]):
    """
    Print a short overview of a parsed CatBoost model dictionary.

    Three sections are reported: the tree list (count plus the layout of the
    first tree), the float/categorical feature metadata, and the CTR table.
    Missing sections are treated as empty. Nothing is returned.
    """
    print("\n=== Model Structure Inspection ===\n")

    # --- Trees -----------------------------------------------------------
    tree_list = model_json.get('oblivious_trees', [])
    print(f"Trees: {len(tree_list)} total")
    if tree_list:
        head_tree = tree_list[0]
        print(f"  - First tree keys: {list(head_tree.keys())}")
        for label, field in (("splits", 'splits'), ("leaf values", 'leaf_values')):
            print(f"  - First tree {label}: {len(head_tree.get(field, []))}")

    # --- Feature metadata --------------------------------------------------
    info = model_json.get('features_info', {})
    print("\nFeatures Info:")
    print(f"  - Float features: {len(info.get('float_features', []))}")
    print(f"  - Categorical features: {len(info.get('cat_features', []))}")

    # Show the key layout of the first entry of each non-empty feature list.
    for label, field in (("float", 'float_features'), ("cat", 'cat_features')):
        entries = info.get(field)
        if entries:
            print(f"  - First {label} feature keys: {list(entries[0].keys())}")

    # --- CTR table -----------------------------------------------------------
    ctr_table = model_json.get('ctr_data', {})
    print("\nCTR Data:")
    print(f"  - CTR entries: {len(ctr_table)}")
    if not ctr_table:
        return

    sample_key = next(iter(ctr_table.keys()))
    # Only the first 100 characters of the key are shown.
    print(f"  - First CTR key: {sample_key[:100]}...")
    print(f"  - First CTR value keys: {list(ctr_table[sample_key].keys())}")

# Print the structure overview for the model held in the `model_json` global.
inspect_model_structure(model_json)


## 3. Extract Feature Information and CTR Mappings


In [None]:
def _index_feature_entries(entries, index_key, name_prefix):
    """Resolve (index, name, entry) for each feature entry of one kind."""
    resolved = []
    for position, feat in enumerate(entries):
        feat_idx = feat.get(index_key)
        if feat_idx is None:
            # NOTE(review): native CatBoost JSON exports appear to use
            # 'feature_index' / 'feature_id' rather than the keys this notebook
            # reads first - confirm against an actual export.
            feat_idx = feat.get('feature_index')
        if feat_idx is None:
            # Without any index key, fall back to list position so entries do
            # not all collapse onto the single dict key None.
            feat_idx = position
        feat_name = (
            feat.get('feature_name')
            or feat.get('feature_id')
            or f'{name_prefix}_{feat_idx}'
        )
        resolved.append((feat_idx, feat_name, feat))
    return resolved


def extract_feature_mappings(model_json: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extract feature name mappings from model JSON.

    Reads ``features_info.float_features``, ``features_info.cat_features`` and
    ``ctr_data``. Missing sections are treated as empty. Features without a
    usable name get ``float_feature_<idx>`` / ``cat_feature_<idx>``.

    CTR entries are best-effort: a key that is not JSON, does not decode to an
    object with an ``identifier`` list, or whose payload is not a dict is
    skipped. Only the first CTR entry seen for each categorical index is kept.

    Args:
        model_json: Parsed CatBoost model dictionary.

    Returns:
        Dictionary with feature mappings:
            - 'float_idx_to_name': float feature index -> name
            - 'float_borders': float feature index -> list of borders
            - 'cat_idx_to_name': categorical feature index -> name
            - 'ctr_mappings': categorical feature index -> dict with
              'hash_map', 'borders' and 'feature_name'
    """
    features_info = model_json.get('features_info') or {}

    # Float features
    float_idx_to_name = {}
    float_borders = {}
    float_entries = features_info.get('float_features') or []
    for feat_idx, feat_name, feat in _index_feature_entries(
            float_entries, 'float_feature_index', 'float_feature'):
        float_idx_to_name[feat_idx] = feat_name
        float_borders[feat_idx] = feat.get('borders', [])

    # Categorical features
    cat_idx_to_name = {}
    cat_entries = features_info.get('cat_features') or []
    for feat_idx, feat_name, _ in _index_feature_entries(
            cat_entries, 'cat_feature_index', 'cat_feature'):
        cat_idx_to_name[feat_idx] = feat_name

    # CTR mappings
    ctr_data = model_json.get('ctr_data') or {}
    ctr_mappings = {}

    for ctr_key, ctr_value in ctr_data.items():
        try:
            ctr_info = json.loads(ctr_key)
        except (json.JSONDecodeError, TypeError):
            continue
        # A key can be valid JSON without being an object (e.g. "5"); a
        # payload can be null. Neither can describe a CTR, so skip them.
        if not isinstance(ctr_info, dict) or not isinstance(ctr_value, dict):
            continue
        identifiers = ctr_info.get('identifier')
        if not isinstance(identifiers, list):
            continue
        for identifier in identifiers:
            if not isinstance(identifier, dict) or 'cat_feature_index' not in identifier:
                continue
            cat_idx = identifier['cat_feature_index']
            if cat_idx not in ctr_mappings:
                ctr_mappings[cat_idx] = {
                    'hash_map': ctr_value.get('hash_map', []),
                    'borders': ctr_value.get('borders', []),
                    'feature_name': cat_idx_to_name.get(cat_idx, f'cat_feature_{cat_idx}')
                }

    mappings = {
        'float_idx_to_name': float_idx_to_name,
        'float_borders': float_borders,
        'cat_idx_to_name': cat_idx_to_name,
        'ctr_mappings': ctr_mappings
    }

    print(f"âœ“ Feature mappings extracted:")
    print(f"  - Float features: {len(float_idx_to_name)}")
    print(f"  - Categorical features: {len(cat_idx_to_name)}")
    print(f"  - CTR mappings: {len(ctr_mappings)}")

    return mappings

# Extract feature mappings; the summary report at the end of the notebook reads
# `feature_mappings` for its float / categorical / CTR counts.
feature_mappings = extract_feature_mappings(model_json)


In [None]:
# Analyze CTR hash maps if present
if model_json.get('ctr_data'):
    try:
        # analyze_ctr_hash_maps comes from the optional `ffa_analysis` import in
        # the setup cell; if that import failed the name is undefined and the
        # resulting NameError is reported by the handler below.
        ctr_analysis = analyze_ctr_hash_maps(model_json['ctr_data'])
        print("\n=== CTR Hash Map Analysis ===")
        # The result is read as a mapping with 'total_entries' and 'feature_stats'.
        print(f"Total entries: {ctr_analysis['total_entries']}")
        print(f"Feature stats: {len(ctr_analysis['feature_stats'])} features analyzed")
    except Exception as e:
        # Best-effort diagnostic: any failure is reported and the notebook continues.
        print(f"âš  Could not analyze CTR hash maps: {e}")


## 4. Load Model Info and Feature Importance (if available)


In [None]:
# Load model info JSON if available
# Both globals default to None; later cells test them before use.
model_info = None
feature_importance_df = None

# The metadata file is optional: skipped when the path is unset or the file is missing.
if MODEL_CONFIG.get('model_info_json') and os.path.exists(MODEL_CONFIG['model_info_json']):
    with open(MODEL_CONFIG['model_info_json'], 'r') as f:
        model_info = json.load(f)
    print(f"âœ“ Model info loaded")
    # .get() is used throughout, so absent metadata fields print as None / {}.
    print(f"  - Model type: {model_info.get('model_type')}")
    print(f"  - Age band: {model_info.get('age_band')}")
    print(f"  - Event year: {model_info.get('event_year')}")
    print(f"  - Metrics: {model_info.get('metrics', {})}")

# Load feature importance CSV if available
# This frame is the fallback importance source when no explanations are generated.
if MODEL_CONFIG.get('feature_importance_csv') and os.path.exists(MODEL_CONFIG['feature_importance_csv']):
    feature_importance_df = pd.read_csv(MODEL_CONFIG['feature_importance_csv'])
    print(f"\nâœ“ Feature importance loaded: {len(feature_importance_df)} features")
    print(f"\nTop 10 features:")
    # Shows the first ten rows in file order; no sorting is applied here.
    print(feature_importance_df.head(10).to_string(index=False))


## 5. Initialize FFA Explainer (if available)


In [None]:
# Initialize FFA Explainer if available
# `explainer` stays None unless construction AND model loading both succeed;
# later cells rely on `explainer is not None` to decide whether to use it.
explainer = None
_explainer_loaded = False

try:
    # Initialize path configuration
    path_config = PathConfig(
        model_path=MODEL_CONFIG['model_json_path'],
        # 'test_data_path' is present in DATA_CONFIG with value None, so
        # .get(key, '') would return None; `or ''` gives the intended default.
        data_dir=DATA_CONFIG.get('test_data_path') or '',
        output_dir=OUTPUT_CONFIG['output_dir'],
        tree_rules_path=None,
        age_band=None
    )

    # Initialize analysis configuration
    # NOTE(review): analysis_config is built but never handed to the explainer
    # in this notebook - confirm whether CatBoostAXPExplainer should receive it.
    analysis_config = AnalysisConfig(
        top_k=ANALYSIS_CONFIG['top_k_features'],
        min_coverage=ANALYSIS_CONFIG['min_coverage'],
        significance_threshold=0.05,
        n_permutations=ANALYSIS_CONFIG['n_permutations']
    )

    # Initialize explainer
    explainer = CatBoostAXPExplainer(path_config)

    # Load model into explainer
    explainer.load_model_json(MODEL_CONFIG['model_json_path'])
    _explainer_loaded = True

    print("âœ“ FFA Explainer initialized")

    # Validate explainer structure
    if hasattr(explainer, 'feature_names'):
        validation_passed = validate_explainer_structure(explainer)
        if not validation_passed:
            print("\nâš  Warning: Explainer validation failed. Proceed with caution.")
except Exception as e:
    if not _explainer_loaded:
        # Construction or model loading failed: drop the half-initialised
        # object so downstream cells skip explainer-dependent work.
        explainer = None
    print(f"âš  Could not initialize FFA Explainer: {e}")
    print("Continuing with basic analysis...")


## 6. Load Test Data (if available)


In [None]:
# Load test data if available
# Both stay None when no test file is configured; later cells check for that.
X_test = None
y_test = None

# With the default config ('test_data_path': None) this condition is False and
# the notebook takes the "not available" branch at the bottom.
if DATA_CONFIG.get('test_data_path') and os.path.exists(DATA_CONFIG['test_data_path']):
    print(f"Loading test data from: {DATA_CONFIG['test_data_path']}")
    
    # Try to load as CSV or Parquet
    # Only a '.parquet' suffix selects the Parquet reader; everything else is read as CSV.
    if DATA_CONFIG['test_data_path'].endswith('.parquet'):
        test_data = pd.read_parquet(DATA_CONFIG['test_data_path'])
    else:
        test_data = pd.read_csv(DATA_CONFIG['test_data_path'])
    
    # Separate features and target
    # 'target' takes precedence over 'is_target_case' when both columns exist.
    if 'target' in test_data.columns:
        y_test = test_data['target'].values
        X_test = test_data.drop('target', axis=1)
    elif 'is_target_case' in test_data.columns:
        y_test = test_data['is_target_case'].values
        X_test = test_data.drop('is_target_case', axis=1)
    else:
        print("âš  No target column found. Using all columns as features.")
        X_test = test_data
        y_test = None
    
    print(f"âœ“ Test data loaded: {X_test.shape[0]} samples, {X_test.shape[1]} features")
    if y_test is not None:
        print(f"  - Target distribution: {Counter(y_test)}")
else:
    print("âš  Test data not available. Skipping data-dependent analyses.")


## 7. Generate Anchored Explanations (AXP)

In [None]:
# Explanations default to an empty list so later cells can test `if explanations:`.
explanations = []

# All three inputs are required: features, labels and a loaded explainer.
if X_test is not None and y_test is not None and explainer is not None:
    print("\n=== Generating Anchored Explanations (AXP) ===\n")

    try:
        # Generate explanations for target class
        explanations = explainer.explain_dataset(
            X_test,
            y_test,
            target_class=ANALYSIS_CONFIG['target_class'],
            strategy='AXP'
        )

        print(f"âœ“ Generated {len(explanations)} explanations")

        # Analyze unmatched instances
        if hasattr(explainer, 'unmatched'):
            unmatched_count = len(explainer.unmatched)
            # NOTE(review): this assumes unmatched instances are also counted
            # in `explanations` - confirm against explain_dataset.
            print(f"  - Matched instances: {len(explanations) - unmatched_count}")
            print(f"  - Unmatched instances: {unmatched_count}")
            # Guard the division: when nothing matched, `explanations` can be
            # empty while `unmatched` is not. That used to raise
            # ZeroDivisionError and be misreported as a generation error.
            if unmatched_count > 0 and len(explanations) > 0:
                unmatched_pct = (unmatched_count / len(explanations)) * 100
                print(f"  - Unmatched percentage: {unmatched_pct:.2f}%")
    except Exception as e:
        print(f"âš  Error generating explanations: {e}")
        explanations = []
else:
    print("âš  Test data or explainer not available. Skipping AXP generation.")


## 8. Calculate Feature Importance


In [None]:
def calculate_feature_importance_from_explanations(explanations: List[Dict], 
                                                   top_k: int = 20) -> pd.DataFrame:
    """
    Calculate feature importance from explanations.

    Importance is the number of conditions that mention a feature divided by
    the number of explanations. A feature that appears in several conditions
    of one explanation is counted each time, so the value can exceed 1.

    A condition's feature is taken from 'feature_name', falling back to
    'feature' when that is missing or empty; conditions with neither are
    ignored. This is the same resolution calculate_causal_importance uses.

    Args:
        explanations: List of explanation dictionaries; only their
            'conditions' lists are read.
        top_k: Number of top features to return

    Returns:
        DataFrame with columns 'feature', 'count' and 'importance', ordered by
        descending count. The columns are present even when there are no rows.
    """
    columns = ['feature', 'count', 'importance']
    feature_counts = Counter()

    for explanation in explanations:
        if 'conditions' not in explanation:
            continue
        for condition in explanation['conditions']:
            feat_name = condition.get('feature_name') or condition.get('feature')
            if feat_name:
                feature_counts[feat_name] += 1

    # feature_counts is only non-empty if explanations is, so the division is safe.
    total = len(explanations)
    rows = [
        {'feature': feat, 'count': count, 'importance': count / total}
        for feat, count in feature_counts.most_common(top_k)
    ]

    # Explicit columns keep the schema stable when no rows are produced
    # (no features found, or top_k == 0).
    return pd.DataFrame(rows, columns=columns)

# Calculate feature importance
# `feature_importance_axp` ends up as a frame with at least 'feature' and
# 'importance' columns, or None; the plotting, saving and summary cells rely on that.
if explanations:
    feature_importance_axp = calculate_feature_importance_from_explanations(
        explanations,
        top_k=ANALYSIS_CONFIG['top_k_features']
    )

    print("\n=== Feature Importance from AXP Explanations ===\n")
    print(feature_importance_axp.to_string(index=False))
else:
    print("âš  No explanations available. Using model's built-in feature importance.")
    feature_importance_axp = None
    if feature_importance_df is not None:
        feature_importance_axp = feature_importance_df.head(ANALYSIS_CONFIG['top_k_features']).copy()
        if 'importance' not in feature_importance_axp.columns:
            # Use first numeric column as importance
            numeric_cols = feature_importance_axp.select_dtypes(include=[np.number]).columns
            if len(numeric_cols) > 0:
                feature_importance_axp['importance'] = feature_importance_axp[numeric_cols[0]]
        if 'feature' not in feature_importance_axp.columns:
            # Mirror the fallback above: use the first non-numeric column as
            # the feature label, since later cells index ['feature'] directly.
            label_cols = feature_importance_axp.select_dtypes(exclude=[np.number]).columns
            if len(label_cols) > 0:
                feature_importance_axp['feature'] = feature_importance_axp[label_cols[0]]
        if not {'feature', 'importance'}.issubset(feature_importance_axp.columns):
            # Without both columns the later cells would fail with KeyError.
            print("Warning: feature importance CSV has no usable feature/importance columns; ignoring it.")
            feature_importance_axp = None


## 9. Causal Analysis


In [None]:
def _perturb_feature(X: pd.DataFrame, feat_name: str) -> Optional[pd.DataFrame]:
    """Return a copy of X with one column perturbed, or None if no perturbation applies."""
    column = X[feat_name]

    if pd.api.types.is_bool_dtype(column):
        # Boolean features: flip values
        new_values = ~column
    elif pd.api.types.is_numeric_dtype(column):
        # Numeric features of any width: shift by one standard deviation,
        # or by 1 when the column is constant (std is 0 or NaN).
        std_dev = column.std()
        new_values = column + (std_dev if std_dev > 0 else 1)
    elif column.nunique() == 2:
        # Two-valued non-numeric features: swap the two observed values.
        first, second = column.dropna().unique()[:2]
        new_values = column.map({first: second, second: first})
    else:
        return None

    perturbed = X.copy()
    perturbed[feat_name] = new_values
    return perturbed


def calculate_causal_importance(X_test: pd.DataFrame,
                               explanations: List[Dict],
                               model: "CatBoostClassifier",
                               n_permutations: int = 100,
                               target_class: Optional[int] = None,
                               top_k: Optional[int] = None) -> pd.DataFrame:
    """
    Calculate causal importance by measuring prediction changes when features are modified.

    For each analysed feature a counterfactual copy of X_test is built with
    that single column perturbed. The score is the mean absolute change in the
    predicted probability of the target class.

    Features are taken from the explanations' conditions ('feature_name',
    falling back to 'feature') and ranked by how often they are mentioned, so
    the top_k cut-off is deterministic. Features that are not columns of
    X_test, or for which no perturbation is defined, are skipped rather than
    reported with a score of 0.

    Args:
        X_test: Test features
        explanations: List of explanations
        model: Trained CatBoost model; only predict_proba is called
        n_permutations: Accepted for interface compatibility. It is not used:
            each feature gets one deterministic perturbation, so 'std' is
            always 0.
        target_class: Column of predict_proba to track. Defaults to
            ANALYSIS_CONFIG['target_class'].
        top_k: Maximum number of features to analyse. Defaults to
            ANALYSIS_CONFIG['top_k_features'].

    Returns:
        DataFrame with columns 'feature', 'causal_importance' and 'std',
        sorted by descending causal importance.
    """
    print("\n=== Calculating Causal Importance ===\n")

    if target_class is None:
        target_class = ANALYSIS_CONFIG['target_class']
    if top_k is None:
        top_k = ANALYSIS_CONFIG['top_k_features']

    feature_causal_scores = defaultdict(list)

    # Get baseline predictions
    baseline_probs = model.predict_proba(X_test)[:, target_class]

    # Count how often each feature is mentioned in the explanations
    mention_counts = Counter()
    for explanation in explanations:
        if 'conditions' in explanation:
            for condition in explanation['conditions']:
                feat_name = condition.get('feature_name') or condition.get('feature')
                if feat_name:
                    mention_counts[feat_name] += 1

    print(f"Analyzing {len(mention_counts)} features...")

    # most_common gives a stable, frequency-ranked order. Slicing a set would
    # pick an arbitrary subset that can change between runs.
    for feat_name, _ in mention_counts.most_common(top_k):
        if feat_name not in X_test.columns:
            continue

        # Create counterfactual by modifying this feature
        X_counterfactual = _perturb_feature(X_test, feat_name)
        if X_counterfactual is None:
            print(f"  - Skipping '{feat_name}': no perturbation defined for dtype {X_test[feat_name].dtype}")
            continue

        # Calculate new predictions
        counterfactual_probs = model.predict_proba(X_counterfactual)[:, target_class]

        # Calculate average change in probability
        prob_change = np.mean(np.abs(counterfactual_probs - baseline_probs))
        feature_causal_scores[feat_name].append(prob_change)

    # Aggregate scores
    if not feature_causal_scores:
        return pd.DataFrame(columns=['feature', 'causal_importance', 'std'])

    causal_importance_df = pd.DataFrame([
        {
            'feature': feat,
            'causal_importance': np.mean(scores),
            'std': np.std(scores)
        }
        for feat, scores in feature_causal_scores.items()
    ]).sort_values('causal_importance', ascending=False)

    return causal_importance_df

# Calculate causal importance if model and data are available
# Stays None unless the analysis below completes; later cells check for that.
causal_importance_df = None

# Requires test features, at least one explanation, and a configured CBM path.
if X_test is not None and explanations and MODEL_CONFIG.get('model_cbm_path'):
    # Load CatBoost model
    if os.path.exists(MODEL_CONFIG['model_cbm_path']):
        try:
            # The binary CBM model is loaded here because the causal analysis
            # needs predict_proba on a CatBoostClassifier.
            model = CatBoostClassifier()
            model.load_model(MODEL_CONFIG['model_cbm_path'])
            
            causal_importance_df = calculate_causal_importance(
                X_test,
                explanations,
                model,
                n_permutations=ANALYSIS_CONFIG['n_permutations']
            )
            
            print("\n=== Causal Importance Results ===\n")
            print(causal_importance_df.head(20).to_string(index=False))
        except Exception as e:
            # Best-effort: report the failure and leave causal_importance_df
            # at whatever value it had when the error occurred.
            print(f"âš  Error in causal analysis: {e}")
    else:
        print("âš  Model CBM file not found. Skipping causal analysis.")
else:
    # NOTE: this branch is also taken when 'model_cbm_path' is unset, although
    # the message only mentions test data and explanations.
    print("âš  Test data or explanations not available. Skipping causal analysis.")


## 10. Visualizations


In [None]:
# Feature Importance Visualization
# Horizontal bar chart of the top features, most important at the top.
if feature_importance_axp is not None and len(feature_importance_axp) > 0:
    fig, ax = plt.subplots(figsize=(12, 8))

    top_features = feature_importance_axp.head(ANALYSIS_CONFIG['top_k_features'])

    ax.barh(range(len(top_features)), top_features['importance'].values)
    ax.set_yticks(range(len(top_features)))
    ax.set_yticklabels(top_features['feature'].values)
    ax.set_xlabel('Importance Score', fontsize=12)
    ax.set_ylabel('Feature', fontsize=12)
    ax.set_title('Feature Importance from AXP Explanations', fontsize=14, fontweight='bold')
    # barh draws the first row at the bottom; invert so rank 1 is on top.
    ax.invert_yaxis()

    plt.tight_layout()

    if OUTPUT_CONFIG['save_plots']:
        # Honour the configured format instead of a hard-coded '.png';
        # the default config ('png') yields the same file name as before.
        plot_path = os.path.join(
            OUTPUT_CONFIG['output_dir'],
            f"feature_importance_axp.{OUTPUT_CONFIG.get('plot_format', 'png')}"
        )
        plt.savefig(plot_path, dpi=OUTPUT_CONFIG['plot_dpi'], bbox_inches='tight')
        print(f"âœ“ Saved plot to: {plot_path}")

    plt.show()
else:
    print("âš  No feature importance data available for visualization.")


In [None]:
# Causal Importance Visualization
# Horizontal bar chart of causal scores with the 'std' column as error bars.
if causal_importance_df is not None and len(causal_importance_df) > 0:
    fig, ax = plt.subplots(figsize=(12, 8))

    top_causal = causal_importance_df.head(ANALYSIS_CONFIG['top_k_features'])

    x_pos = range(len(top_causal))
    ax.barh(x_pos, top_causal['causal_importance'].values, 
            xerr=top_causal['std'].values, capsize=5)
    ax.set_yticks(x_pos)
    ax.set_yticklabels(top_causal['feature'].values)
    ax.set_xlabel('Causal Importance Score', fontsize=12)
    ax.set_ylabel('Feature', fontsize=12)
    ax.set_title('Causal Feature Importance', fontsize=14, fontweight='bold')
    # barh draws the first row at the bottom; invert so rank 1 is on top.
    ax.invert_yaxis()

    plt.tight_layout()

    if OUTPUT_CONFIG['save_plots']:
        # Honour the configured format instead of a hard-coded '.png';
        # the default config ('png') yields the same file name as before.
        plot_path = os.path.join(
            OUTPUT_CONFIG['output_dir'],
            f"causal_importance.{OUTPUT_CONFIG.get('plot_format', 'png')}"
        )
        plt.savefig(plot_path, dpi=OUTPUT_CONFIG['plot_dpi'], bbox_inches='tight')
        print(f"âœ“ Saved plot to: {plot_path}")

    plt.show()
else:
    print("âš  No causal importance data available for visualization.")


## 11. Save Results


In [None]:
# Write every available result into OUTPUT_CONFIG['output_dir'].
# Each artifact is optional and is skipped when its source object is None/empty.
if OUTPUT_CONFIG['save_results']:
    print("\n=== Saving Results ===\n")
    
    # Save feature importance
    if feature_importance_axp is not None:
        output_path = os.path.join(OUTPUT_CONFIG['output_dir'], 'feature_importance_axp.csv')
        feature_importance_axp.to_csv(output_path, index=False)
        print(f"âœ“ Saved feature importance to: {output_path}")
    
    # Save causal importance
    if causal_importance_df is not None:
        output_path = os.path.join(OUTPUT_CONFIG['output_dir'], 'causal_importance.csv')
        causal_importance_df.to_csv(output_path, index=False)
        print(f"âœ“ Saved causal importance to: {output_path}")
    
    # Save explanations summary
    # One row per explanation; missing 'prediction' / 'rule_id' are written as 'N/A'.
    if explanations:
        explanations_summary = pd.DataFrame([
            {
                'instance_id': i,
                'num_conditions': len(exp.get('conditions', [])),
                'prediction': exp.get('prediction', 'N/A'),
                'rule_id': exp.get('rule_id', 'N/A')
            }
            for i, exp in enumerate(explanations)
        ])
        
        output_path = os.path.join(OUTPUT_CONFIG['output_dir'], 'explanations_summary.csv')
        explanations_summary.to_csv(output_path, index=False)
        print(f"âœ“ Saved explanations summary to: {output_path}")
    
    # Save model info
    # Re-serialises the metadata dict loaded earlier so the output directory carries a copy.
    if model_info:
        output_path = os.path.join(OUTPUT_CONFIG['output_dir'], 'model_info.json')
        with open(output_path, 'w') as f:
            json.dump(model_info, f, indent=2)
        print(f"âœ“ Saved model info to: {output_path}")
    
    print(f"\nâœ“ All results saved to: {OUTPUT_CONFIG['output_dir']}")
else:
    print("âš  Results saving disabled in configuration.")


## 12. Summary Report


In [None]:
# Final console report. Each section prints only when its data exists.
print("\n" + "="*80)
print("FORMAL FEATURE ATTRIBUTION ANALYSIS SUMMARY")
print("="*80)

if model_info:
    print(f"\nModel Information:")
    print(f"  - Type: {model_info.get('model_type', 'N/A')}")
    print(f"  - Age Band: {model_info.get('age_band', 'N/A')}")
    print(f"  - Event Year: {model_info.get('event_year', 'N/A')}")
    if 'metrics' in model_info:
        print(f"  - Metrics: {model_info['metrics']}")

print(f"\nModel Structure:")
print(f"  - Number of trees: {len(model_json.get('oblivious_trees', []))}")
print(f"  - Float features: {len(feature_mappings['float_idx_to_name'])}")
print(f"  - Categorical features: {len(feature_mappings['cat_idx_to_name'])}")
print(f"  - CTR mappings: {len(feature_mappings['ctr_mappings'])}")

if X_test is not None:
    print(f"\nTest Data:")
    print(f"  - Samples: {X_test.shape[0]}")
    print(f"  - Features: {X_test.shape[1]}")
    if y_test is not None:
        print(f"  - Target distribution: {Counter(y_test)}")

if explanations:
    print(f"\nExplanations:")
    print(f"  - Total explanations: {len(explanations)}")
    if explainer is not None and hasattr(explainer, 'unmatched'):
        print(f"  - Unmatched instances: {len(explainer.unmatched)}")

# Rank numbers come from enumerate, not from the DataFrame index. The index
# holds pre-sort labels, which gave out-of-order numbers after sort_values and
# raised TypeError on non-integer indexes.
if feature_importance_axp is not None and len(feature_importance_axp) > 0:
    print(f"\nTop 5 Features (AXP Importance):")
    top_axp = feature_importance_axp.head(5)
    for rank, (name, score) in enumerate(zip(top_axp['feature'], top_axp['importance']), start=1):
        print(f"  {rank}. {name}: {score:.4f}")

if causal_importance_df is not None and len(causal_importance_df) > 0:
    print(f"\nTop 5 Features (Causal Importance):")
    top_causal_rows = causal_importance_df.head(5)
    for rank, (name, score) in enumerate(
            zip(top_causal_rows['feature'], top_causal_rows['causal_importance']), start=1):
        print(f"  {rank}. {name}: {score:.4f}")

print("\n" + "="*80)
print("Analysis Complete!")
print("="*80)
