In [6]:
import shap
import matplotlib.pyplot as plt
import pickle
import os
import pandas as pd
import numpy as np
import logging
import sys

In [7]:
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def load_model_and_data(model_path, data_path):
    """Load the best model and preprocessed data."""
    try:
        with open(model_path, 'rb') as file:
            model = pickle.load(file)
        logger.info(f"Model loaded successfully from {model_path}")
        
        data = pd.read_csv(data_path)
        logger.info(f"Data loaded successfully from {data_path}")
        return model, data
    except FileNotFoundError as e:
        logger.error(f"File not found: {e}")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Error loading model or data: {e}")
        sys.exit(1)

In [8]:
def prepare_data(data):
    """Prepare features and target for SHAP analysis."""
    try:
        X = data.drop(columns=['class'])
        y = data['class']
        logger.info("Data prepared for SHAP analysis")
        return X, y
    except Exception as e:
        logger.error(f"Error preparing data: {e}")
        sys.exit(1)

def generate_shap_plots(model, X, output_dir='plots'):
    """Generate and save SHAP summary and force plots."""
    try:
        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)
        
        # Initialize SHAP explainer
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X)
        
        # Summary Plot
        plt.figure(figsize=(10, 6))
        shap.summary_plot(shap_values[1], X, feature_names=X.columns, show=False)
        plt.title("SHAP Summary Plot for Random Forest Model")
        summary_plot_path = os.path.join(output_dir, 'shap_summary_plot.png')
        plt.savefig(summary_plot_path)
        plt.close()
        logger.info(f"SHAP summary plot saved to {summary_plot_path}")
        
        # Force Plot for a single prediction
        instance_to_explain = X.iloc[0]
        shap_value_instance = explainer.shap_values(instance_to_explain)
        plt.figure()
        shap.force_plot(explainer.expected_value[1], shap_value_instance[1], instance_to_explain, matplotlib=True, show=False)
        plt.title("SHAP Force Plot for a Single Prediction")
        force_plot_path = os.path.join(output_dir, 'shap_force_plot.png')
        plt.savefig(force_plot_path)
        plt.close()
        logger.info(f"SHAP force plot saved to {force_plot_path}")
        
        return shap_values
    except Exception as e:
        logger.error(f"Error generating SHAP plots: {e}")
        sys.exit(1)

In [9]:

def interpret_shap_results(shap_values, X):
    """Interpret SHAP results for key drivers of fraud."""
    try:
        feature_importance = pd.DataFrame({
            'feature': X.columns,
            'mean_abs_shap': np.abs(shap_values[1]).mean(axis=0)
        })
        feature_importance = feature_importance.sort_values(by='mean_abs_shap', ascending=False)
        
        interpretation = (
            "# SHAP Analysis Interpretation\n\n"
            "## Key Drivers of Fraud\n"
            "The SHAP summary plot highlights the most influential features driving fraud predictions:\n"
        )
        for i, row in feature_importance.head(5).iterrows():
            interpretation += f"- **{row['feature']}**: This feature has a significant impact on fraud prediction, "
            interpretation += f"with a mean absolute SHAP value of {row['mean_abs_shap']:.4f}. "
            interpretation += "Higher or lower values of this feature push the model toward or away from predicting fraud, "
            interpretation += "depending on the feature's relationship with the target variable.\n"
        
        interpretation += (
            "\n## Local Interpretation\n"
            "The SHAP force plot for a single instance shows how specific feature values contribute to the model's prediction for that instance. "
            "Features with positive SHAP values increase the likelihood of fraud, while those with negative values decrease it.\n"
        )
        
        output_path = os.path.join('reports', 'shap_interpretation.md')
        os.makedirs('reports', exist_ok=True)
        with open(output_path, 'w') as f:
            f.write(interpretation)
        logger.info(f"SHAP interpretation saved to {output_path}")
        
        return feature_importance
    except Exception as e:
        logger.error(f"Error interpreting SHAP results: {e}")
        sys.exit(1)

In [11]:
if __name__ == "__main__":
    # Configuration
    model_path = os.path.join('src', 'best_fraud_model.pkl')
    data_path = os.path.join('data', 'processed_fraud_data.csv')
    
    # Log current working directory and files
    logger.info(f"Current working directory: {os.getcwd()}")
    logger.info(f"Files in directory: {os.listdir()}")
    
    # Load model and data
    best_model, data = load_model_and_data(model_path, data_path)
    
    # Prepare data
    X, y = prepare_data(data)
    
    # Generate SHAP plots
    shap_values = generate_shap_plots(best_model, X)
    
    # Interpret results
    feature_importance = interpret_shap_results(shap_values, X)
    
    logger.info("SHAP analysis completed successfully.")

2025-08-21 16:45:34,743 - INFO - Current working directory: /home/g/ALL/10/week 12/Improved-detection-of-fraud-cases-for-e-commerce-and-bank-transactions/notebooks
2025-08-21 16:45:34,745 - INFO - Files in directory: ['Explainability.ipynb', 'shap_analysis.py', 'all.py', 'data_analysis_and_preprocessing.ipynb', 'Model_Building_and_Training.ipynb', 'main.py']
2025-08-21 16:45:34,746 - ERROR - File not found: [Errno 2] No such file or directory: 'src/best_fraud_model.pkl'


SystemExit: 1