In [33]:
import shap
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pickle

# Load trained model.
# NOTE: pickle can execute arbitrary code while loading; only unpickle files
# that come from a trusted source.
with open("fraud_model.pkl", "rb") as model_file:  # context manager closes the handle
    model = pickle.load(model_file)

# Load test dataset
X_test = pd.read_csv("Processed_test_data.csv")

# Initialize SHAP Explainer
explainer = shap.TreeExplainer(model)

def generate_shap_image(index, save_path="shap_explanation.png", class_index=0):
    """Render a SHAP waterfall plot for one row of ``X_test`` and save it to disk.

    Args:
        index: Positional (``iloc``) row number of the sample in ``X_test``.
        save_path: Destination file for the rendered figure.
        class_index: Which model output to explain when the explainer returns
            one set of SHAP values per class (a list of arrays, or a 3-D
            array). Defaults to 0, matching the previous hard-coded choice.
            Has no effect for single-output models.

    Returns:
        ``save_path`` on success. Failures are NOT raised: any exception is
        caught and a string of the form ``"Error: <message>"`` is returned
        instead, so callers must check the result before using it as a path.
    """
    fig = None
    try:
        test_sample = X_test.iloc[[index]]  # double brackets keep a one-row DataFrame

        # Compute SHAP values for the single selected row
        shap_values = explainer.shap_values(test_sample)

        # Per-class output delivered as a list of arrays
        if isinstance(shap_values, list):
            if len(shap_values) > 1:
                print(f"Multi-class classification detected. Using class {class_index} for visualization.")
                shap_values = shap_values[class_index]
            else:
                shap_values = shap_values[0]

        shap_values = np.asarray(shap_values)

        # Per-class output delivered as one 3-D array
        # NOTE(review): assumes the layout is (n_samples, n_features, n_outputs),
        # as in recent SHAP releases -- confirm against the installed version.
        if shap_values.ndim == 3:
            shap_values = shap_values[:, :, class_index]

        # expected_value may be a scalar, a 0-d array, a list, or a 1-D array
        base_value = explainer.expected_value
        if np.ndim(base_value) > 0:
            base_candidates = np.ravel(base_value)
            # Keep the base value aligned with the class whose SHAP values are plotted
            base_value = base_candidates[class_index] if base_candidates.size > 1 else base_candidates[0]
        base_value = float(base_value)

        # Only one row was explained, so take the first (and only) row of values
        shap_values_1d = shap_values[0]

        # Passing `data` lets the plot label each bar with the feature's value
        shap_exp = shap.Explanation(
            values=shap_values_1d,
            base_values=base_value,
            data=test_sample.iloc[0].to_numpy(),
            feature_names=list(X_test.columns),
        )

        # Create the figure first so it is the current figure the plot draws on
        fig, _ax = plt.subplots(figsize=(10, 6))
        shap.waterfall_plot(shap_exp, show=False)  # show=False: do not display inline

        fig.savefig(save_path, bbox_inches="tight", dpi=300)

        print(f"SHAP explanation saved at: {save_path}")
        return save_path

    except Exception as e:
        return f"Error: {str(e)}"

    finally:
        # Close the figure on success AND on failure so repeated calls do not
        # accumulate open matplotlib figures.
        if fig is not None:
            plt.close(fig)

# Example usage
index = int(input("Enter the transaction index: "))  # int() raises ValueError on non-numeric input
shap_image_path = generate_shap_image(index)

# generate_shap_image reports failure by returning an "Error: ..." string
# instead of raising, so surface it here rather than silently treating the
# message as a file path.
if str(shap_image_path).startswith("Error: "):
    print(shap_image_path)




    E.g. tree_method = "hist", device = "cuda"



Enter the transaction index: 5000
SHAP explanation saved at: shap_explanation.png


In [15]:
#!pip install google-generativeai


In [34]:
import google.generativeai as genai
import base64

# Initialize Gemini API.
# The key is read from the GOOGLE_API_KEY environment variable so that no
# secret is ever stored in (or committed with) the notebook.
import os

_gemini_api_key = os.environ.get("GOOGLE_API_KEY", "")
if not _gemini_api_key:
    print("Warning: GOOGLE_API_KEY is not set; Gemini requests will fail until it is provided.")
genai.configure(api_key=_gemini_api_key)

def analyze_shap_with_gemini(image_path, model_name="gemini-2.0-flash-exp"):
    """Send a saved SHAP waterfall image to a Gemini model and return its explanation.

    Args:
        image_path: Path to the PNG image to analyze.
        model_name: Identifier of the Gemini model to query. Defaults to the
            model this notebook was originally written against.

    Returns:
        The text of the model's response.

    Raises:
        OSError: If ``image_path`` cannot be opened for reading.
        Errors from the Gemini client are propagated unchanged.
        NOTE(review): accessing ``response.text`` presumably fails when the
        response carries no text (e.g. a blocked request) -- confirm against
        the client library.
    """
    # Read the SHAP image
    with open(image_path, "rb") as image_file:
        image_bytes = image_file.read()

    # The image is a waterfall plot, so the prompt must describe it as one;
    # calling it a force plot misleads the model's reading of the figure.
    prompt = """
    This is a SHAP waterfall plot explaining a model's fraud classification decision from the IEEE-CIS Fraud Detection dataset on Kaggle.
    - Features in **red** increase the probability of fraud.
    - Features in **blue** decrease the probability of fraud.
    Analyze this image and explain why the model classified the transaction as fraud or not fraud.
    """

    # Named gemini_model to avoid confusion with the notebook-level fraud `model`
    gemini_model = genai.GenerativeModel(model_name)

    # Multimodal request: the text prompt followed by the raw PNG bytes
    response = gemini_model.generate_content(
        [prompt, {"mime_type": "image/png", "data": image_bytes}]
    )

    return response.text

# Demo: ask Gemini to explain the image file written by the SHAP cell
shap_image_path = "shap_explanation.png"  # must already exist on disk
explanation = analyze_shap_with_gemini(image_path=shap_image_path)
print("Gemini Explanation:\n", explanation)


Gemini Explanation:
 Based on the SHAP force plot, the model classified the transaction as **not fraud**. Here's why:

*   **Overall Prediction:** The final value `f(x) = -4.929` is less than the base value `E[f(x)] = -3.522`. In the context of fraud detection, a negative SHAP value typically indicates a lower probability of fraud (and therefore, a classification leaning towards "not fraud").

*   **Feature Contributions:**
    *   Several features pushed the prediction towards *lower* fraud probability (blue features):
        *   `C5`: Contributed the most, decreasing the log odds of fraud by 0.26.
        *   `C13`: Decreased the log odds of fraud by 0.16.
        *   `M4`: Decreased the log odds of fraud by 0.15.
        *   `C14`: Decreased the log odds of fraud by 0.11.
        *   `PCA_V_4`: Decreased the log odds of fraud by 0.11.
        *   `card6`: Decreased the log odds of fraud by 0.08.
        *   `PCA_V_5`: Decreased the log odds of fraud by 0.07.
        *   `124 other 