In [17]:
# Add project root to sys.path (only once at the top)
import sys
from pathlib import Path

# The notebook is run from notebooks/, so the project root is the parent of
# the current working directory.
project_root = Path.cwd().parent

# Prepend the root to the import search path unless it is already listed,
# so re-running this cell does not pile up duplicate entries.
if sys.path.count(str(project_root)) == 0:
    sys.path.insert(0, str(project_root))
    


In [18]:
# vehicle_insurance_fraud_detection/plots.py

import plotly.express as px
import plotly.graph_objects as go
import pandas as pd


def plot_class_distribution(y: pd.Series, title="Class Distribution"):
    """Build a bar chart showing how many samples fall into each class.

    The values of ``y`` are counted with ``value_counts()``; the index labels
    0 and 1 are then renamed to "Non-Fraud" and "Fraud" respectively.

    Args:
        y: Series of class labels.
        title: Chart title.

    Returns:
        The Plotly Express bar figure (green for "Non-Fraud", red for "Fraud").
    """
    class_names = {0: "Non-Fraud", 1: "Fraud"}
    class_counts = y.value_counts().rename(class_names)

    return px.bar(
        x=class_counts.index,
        y=class_counts.values,
        color=class_counts.index,
        color_discrete_map={"Non-Fraud": "green", "Fraud": "red"},
        labels={"x": "Class", "y": "Count"},
        title=title,
    )


def plot_feature_importance(importance_df: pd.DataFrame, top_n=20):
    """
    Plot the highest-ranked features as a horizontal bar chart.

    Args:
        importance_df: DataFrame with the columns ['Feature', 'Importance'].
        top_n: Maximum number of features to display.

    Returns:
        The Plotly Express bar figure, with bars coloured by importance and
        the most important feature at the top.
    """
    ranked = importance_df.sort_values(by="Importance", ascending=False)
    top_features = ranked.head(top_n)

    # head() yields fewer than top_n rows when the frame is shorter, so the
    # title reports the number of bars actually drawn rather than top_n.
    shown = len(top_features)

    fig = px.bar(
        top_features,
        x="Importance",
        y="Feature",
        orientation="h",
        title=f"Top {shown} Important Features",
        color="Importance",
        color_continuous_scale="Bluered",
    )
    # Reverse the y-axis so the first (largest) row sits at the top.
    fig.update_layout(yaxis=dict(autorange="reversed"))
    return fig


def plot_confusion_matrix(cm, labels=None):
    """Plot a confusion matrix heatmap using Plotly.

    Args:
        cm: Confusion matrix values; used both as the heatmap colours and as
            the text drawn inside each cell.
        labels: Class names used for both axes. Defaults to
            ["Non-Fraud", "Fraud"] when omitted.

    Returns:
        A ``go.Figure`` with the x-axis titled "Predicted Label" and the
        y-axis titled "True Label".
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if labels is None:
        labels = ["Non-Fraud", "Fraud"]

    fig = go.Figure(
        data=go.Heatmap(
            z=cm,
            x=labels,
            y=labels,
            hoverongaps=False,
            colorscale="Blues",
            text=cm,
            texttemplate="%{text}",
        )
    )
    fig.update_layout(
        title="Confusion Matrix",
        xaxis_title="Predicted Label",
        yaxis_title="True Label",
    )
    return fig


def plot_roc_curve(fpr, tpr, auc_score):
    """Draw a ROC curve together with a dashed diagonal reference line.

    Args:
        fpr: False positive rates (x-axis values).
        tpr: True positive rates (y-axis values).
        auc_score: Area under the curve; shown in the title with two decimals.

    Returns:
        A ``go.Figure`` holding the "ROC Curve" and "Random" line traces.
    """
    roc_trace = go.Scatter(x=fpr, y=tpr, mode="lines", name="ROC Curve")
    diagonal_trace = go.Scatter(
        x=[0, 1],
        y=[0, 1],
        mode="lines",
        name="Random",
        line={"dash": "dash"},
    )

    fig = go.Figure(data=[roc_trace, diagonal_trace])
    fig.update_layout(
        title=f"ROC Curve (AUC = {auc_score:.2f})",
        xaxis_title="False Positive Rate",
        yaxis_title="True Positive Rate",
        showlegend=True,
    )
    return fig


In [19]:
from vehicle_insurance_fraud_detection.plots import (
    plot_class_distribution,
    plot_feature_importance,
    plot_confusion_matrix,
    plot_roc_curve
)

import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score

# Smoke-test demo: build small synthetic inputs and render each plot helper once.

# Simulated ground truth: 90 non-fraud (0) samples followed by 10 fraud (1) samples.
y_true = pd.Series([0]*90 + [1]*10)
# Simulated hard predictions, aligned by position with y_true:
#   first 90 (true 0): 85 predicted 0, 5 predicted 1
#   last 10  (true 1):  5 predicted 0, 5 predicted 1
y_pred = [0]*85 + [1]*5 + [0]*5 + [1]*5
# Fake scores for the demo only: 100 evenly spaced values from 0 to 1.
# They increase with position, so the fraud samples at the end get the
# highest scores.
y_proba = np.linspace(0, 1, 100)

# 1. Class Distribution
fig1 = plot_class_distribution(y_true)
fig1.show()

# 2. Feature Importance (fake example)
# Ten placeholder features with random importances; no seed is set in this
# cell, so the values may differ between runs.
importance_df = pd.DataFrame({
    "Feature": [f"feature_{i}" for i in range(10)],
    "Importance": np.random.rand(10)
})
fig2 = plot_feature_importance(importance_df)
fig2.show()

# 3. Confusion Matrix
# Computed from the simulated labels and hard predictions above.
cm = confusion_matrix(y_true, y_pred)
fig3 = plot_confusion_matrix(cm)
fig3.show()

# 4. ROC Curve
# roc_curve's third return value (the thresholds) is not needed for plotting.
fpr, tpr, _ = roc_curve(y_true, y_proba)
auc_score = roc_auc_score(y_true, y_proba)
fig4 = plot_roc_curve(fpr, tpr, auc_score)
fig4.show()
