# Double Segmentation Analysis Example
This notebook demonstrates how to perform double segmentation analysis with the `tab-right` package and interactive Plotly visualizations, using dummy data.

In [None]:
# Install dependencies if running in Colab or a fresh environment.
# (%pip installs into the active kernel's environment, which !pip does not guarantee.)
# %pip install plotly pandas scikit-learn tab-right numpy

In [None]:
# Reload imported modules automatically before each cell runs, so edits to local code are picked up
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import plotly.io as pio
from sklearn.datasets import fetch_openml

# NOTE(review): nothing from tab_right is imported in this cell — TODO add the
# intended tab_right imports or drop this placeholder.

# Render Plotly figures with the "notebook" renderer
pio.renderers.default = "notebook"

## Load Example Dataset & Create Dummy Data
We'll use the UCI Adult dataset for features and generate dummy target and prediction columns.

In [None]:
# Fetch the Adult dataset from OpenML as a pandas DataFrame
data = fetch_openml("adult", version=2, as_frame=True)

# Work on a 5000-row random sample, then discard rows with missing values
df = (
    data.frame.copy()
    .sample(n=5000, random_state=42)
    .reset_index(drop=True)
    .dropna()
)

# Attach a random binary target and a random probability-like prediction.
# The global seed is set once, right before both draws, so re-running the cell
# reproduces the same dummy columns.
n_rows = len(df)
np.random.seed(42)
df["target"] = np.random.randint(0, 2, size=n_rows)
df["prediction"] = np.random.rand(n_rows)

# Keep only the columns used for the analysis (extend the list to add features)
analysis_columns = ["age", "education-num", "hours-per-week", "target", "prediction"]
df_analysis = df[analysis_columns].copy()
df_analysis.head()

### Double Feature Segmentation
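Analyze how dummy metrics (accuracy and ROC AUC) vary across segments of the data. The cells below break the data down by one categorical feature at a time; because the target and predictions are random, no segment is expected to show real predictive signal.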

In [None]:
# Analyze categorical features
import plotly.express as px
from sklearn.metrics import accuracy_score, roc_auc_score  # Import accuracy_score


def safe_roc_auc_score(y_true, y_pred):
    """Return the ROC AUC for ``y_true``/``y_pred``, or ``None`` when it cannot be computed.

    ``None`` is returned when ``y_true`` holds fewer than two distinct labels,
    or when ``roc_auc_score`` raises ``ValueError``.
    """
    try:
        distinct_labels = set(y_true)
        has_two_classes = len(distinct_labels) >= 2
        # Only call into scikit-learn when both classes are present
        result = roc_auc_score(y_true, y_pred) if has_two_classes else None
    except ValueError:
        result = None
    return result

In [None]:
# Select some categorical features to analyze
# (assumes the OpenML Adult frame exposes these column names — verify against df.columns)
cat_features_to_analyze = ["workclass", "education", "marital-status", "occupation", "relationship"]

# Probability cut-off used to turn the dummy probability into a hard class label
CLASS_THRESHOLD = 0.5

# df_analysis only carries the numeric features, so build the working frame from df,
# which still holds the categorical columns next to target and prediction.
segment_frame = df[cat_features_to_analyze + ["target", "prediction"]].copy()
segment_frame["pred_class"] = (segment_frame["prediction"] >= CLASS_THRESHOLD).astype(int)


def _segment_accuracy(pred_class):
    """Accuracy of the hard predictions in one segment; targets are matched by row index."""
    return accuracy_score(segment_frame.loc[pred_class.index, "target"], pred_class)


def _segment_auc(pred_prob):
    """ROC AUC of the probabilities in one segment, or NaN when the AUC is undefined."""
    auc = safe_roc_auc_score(segment_frame.loc[pred_prob.index, "target"], pred_prob)
    # NaN (instead of None) keeps the aggregated column numeric and works with .notna()
    return np.nan if auc is None else auc


for cat_feature in cat_features_to_analyze:
    # One row per observed category; observed=True skips categories that have no
    # rows left after sampling/dropna, so the metric helpers never see an empty group.
    grouped = (
        segment_frame.groupby(cat_feature, observed=True)
        .agg(
            target_count=("target", "count"),
            target_mean=("target", "mean"),
            accuracy=("pred_class", _segment_accuracy),
            auc=("prediction", _segment_auc),
        )
        .reset_index()
    )

    # Filter out categories with no AUC score for better visualization
    grouped_filtered = grouped[grouped["auc"].notna()]
    if grouped_filtered.empty:
        continue

    # Plot accuracy by category, colored by segment size
    fig = px.bar(
        grouped_filtered,
        x=cat_feature,
        y="accuracy",
        color="target_count",
        hover_data=["target_mean", "auc", "target_count"],
        labels={
            "accuracy": "Accuracy",
            "target_count": "Sample Count",
            "target_mean": "Positive Rate",
            "auc": "AUC",
        },
        title=f"Model Performance by {cat_feature}",
    )
    fig.update_layout(xaxis_tickangle=-45)
    fig.show()

End of demonstration.