In [None]:
from __future__ import annotations
from typing import Sequence, Tuple, Dict, Any
import numpy as np
import pandas as pd
import plotly.express as px

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier

def run_bias_curve(
    df: pd.DataFrame,
    percent_points: int = 20,
    classifiers: Sequence[str] = ("SVC", "LR", "LinearRegression",
                                  "RandomForest", "GradientBoost", "MLP"),
    model_kwargs: Dict[str, Dict[str, Any]] | None = None,
    random_state: int | None = 0,
    verbose: bool = False,
) -> Tuple[pd.DataFrame, Any]:
    """
    Compute bias vs. sampling-percent curves for a set of models.

    For every fraction ``p`` on an evenly spaced grid over [0, 1] a training
    sample is drawn in which boys are ``p`` positive / ``1 - p`` negative and
    girls are the mirror image (``1 - p`` positive / ``p`` negative). Each
    requested model is fit on that sample and scored on the fixed test split.
    The reported bias is::

        (mean score of boys' negative rows + mean score of boys' positive rows)
      - (mean score of girls' negative rows + mean score of girls' positive rows)

    A test cell with no rows contributes 0.0 to the sum.

    DataFrame requirements:
      df columns = ['dataSet','gender','sentiment','embedding']
      - 'dataSet' in {'train','test'}
      - 'gender' in {'boys','girls'}
      - 'sentiment' in {'positive','negative'}
      - 'embedding' is an array-like vector (same length per row)

    Parameters
    ----------
    df : pd.DataFrame
        Combined train/test data as described above.
    percent_points : int
        Number of grid points between 0 and 1 (inclusive); must be >= 1.
    classifiers : sequence of str
        Model names, matched case-insensitively. Accepted spellings:
        'lr'/'logreg'/'logistic'/'logisticregression', 'svc',
        'rf'/'randomforest', 'gb'/'gradientboost'/'gradientboosting',
        'mlp'/'mlpclassifier', 'linear'/'linearregression'.
    model_kwargs : dict or None
        Constructor overrides, read from the keys 'SVC', 'LR' and
        'LogisticRegression' (merged, the latter winning), 'RandomForest',
        'GradientBoost', 'MLP' and 'LinearRegression' -- regardless of which
        spelling was used in ``classifiers``. Overrides take precedence over
        the built-in defaults, including ``max_iter`` and ``random_state``.
    random_state : int or None
        Seed for the sampling generator and default seed for every model
        that accepts one.
    verbose : bool
        Print the per-cell training counts and a progress line per grid point.

    Returns
    -------
    bias_df : pd.DataFrame columns ['percent','bias','classifier']
    fig     : plotly Figure

    Raises
    ------
    ValueError
        If ``percent_points`` < 1, a classifier name is unknown, any of the
        four (gender, sentiment) training cells is empty, or there are no
        test rows.
    """
    if model_kwargs is None:
        model_kwargs = {}
    if percent_points < 1:
        raise ValueError(f"percent_points must be >= 1, got {percent_points}.")

    genders = ("boys", "girls")
    sentiments = ("positive", "negative")

    # Lower-cased spelling -> key under which model_kwargs overrides are read.
    alias_to_key = {
        "lr": "LR",
        "logreg": "LR",
        "logistic": "LR",
        "logisticregression": "LR",
        "svc": "SVC",
        "rf": "RandomForest",
        "randomforest": "RandomForest",
        "gb": "GradientBoost",
        "gradientboost": "GradientBoost",
        "gradientboosting": "GradientBoost",
        "mlp": "MLP",
        "mlpclassifier": "MLP",
        "linear": "LinearRegression",
        "linearregression": "LinearRegression",
    }

    # --- helpers --------------------------------------------------------------
    def _to_X_y(sliced_df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]:
        """Stack the embeddings into a 2-D array and encode sentiment as 1/0."""
        X = np.stack(sliced_df["embedding"].values)
        # map -> numpy to avoid FutureWarning
        y = sliced_df["sentiment"].map({"positive": 1, "negative": 0}).to_numpy(np.int8)
        return X, y

    def _logistic(x: np.ndarray) -> np.ndarray:
        """Sigmoid with the input clipped to [-50, 50] so exp() cannot overflow."""
        z = np.clip(x, -50, 50)
        return 1.0 / (1.0 + np.exp(-z))

    def _minmax01(x: np.ndarray) -> np.ndarray:
        """Rescale to [0, 1]; a constant input maps to 0.5 everywhere."""
        mn, mx = np.min(x), np.max(x)
        if mx == mn:
            return np.full_like(x, 0.5, dtype=float)
        return (x - mn) / (mx - mn)

    def _canonical_key(name: str) -> str:
        """Resolve a classifier spelling to its model_kwargs key or raise."""
        try:
            return alias_to_key[name.lower()]
        except KeyError:
            raise ValueError(f"Unknown classifier '{name}'.") from None

    def _get_model(name: str):
        """
        Build a fresh, unfitted estimator for ``name``.

        Defaults and user overrides are merged into a single dict before the
        constructor call, so an override of ``max_iter`` or ``random_state``
        replaces the default instead of colliding with it.
        """
        key = _canonical_key(name)
        overrides = dict(model_kwargs.get(key, {}))
        if key == "LR":
            overrides.update(model_kwargs.get("LogisticRegression", {}))
            return LogisticRegression(
                **{"max_iter": 200, "random_state": random_state, **overrides}
            )
        if key == "SVC":
            # default probability=False for speed; user can override via
            # model_kwargs["SVC"]["probability"]=True
            return SVC(**{"probability": False, "random_state": random_state, **overrides})
        if key == "RandomForest":
            return RandomForestClassifier(
                **{"n_jobs": -1, "random_state": random_state, **overrides}
            )
        if key == "GradientBoost":
            return GradientBoostingClassifier(**{"random_state": random_state, **overrides})
        if key == "MLP":
            return MLPClassifier(
                **{"max_iter": 200, "random_state": random_state, **overrides}
            )
        # Only remaining key; LinearRegression gets no default seed.
        return LinearRegression(**overrides)

    def _scores_from_model(model, X_train, y_train, X_test) -> np.ndarray:
        """
        Fit ``model`` and score ``X_test``.

        Try predict_proba -> decision_function (logistic) -> predict (minmax+clip).
        Returns scores in [0,1] aligned with 'positive' class.
        """
        model.fit(X_train, y_train)

        if hasattr(model, "predict_proba"):
            probs = model.predict_proba(X_test)
            if probs.ndim == 2 and probs.shape[1] >= 2:
                return probs[:, 1]

        if hasattr(model, "decision_function"):
            margins = model.decision_function(X_test)
            if np.ndim(margins) == 1:
                return _logistic(margins)
            return _logistic(margins[:, -1])

        # Plain regressors: rescale only when predictions leave [0, 1].
        preds = model.predict(X_test).astype(float)
        if np.any((preds < 0) | (preds > 1)):
            preds = _minmax01(preds)
        return np.clip(preds, 0.0, 1.0)

    def _mean_or_zero(arr: np.ndarray, mask: np.ndarray) -> float:
        """Mean of ``arr`` over ``mask``; 0.0 when the mask selects nothing."""
        if not np.any(mask):
            return 0.0
        return float(arr[mask].mean())

    # Fail fast on a misspelt model name instead of after training the
    # models that precede it in the list.
    classifier_names = list(classifiers)
    for name in classifier_names:
        _canonical_key(name)

    # --- data splits ----------------------------------------------------------
    train_data = df[df["dataSet"] == "train"]
    test_data_master = df[df["dataSet"] == "test"]

    # Slice the four (gender, sentiment) training pools once; every grid
    # point samples from these same pools.
    pools = {
        (g, s): train_data[(train_data["gender"] == g) & (train_data["sentiment"] == s)]
        for g in genders
        for s in sentiments
    }

    # confirm all four cells exist in TRAIN
    counts = pd.DataFrame(
        [[len(pools[(g, s)]) for s in sentiments] for g in genders],
        index=pd.Index(genders, name="gender"),
        columns=pd.Index(sentiments, name="sentiment"),
    )
    min_per_cell = int(counts.to_numpy().min())
    if verbose:
        print("Per-cell TRAIN counts:\n", counts)
        print("min_per_cell:", min_per_cell)
    if min_per_cell == 0:
        raise ValueError("Training data must include all four (gender, sentiment) cells to sample.")

    if test_data_master.empty:
        raise ValueError("Test data is empty: df must contain rows with dataSet == 'test'.")

    rng = np.random.default_rng(random_state)

    def sample_training_data(pct: float) -> pd.DataFrame:
        """
        Draw one training sample for fraction ``pct``.

        boys:  positive = p, negative = 1-p
        girls: positive = 1-p, negative = p
        """
        n_pos_boys = int(round(pct * min_per_cell))
        n_neg_boys = min_per_cell - n_pos_boys
        plan = (
            ("boys", "positive", n_pos_boys),
            ("boys", "negative", n_neg_boys),
            ("girls", "positive", n_neg_boys),
            ("girls", "negative", n_pos_boys),
        )
        # One seed is drawn per cell, in this fixed order, so results are
        # reproducible for a given random_state.
        parts = [
            pools[(g, s)].sample(
                n=n, replace=False, random_state=int(rng.integers(0, 2**31 - 1))
            )
            for g, s, n in plan
        ]
        return pd.concat(parts, ignore_index=True)

    # Precompute fixed TEST arrays & boolean masks to avoid per-iter DataFrame work.
    # Test labels are only needed for the masks, so they are never encoded.
    test_X = np.stack(test_data_master["embedding"].values)
    t_gender = test_data_master["gender"].to_numpy()
    t_sent = test_data_master["sentiment"].to_numpy()

    mask_neg_boys = (t_sent == "negative") & (t_gender == "boys")
    mask_neg_girls = (t_sent == "negative") & (t_gender == "girls")
    mask_pos_boys = (t_sent == "positive") & (t_gender == "boys")
    mask_pos_girls = (t_sent == "positive") & (t_gender == "girls")

    # --- main loop ------------------------------------------------------------
    bias_rows = []
    percents = np.linspace(0.0, 1.0, percent_points)

    for pct in percents:
        train_sample = sample_training_data(float(pct))
        train_X, train_y = _to_X_y(train_sample)

        for name in classifier_names:
            model = _get_model(name)
            scores = _scores_from_model(model, train_X, train_y, test_X)

            neg_boys = _mean_or_zero(scores, mask_neg_boys)
            neg_girls = _mean_or_zero(scores, mask_neg_girls)
            pos_boys = _mean_or_zero(scores, mask_pos_boys)
            pos_girls = _mean_or_zero(scores, mask_pos_girls)

            num_bias = (neg_boys + pos_boys) - (neg_girls + pos_girls)
            bias_rows.append({"percent": float(pct), "bias": num_bias, "classifier": name})

        if verbose:
            print(f"pct={pct:.2f} done")

    # Explicit columns keep the frame well-formed even when no rows were produced.
    bias_df = pd.DataFrame(bias_rows, columns=["percent", "bias", "classifier"])
    fig = px.line(
        bias_df, x="percent", y="bias", color="classifier",
        title="Bias vs. Sampling Percent across Models (boys: p pos; girls: 1-p)"
    )
    return bias_df, fig

# --- usage -------------------------------------------------------------------
# NOTE(review): read_pickle can execute code embedded in the file -- only load
# pickles that come from a trusted source.
gender_bias_df = pd.read_pickle("data_set_with_embeddings.pickle")
# Discard every row that has a missing value before modelling.
gender_bias_df = gender_bias_df.dropna()

# Sweep 21 grid points over all six models with a fixed seed.
bias_df, fig = run_bias_curve(
    df=gender_bias_df,
    percent_points=21,
    classifiers=(
        "SVC",
        "LR",
        "LinearRegression",
        "RandomForest",
        "GradientBoost",
        "MLP",
    ),
    model_kwargs={
        # add "probability": True only if you need it
        "SVC": {"C": 1.0, "kernel": "rbf"},
        "RandomForest": {"n_estimators": 300, "max_depth": None, "n_jobs": -1},
        "MLP": {"hidden_layer_sizes": (128, 64), "max_iter": 200},
    },
    random_state=42,
    verbose=True,
)

# Preview the first rows of the result, then render the interactive chart.
print(bias_df.head())
fig.show()
