In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [None]:
import mmu
from mmu.viz.utils import _set_plot_style
from mmu.lib._mmu_core import pr_multn_sim_curve_error_mt

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

### Generate data

Here we use scikit-learn's `make_classification` to generate a toy data set, which we fit with logistic regression.
We use 500 points for the train set and 500 for the test set.

In [None]:
# Seed generator feeding the sklearn calls below.
# Tutorial-notebook convenience only -- do not use this in real scenarios.
seeds = mmu.commons.utils.SeedGenerator(234)

# Toy data set: 1000 samples, two classes (first seed drawn here).
X, y = make_classification(n_samples=1000, n_classes=2, random_state=seeds())

# Hold out half of the samples as the test set (second seed drawn here).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=seeds(),
)

# Fit a logistic regression on the training half; `fit` returns the estimator itself.
model = LogisticRegression(solver='lbfgs').fit(X_train, y_train)

# Keep only the predicted probability of the positive class (column 1).
y_score = model.predict_proba(X_test)[:, 1]


In [None]:
# Build the precision-recall curve uncertainty object from the held-out labels
# (`y_test`) and the model's positive-class probabilities (`y_score`).
# Its attributes (precision/recall grids, confusion matrices, n_sigmas, epsilon)
# are the inputs of the simulation call in the following cell.
pr_curve_err = mmu.PrecisionRecallCurveUncertainty.from_scores(y_test, y_score)

In [None]:
%%time

sim_scores = pr_multn_sim_curve_error_mt(
    n_sims=10000,
    n_conf_mats=pr_curve_err.n_conf_mats,
    precs_grid=pr_curve_err.prec_grid,
    recs_grid=pr_curve_err.rec_grid,
    conf_mats=pr_curve_err.conf_mats,
    n_sigmas=pr_curve_err.n_sigmas,
    epsilon=pr_curve_err.epsilon,
    seed=897345926,
    n_threads=15
)

In [None]:
# Persist the simulation output to disk in NumPy's binary format
# (np.save appends the ".npy" extension when the name has none).
np.save(file='sim_scores', arr=sim_scores)