In [58]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression



# Metrics

In [None]:
def classification_metrics(y_true, y_pred):
    """
    Summarise binary predictions as confusion-matrix counts and derived rates.

    Labels are compared against the literal values 1 (positive) and 0
    (negative); entries holding any other value are not counted in any cell.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned element-wise with ``y_true``.

    Returns
    -------
    dict
        ``n`` (number of labels), the counts ``tp``/``tn``/``fp``/``fn`` and
        the rates ``acc``, ``bal_acc``, ``prec``, ``rec``, ``spec``, ``f1``.
        A rate whose denominator would be zero is reported as 0.
    """
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)

    # Boolean masks for each side of the confusion matrix.
    actual_pos, actual_neg = (actual == 1), (actual == 0)
    flagged_pos, flagged_neg = (predicted == 1), (predicted == 0)

    tp = np.sum(actual_pos & flagged_pos)
    tn = np.sum(actual_neg & flagged_neg)
    fp = np.sum(actual_neg & flagged_pos)
    fn = np.sum(actual_pos & flagged_neg)

    def _ratio(numerator, denominator):
        # Clamp the denominator at 1 so an empty group yields 0 instead of NaN.
        return numerator / max(denominator, 1)

    acc = _ratio(tp + tn, tp + tn + fp + fn)
    prec = _ratio(tp, tp + fp)
    rec = _ratio(tp, tp + fn)

    # Harmonic mean of precision and recall; defined as 0 when both are 0.
    pr_sum = prec + rec
    if pr_sum > 0:
        f1 = 2*prec*rec / max(pr_sum, 1e-12)
    else:
        f1 = 0.0

    # Specificity is the true-negative rate.
    spec = _ratio(tn, tn + fp)

    # Balanced accuracy averages recall over the two classes.
    bal_acc = 0.5 * (rec + spec)

    return {
        "n": len(actual),
        "tp": tp, "tn": tn, "fp": fp, "fn": fn,
        "acc": acc, "bal_acc": bal_acc, "prec": prec, "rec": rec, "spec": spec,
        "f1": f1
    }


def roc_auc_from_probs(y_true, y_prob):
    """
    Area under the ROC curve computed directly from scores.

    Parameters
    ----------
    y_true : array-like
        Binary labels; entries equal to 1 are positives, everything else is
        treated as a negative.
    y_prob : array-like
        Predicted score for the positive class (higher = more likely positive).
        Lists, NumPy arrays and pandas Series are all accepted.

    Returns
    -------
    float
        Trapezoidal area under the (FPR, TPR) curve, or NaN when ``y_true``
        does not contain both classes (the curve is undefined in that case).

    Notes
    -----
    Samples sharing the same score are moved across the threshold together,
    so the result does not depend on the order in which tied samples appear.
    """
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob, dtype=float)

    is_pos = (y_true == 1)
    n_pos = int(is_pos.sum())
    n_neg = int(is_pos.size - n_pos)
    if n_pos == 0 or n_neg == 0:
        # TPR or FPR would be a division by zero; report "undefined" explicitly.
        return float("nan")

    # Stable sort so the ordering is deterministic even with tied scores.
    order = np.argsort(-y_prob, kind="mergesort")
    sorted_prob = y_prob[order]
    sorted_pos = is_pos[order]

    # Running true/false positive counts as the threshold is lowered.
    tp_cum = np.cumsum(sorted_pos)
    fp_cum = np.cumsum(~sorted_pos)

    # Only emit a curve point after the last sample of each distinct score:
    # a threshold cannot separate samples that share the same score.
    last_of_group = np.r_[np.nonzero(np.diff(sorted_prob))[0], sorted_prob.size - 1]

    tpr = np.r_[0.0, tp_cum[last_of_group] / n_pos]
    fpr = np.r_[0.0, fp_cum[last_of_group] / n_neg]

    # np.trapz is deprecated in NumPy 2.x in favour of np.trapezoid; use
    # whichever this NumPy provides.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    return integrate(tpr, fpr)


def pr_auc_from_probs(y_true, y_prob):
    """
    Area under the precision-recall curve computed directly from scores.

    Parameters
    ----------
    y_true : array-like
        Binary labels; entries equal to 1 are positives, everything else is
        treated as a negative.
    y_prob : array-like
        Predicted score for the positive class (higher = more likely positive).
        Lists, NumPy arrays and pandas Series are all accepted.

    Returns
    -------
    float
        Trapezoidal area under the (recall, precision) curve, anchored at
        (recall=0, precision=1). NaN when ``y_true`` has no positives, because
        recall is undefined in that case.

    Notes
    -----
    Samples sharing the same score are moved across the threshold together,
    so the result does not depend on the order in which tied samples appear.
    """
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob, dtype=float)

    is_pos = (y_true == 1)
    n_pos = int(is_pos.sum())
    if n_pos == 0:
        return float("nan")

    # Sort by predicted probability descending (stable, so ties are deterministic).
    order = np.argsort(-y_prob, kind="mergesort")
    sorted_prob = y_prob[order]
    tp_cum = np.cumsum(is_pos[order])

    # One curve point per distinct score, taken after the whole tie group.
    last_of_group = np.r_[np.nonzero(np.diff(sorted_prob))[0], sorted_prob.size - 1]
    n_flagged = last_of_group + 1  # samples predicted positive at each point

    # Curve starts at precision=1 when recall=0; the last point always has
    # recall=1 because every positive has been counted by then.
    precision = np.r_[1.0, tp_cum[last_of_group] / n_flagged]
    recall = np.r_[0.0, tp_cum[last_of_group] / n_pos]

    # np.trapz is deprecated in NumPy 2.x in favour of np.trapezoid; use
    # whichever this NumPy provides.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    return integrate(precision, recall)

# LR Cross Validation

In [60]:
def cv_lr(data, feature_cols, target_col, threshold = 0.5, params = None):
    """
    Cross-validate a class-weighted logistic regression over predefined folds.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``fold`` column plus ``feature_cols`` and ``target_col``.
    feature_cols : list of str
        Columns used as model inputs.
    target_col : str
        Name of the binary target column.
    threshold : float, default 0.5
        A test row is predicted positive when its probability is strictly
        greater than this value.
    params : dict, optional
        Extra keyword arguments for ``LogisticRegression``; they override the
        defaults ``class_weight="balanced"`` and ``max_iter=500``.

    Returns
    -------
    results_df : pandas.DataFrame
        One row per fold (sorted by fold) with the classification metrics,
        test and train ROC-AUC, the fold id and the threshold used.
    preds_df : pandas.DataFrame
        Out-of-fold predictions with columns ``fold``, ``y_true``, ``y_prob``.
    """
    # Defaults first so that anything supplied in `params` takes precedence.
    model_kwargs = {"class_weight": "balanced", "max_iter": 500}
    if params is not None:
        model_kwargs.update(params)

    fold_metrics = []
    all_preds = []
    for f in data.fold.unique():

        # Hold out fold `f` for testing and train on all remaining folds.
        train = data[data.fold != f]
        test = data[data.fold == f]
        X_train, y_train = train[feature_cols], train[target_col]
        X_test, y_test = test[feature_cols], test[target_col]

        # No PCA step: the model is trained on the raw feature columns. (A PCA
        # that is fitted but never applied would only add run time.)
        # NOTE(review): the solver reports hitting max_iter in the recorded
        # runs; scaling the features may help it converge - confirm separately.
        lr_model = LogisticRegression(**model_kwargs)

        lr_model.fit(X_train, y_train)
        y_prob = lr_model.predict_proba(X_test)[:, 1]
        y_train_prob = lr_model.predict_proba(X_train)[:, 1]

        # Hard predictions at the requested cut-off (0.5 unless overridden).
        y_pred = (y_prob > threshold).astype(int)

        metrics = classification_metrics(y_test, y_pred)
        metrics['roc_auc'] = roc_auc_from_probs(y_test, y_prob)
        metrics['train_roc_auc'] = roc_auc_from_probs(y_train, y_train_prob)
        metrics['fold'] = int(f)
        metrics['threshold'] = threshold
        fold_metrics.append(metrics)

        # Keep the raw probabilities so the threshold can be tuned afterwards.
        fold_preds = pd.DataFrame({
            'fold': f,
            'y_true': y_test.values,
            'y_prob': y_prob
        })
        all_preds.append(fold_preds)

    results_df = pd.DataFrame(fold_metrics).sort_values("fold").reset_index(drop=True)
    preds_df = pd.concat(all_preds, ignore_index=True)

    return results_df, preds_df

        


# Import Data

In [61]:
# Stratified data set
apps_cv_strat = pd.read_csv("data/apps_cv_strat.txt")
apps_holdout_strat = pd.read_csv("data/apps_holdout_strat.txt")

# Random data set
apps_cv_rand = pd.read_csv("data/apps_cv_random.txt")
apps_holdout_rand = pd.read_csv("data/apps_holdout_random.txt")

# Multi-stratified data set
apps_cv_multi = pd.read_csv("data/apps_cv_multi.txt")
apps_holdout_multi = pd.read_csv("data/apps_holdout_multi.txt")

target_col = 'TARGET'

# Single source of truth for the columns kept out of the model inputs: the
# target, the row identifier, the fold assignment and the other columns that
# are deliberately excluded from every feature list below.
NON_FEATURE_COLS = [
    target_col, 'SK_ID_CURR', 'fold', 'neighbors_target_mean_500', 'AGE_INT', 'CODE_GENDER_M',
    'CODE_GENDER_XNA', 'DAYS_BIRTH',
    'NAME_FAMILY_STATUS_Previously Married', 'NAME_FAMILY_STATUS_Single',
]

feature_cols_strat = [col for col in apps_cv_strat.columns if col not in NON_FEATURE_COLS]

feature_cols_rand = [col for col in apps_cv_rand.columns if col not in NON_FEATURE_COLS]

# The multi-stratified runs reuse feature_cols_strat, so fail early and
# clearly if that frame is missing any of those columns.
missing_in_multi = sorted(set(feature_cols_strat) - set(apps_cv_multi.columns))
assert not missing_in_multi, f"apps_cv_multi is missing feature columns: {missing_in_multi}"


# Stratified Results

In [62]:
# Baseline CV on the stratified folds: default 0.5 threshold, default model settings.
strat_results, strat_preds = cv_lr(apps_cv_strat, feature_cols_strat, target_col, params = None)
strat_results

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  auc = np.trapz(tpr, fpr)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  auc = np.trapz(tpr, fpr)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the 

Unnamed: 0,n,tp,tn,fp,fn,acc,bal_acc,prec,rec,spec,f1,roc_auc,train_roc_auc,fold,threshold
0,49156,2111,29749,15438,1858,0.648141,0.595113,0.120292,0.531872,0.658353,0.196208,0.631752,0.634319,1,0.5
1,49156,2157,29403,15784,1812,0.642038,0.597079,0.120227,0.543462,0.650696,0.196896,0.633031,0.633121,2,0.5
2,49156,2098,29639,15548,1871,0.645638,0.592258,0.118894,0.528597,0.655919,0.194124,0.622576,0.634662,3,0.5
3,49155,2113,29459,15727,1856,0.642295,0.592163,0.118442,0.532376,0.65195,0.193773,0.633369,0.632836,4,0.5
4,49154,2149,30404,14782,1819,0.662266,0.607223,0.126927,0.541583,0.672863,0.205656,0.643903,0.633555,5,0.5


# Random Results

In [63]:
# Same baseline CV, this time on the randomly assigned folds.
rand_results, rand_preds = cv_lr(apps_cv_rand, feature_cols_rand, target_col, params = None)
rand_results

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  auc = np.trapz(tpr, fpr)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  auc = np.trapz(tpr, fpr)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the 

Unnamed: 0,n,tp,tn,fp,fn,acc,bal_acc,prec,rec,spec,f1,roc_auc,train_roc_auc,fold,threshold
0,49156,2172,28995,16183,1806,0.634043,0.593899,0.118333,0.546003,0.641795,0.19451,0.628376,0.635106,1,0.5
1,49155,2240,28565,16650,1700,0.626691,0.600144,0.118581,0.568528,0.631759,0.196233,0.635423,0.631582,2,0.5
2,49155,2055,29768,15504,1828,0.647401,0.593383,0.117034,0.52923,0.657537,0.19168,0.634575,0.634603,3,0.5
3,49155,2239,28838,16360,1718,0.632225,0.601935,0.120383,0.565833,0.638037,0.198528,0.638369,0.633428,4,0.5
4,49155,2227,29171,15964,1793,0.638755,0.600143,0.122423,0.55398,0.646306,0.200531,0.632413,0.636317,5,0.5


# Multi-Stratified Results

In [64]:
# Same baseline CV on the multi-stratified folds.
# NOTE(review): reuses feature_cols_strat, which was derived from apps_cv_strat -
# assumes apps_cv_multi has the same feature columns; confirm.
multi_results, multi_preds = cv_lr(apps_cv_multi, feature_cols_strat, target_col, params = None)
multi_results

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  auc = np.trapz(tpr, fpr)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  auc = np.trapz(tpr, fpr)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the 

Unnamed: 0,n,tp,tn,fp,fn,acc,bal_acc,prec,rec,spec,f1,roc_auc,train_roc_auc,fold,threshold
0,49158,2247,28883,16305,1723,0.633264,0.602585,0.121119,0.565995,0.639174,0.199538,0.635863,0.634664,1,0.5
1,49157,2188,28903,16284,1782,0.632484,0.595382,0.11845,0.551134,0.639631,0.194992,0.629954,0.634339,2,0.5
2,49155,2158,29608,15579,1810,0.646241,0.599542,0.121667,0.543851,0.655233,0.198848,0.637161,0.63714,3,0.5
3,49154,2067,30256,14930,1901,0.657586,0.595253,0.12161,0.520917,0.669588,0.197186,0.632107,0.636869,4,0.5
4,49154,2200,29689,15497,1768,0.648757,0.605738,0.124315,0.554435,0.65704,0.203093,0.642218,0.63456,5,0.5


# Comparison Between Results

In [65]:
# Mean out-of-fold ROC-AUC for each fold-assignment strategy.
strat_avg_auc = strat_results['roc_auc'].mean()
rand_avg_auc = rand_results['roc_auc'].mean()
multi_avg_auc = multi_results['roc_auc'].mean()

print(f"Stratified Avg ROC-AUC: {strat_avg_auc:.4f}")
print(f"Random Avg ROC-AUC: {rand_avg_auc:.4f}")
print(f"Multi-Stratified Avg ROC-AUC: {multi_avg_auc:.4f}")

Stratified Avg ROC-AUC: 0.6329
Random Avg ROC-AUC: 0.6338
Multi-Stratified Avg ROC-AUC: 0.6355


# Tune Threshold
Since the highest ROC-AUC was with the multi-stratified set, we will tune the threshold on that set.

In [66]:
thresholds = np.linspace(0, 1, 200)

# Pull the pooled out-of-fold labels and scores out once as plain arrays so the
# sweep below does not rebuild pandas masks for every candidate threshold.
oof_prob = multi_preds["y_prob"].to_numpy()
oof_is_pos = (multi_preds["y_true"].to_numpy() == 1)
oof_is_neg = (multi_preds["y_true"].to_numpy() == 0)

scores = []
for t in thresholds:
    # Strict '>' on purpose: this is how cv_lr, cv_lr_penalty and the holdout
    # cell turn probabilities into labels, so the threshold is tuned under the
    # same rule that later applies it.
    y_pred = (oof_prob > t).astype(int)
    tp = ((y_pred == 1) & oof_is_pos).sum()
    fp = ((y_pred == 1) & oof_is_neg).sum()
    tn = ((y_pred == 0) & oof_is_neg).sum()
    fn = ((y_pred == 0) & oof_is_pos).sum()

    # Denominators are clamped so an empty group scores 0 instead of NaN.
    prec = tp / max(tp + fp, 1)
    rec  = tp / max(tp + fn, 1)
    f1   = 2 * prec * rec / max(prec + rec, 1e-12)
    spec = tn / max(tn + fp, 1)
    bal_acc = (rec + spec) / 2

    scores.append((t, prec, rec, f1, bal_acc))

# Sort by F1-score (best first); the top row gives the chosen threshold.
scores_df = (
    pd.DataFrame(scores, columns=["threshold", "precision", "recall", "f1", "bal_acc"])
    .sort_values("f1", ascending = False)
    .reset_index(drop=True)
)
best_threshold = scores_df.loc[scores_df["f1"].idxmax(), "threshold"]
scores_df

Unnamed: 0,threshold,precision,recall,f1,bal_acc
0,0.522613,0.128315,0.445676,0.199261,0.589879
1,0.512563,0.124902,0.489770,0.199044,0.594191
2,0.507538,0.123465,0.513102,0.199036,0.596577
3,0.527638,0.129962,0.423856,0.198929,0.587316
4,0.517588,0.126332,0.466690,0.198838,0.591609
...,...,...,...,...,...
195,0.979899,0.371429,0.000655,0.001308,0.500279
196,0.989950,0.407407,0.000554,0.001107,0.500242
197,0.984925,0.366667,0.000554,0.001107,0.500235
198,0.994975,0.400000,0.000504,0.001007,0.500219


# LR With Penalty

In [None]:
def cv_lr_penalty(data, feature_cols, target_col, threshold, penalty, params = None):
    """
    Cross-validate an L2-regularised, class-weighted logistic regression.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``fold`` column plus ``feature_cols`` and ``target_col``.
    feature_cols : list of str
        Columns used as model inputs.
    target_col : str
        Name of the binary target column.
    threshold : float
        A test row is predicted positive when its probability is strictly
        greater than this value.
    penalty : float
        Passed to ``LogisticRegression`` as ``C``. In scikit-learn ``C`` is the
        *inverse* of the regularisation strength, so smaller values mean
        stronger regularisation.
    params : dict, optional
        Extra keyword arguments for ``LogisticRegression``; they override the
        defaults set here (``penalty="l2"``, ``C``, ``class_weight="balanced"``,
        ``max_iter=500``).

    Returns
    -------
    pandas.DataFrame
        One row per fold (sorted by fold) with the classification metrics,
        test and train ROC-AUC and the fold id.
    """
    # Defaults first so that anything supplied in `params` takes precedence.
    model_kwargs = {
        "penalty": "l2",
        "C": penalty,
        "class_weight": "balanced",
        "max_iter": 500,
    }
    if params is not None:
        model_kwargs.update(params)

    fold_metrics = []
    for f in data.fold.unique():

        # Hold out fold `f` for testing and train on all remaining folds.
        train = data[data.fold != f]
        test = data[data.fold == f]
        X_train, y_train = train[feature_cols], train[target_col]
        X_test, y_test = test[feature_cols], test[target_col]

        # No PCA step: the model is trained on the raw feature columns. (A PCA
        # that is fitted but never applied would only add run time.)
        lr_model = LogisticRegression(**model_kwargs)

        lr_model.fit(X_train, y_train)
        y_prob = lr_model.predict_proba(X_test)[:, 1]
        y_train_prob = lr_model.predict_proba(X_train)[:, 1]

        y_pred = (y_prob > threshold).astype(int)

        metrics = classification_metrics(y_test, y_pred)
        metrics['roc_auc'] = roc_auc_from_probs(y_test, y_prob)
        metrics['train_roc_auc'] = roc_auc_from_probs(y_train, y_train_prob)
        metrics['fold'] = int(f)

        fold_metrics.append(metrics)

    return pd.DataFrame(fold_metrics).sort_values("fold").reset_index(drop=True)

        


# Hyperparameter Tuning

In [73]:
# Candidate regularisation settings. Despite the "lambda" naming, each value is
# handed to cv_lr_penalty as `penalty`, which forwards it to
# LogisticRegression(C=...); in scikit-learn C is the inverse of the
# regularisation strength, so smaller values regularise more.
lambda_values = [0.001, 0.01, 0.1, 1, 10, 100, 1000]

# One CV run per candidate; keep only the mean out-of-fold ROC-AUC.
lam_results = [
    {
        "Lambda_Value": lam,
        "avg_roc_auc": cv_lr_penalty(apps_cv_multi, feature_cols_strat, target_col,
                                     threshold = best_threshold, penalty = lam)['roc_auc'].mean(),
    }
    for lam in lambda_values
]

# Rank candidates best-first and remember the winner for later cells.
results_df = (
    pd.DataFrame(lam_results)
    .sort_values("avg_roc_auc", ascending = False)
    .reset_index(drop=True)
)
best_row = results_df["avg_roc_auc"].idxmax()
best_lambda = results_df.loc[best_row, "Lambda_Value"]
results_df






STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  auc = np.trapz(tpr, fpr)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  auc = np.trapz(tpr, fpr)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the 

Unnamed: 0,Lambda_Value,avg_roc_auc
0,0.1,0.635764
1,1000.0,0.63566
2,100.0,0.63564
3,10.0,0.635593
4,1.0,0.63546
5,0.01,0.635188
6,0.001,0.634078


# Holdout Evaluation

In [None]:
# Final check: train on the full multi-stratified CV set and score the holdout.
X_train, y_train = apps_cv_multi[feature_cols_strat], apps_cv_multi[target_col]
X_test, y_test = apps_holdout_multi[feature_cols_strat], apps_holdout_multi[target_col]

# No PCA step: the model is trained on the raw feature columns. (A PCA that is
# fitted but never applied would only add run time.)
# Use the C value selected in the hyperparameter-tuning cell rather than a
# hard-coded copy, so the two cannot drift apart when tuning is re-run.
lr_model = LogisticRegression(penalty="l2",
                              C=best_lambda,
                              class_weight="balanced",
                              max_iter=500
                              )

lr_model.fit(X_train, y_train)
y_prob = lr_model.predict_proba(X_test)[:, 1]
y_train_prob = lr_model.predict_proba(X_train)[:, 1]

# Same strict '>' rule used during cross-validation.
y_pred = (y_prob > best_threshold).astype(int)

metrics = classification_metrics(y_test, y_pred)
metrics['roc_auc'] = roc_auc_from_probs(y_test, y_prob)
metrics['train_roc_auc'] = roc_auc_from_probs(y_train, y_train_prob)
# Key name kept as-is; the value is the precision-recall AUC.
metrics['pr_roc_auc'] = pr_auc_from_probs(y_test, y_prob)

# Holdout labels and scores, kept for the threshold-pair search further down.
preds = pd.DataFrame({
    'y_true': y_test.values,
    'y_prob': y_prob
})

metrics



    

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  auc = np.trapz(tpr, fpr)


{'n': 61443,
 'tp': np.int64(2325),
 'tn': np.int64(40427),
 'fp': np.int64(16056),
 'fn': np.int64(2635),
 'acc': np.float64(0.6957993587552691),
 'bal_acc': np.float64(0.592243739266682),
 'prec': np.float64(0.12648930961318752),
 'rec': np.float64(0.46875),
 'spec': np.float64(0.715737478533364),
 'f1': np.float64(0.19922025620153377),
 'roc_auc': np.float64(0.631595047439338),
 'train_roc_auc': np.float64(0.6298182307772959)}

# Data Leakage Check

In [76]:
# Sanity check for leakage: with the target randomly permuted the features carry
# no signal, so the cross-validated ROC-AUC should land close to 0.5.
# A seeded generator keeps the permutation (and the printed AUC) reproducible.
rng = np.random.default_rng(42)
shuffled = apps_cv_strat.copy()
shuffled[target_col] = rng.permutation(shuffled[target_col].values)
fold_results_shuffled = cv_lr_penalty(shuffled, feature_cols_strat, target_col, threshold = best_threshold, penalty = best_lambda)
print("Shuffled mean AUC:", fold_results_shuffled.roc_auc.mean())

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  auc = np.trapz(tpr, fpr)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  auc = np.trapz(tpr, fpr)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the 

Shuffled mean AUC: 0.49946210883976283


# Tuning Threshold Part 2

In [None]:
def thresh_pair_metrics(y_true, y_prob, t_low, t_high):
    """
    Evaluate a three-way approve / review / deny policy built from two cut-offs.

    A row is approved when ``y_prob <= t_low``, denied when
    ``y_prob >= t_high`` and sent to review otherwise. If both conditions hold
    (``t_low >= t_high``), approval takes precedence.

    Parameters
    ----------
    y_true : array-like
        Binary outcomes (1 = default). Lists, NumPy arrays and pandas Series
        are accepted.
    y_prob : array-like
        Predicted default probabilities aligned with ``y_true``.
    t_low, t_high : float
        Approve and deny cut-offs.

    Returns
    -------
    dict
        For each of ``approve``, ``review`` and ``deny``: ``<group>_n``,
        ``<group>_rate`` and ``<group>_default_rate`` (NaN for an empty group),
        plus ``approve_nondefault_precision``, ``deny_default_precision`` and
        ``deny_default_recall``.
    """
    # Work on plain arrays so masks are positional regardless of the input type.
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)
    total = len(y_true)

    # Decision codes: 0 = approve, 1 = deny, -1 = review.
    decision = np.where(y_prob <= t_low, 0, np.where(y_prob >= t_high, 1, -1))

    # Size and observed default rate of each decision group.
    metrics = {}
    for name, k in [("approve", 0), ("review", -1), ("deny", 1)]:

        # How many applicants landed in this group, and what share of all rows.
        idx = (decision == k)
        n = idx.sum()
        metrics[f"{name}_n"] = n
        metrics[f"{name}_rate"] = n / total if total else np.nan

        # Default rate in this group; undefined (NaN) when the group is empty.
        metrics[f"{name}_default_rate"] = y_true[idx].mean() if n else np.nan

    # How often approved loans did not default (precision, high = good).
    metrics["approve_nondefault_precision"] = 1 - metrics["approve_default_rate"]

    # How often denied loans did default (precision, high = good).
    metrics["deny_default_precision"] = metrics["deny_default_rate"]

    # Share of all defaults that the deny group catches.
    pos = (y_true == 1)
    metrics["deny_default_recall"] = ((decision == 1) & pos).sum() / max(pos.sum(), 1)

    return metrics

In [99]:
def search_thresholds(y_true, y_prob, target_approve_prec=0.95, target_deny_prec=0.45, target_review_rate=0.25,
                      low_grid=None, high_grid=None):
    """
    Grid-search (t_low, t_high) pairs that satisfy all three policy targets.

    Parameters
    ----------
    y_true, y_prob : array-like
        Outcomes and predicted default probabilities, forwarded unchanged to
        ``thresh_pair_metrics``.
    target_approve_prec : float
        Minimum share of approved rows that must not default.
    target_deny_prec : float
        Minimum share of denied rows that must default.
    target_review_rate : float
        Maximum share of rows allowed to fall into manual review.
    low_grid, high_grid : iterable of float, optional
        Candidate approve / deny cut-offs. Default to 71 evenly spaced values
        in [0.05, 0.40] and 81 evenly spaced values in [0.50, 0.90].

    Returns
    -------
    pandas.DataFrame
        One row per qualifying pair with ``t_low``, ``t_high`` and every metric
        from ``thresh_pair_metrics``. Empty (no columns) when no pair
        qualifies. Pairs whose precision is NaN (empty group) never qualify
        because NaN fails the ``>=`` comparison.
    """
    if low_grid is None:
        low_grid = np.linspace(0.05, 0.40, 71)    # approve threshold grid
    if high_grid is None:
        high_grid = np.linspace(0.50, 0.90, 81)   # deny threshold grid

    rows = []

    for t_low in low_grid:
        for t_high in high_grid:
            # A pair only makes sense when the approve cut-off is below the deny cut-off.
            if t_low >= t_high:
                continue

            # Metrics for this threshold pair.
            metrics = thresh_pair_metrics(y_true, y_prob, t_low, t_high)

            # Check each target constraint independently.
            approve_ok = (metrics["approve_nondefault_precision"] >= target_approve_prec)
            deny_ok = (metrics["deny_default_precision"] >= target_deny_prec)
            review_ok = (metrics["review_rate"] <= target_review_rate)

            # Keep the pair only if it meets every target; no ranking is applied here.
            if approve_ok and deny_ok and review_ok:
                rows.append({"t_low": t_low, "t_high": t_high, **metrics})

    return pd.DataFrame(rows)

In [None]:
# Search threshold pairs on the holdout predictions under tighter targets than
# the function defaults.
thresh_pair_results = search_thresholds(preds.y_true,
                                        preds.y_prob,
                                        target_approve_prec = 0.95,
                                        target_deny_prec = 0.5,
                                        target_review_rate = 0.22)

# Bare last expression so the notebook renders the frame as a rich table
# instead of the wrapped plain-text dump that print() produces.
thresh_pair_results


    t_low  t_high  approve_n  approve_rate  approve_default_rate  review_n  \
0   0.350   0.500       7289      0.118630              0.036356     30297   
1   0.355   0.500       7813      0.127159              0.036094     29773   
2   0.360   0.500       8384      0.136452              0.038287     29202   
3   0.360   0.505       8384      0.136452              0.038287     30458   
4   0.365   0.500       8991      0.146331              0.039818     28595   
5   0.365   0.505       8991      0.146331              0.039818     29851   
6   0.370   0.500       9638      0.156861              0.040672     27948   
7   0.370   0.505       9638      0.156861              0.040672     29204   
8   0.370   0.510       9638      0.156861              0.040672     30385   
9   0.375   0.500      10359      0.168595              0.042089     27227   
10  0.375   0.505      10359      0.168595              0.042089     28483   
11  0.375   0.510      10359      0.168595              0.042089

In [None]:
# Attach the holdout probabilities to the holdout frame (trimmed to the number
# of predictions), then evaluate one specific approve / deny threshold pair.
apps_holdout_multi = apps_holdout_multi.iloc[:len(y_prob)].assign(y_prob=y_prob)

thresh_pair_metrics(apps_holdout_multi["TARGET"], apps_holdout_multi["y_prob"], 0.11, 0.57)