In [71]:
import sklearn.metrics
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import matplotlib.pyplot as plt
import math

%matplotlib inline

In [72]:
# Breast-cancer dataset bundled with scikit-learn: X holds the feature
# matrix and y the target labels.
data = load_breast_cancer()
X, y = data.data, data.target

In [73]:
def recall_cond(y_true, y_pred, inp = None, alpha = None):
    """Recall reached when only the top-scored samples are labelled positive.

    The ``threshold`` samples with the highest scores are treated as
    predicted positives and every other sample as a predicted negative.
    The result is the fraction of true positives (``y_true == 1``) that
    end up among the selected samples.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground-truth labels; entries equal to 1 are the positives.
        Labels are expected to be 0/1.
    y_pred : array-like of shape (n_samples,) or (n_samples, n_classes)
        Scores used for ranking. For 2-D input the last column is used.
        The array is never modified.
    inp : number, optional
        Reference size that ``alpha`` is measured against. Defaults to the
        number of samples.
    alpha : number, optional
        How many samples out of ``inp`` may be selected. Defaults to the
        number of samples, i.e. everything is selected.

    Returns
    -------
    float
        Recall in [0, 1]; 0.0 when ``y_true`` contains no positives.

    Raises
    ------
    ZeroDivisionError
        If ``inp`` is 0 while ``y_true`` contains positives.
    """
    y_true = np.asarray(y_true)
    scores = np.asarray(y_pred)
    if scores.ndim > 1:
        # Keep only the scores of the last class.
        scores = scores[:, -1]

    positives = (y_true == 1)
    n_positive = np.count_nonzero(positives)
    if n_positive == 0:
        # Recall is undefined without positives; report 0.0 rather than
        # dividing by zero.
        return 0.0

    n_samples = len(scores)
    if inp is None:
        inp = n_samples
    if alpha is None:
        alpha = n_samples

    if alpha / inp < 1:
        # Multiply before dividing so an exact integer result is not pushed
        # above itself by rounding error before ceil() is applied.
        threshold = math.ceil(alpha * n_samples / inp)
    else:
        threshold = n_samples
    threshold = max(0, min(threshold, n_samples))

    # Boolean selection mask built without touching the caller's array.
    # threshold == 0 is handled separately because [-0:] would select all.
    selected = np.zeros(n_samples, dtype=bool)
    if threshold > 0:
        selected[scores.argsort()[-threshold:]] = True

    return np.count_nonzero(selected & positives) / n_positive

In [74]:
def precision_cond(y_true, y_pred, recall = 0.99):
    """Precision estimate implied by an assumed recall level.

    Computes ``recall * sum(y_true) / sum(predictions)``: the number of
    positives that would be recovered at the given recall, divided by the
    total of the predictions. Because ``recall`` is assumed rather than
    measured, the value is an estimate and is not bounded above by 1.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground-truth labels (expected to be 0/1).
    y_pred : array-like of shape (n_samples,) or (n_samples, n_classes)
        Predictions. For 2-D input the last column is used.
    recall : number, default 0.99
        Assumed recall level; values outside [0, 1] are clipped.

    Returns
    -------
    float
        The estimate, or 0.0 when the predictions sum to zero.
    """
    # Clip the assumed recall into [0, 1].
    if recall > 1:
        recall = 1
    elif recall < 0:
        recall = 0

    y_true = np.asarray(y_true)
    pred = np.asarray(y_pred)
    if pred.ndim > 1:
        # Keep only the column of the last class.
        pred = pred[:, -1]

    predicted_total = pred.sum()
    if predicted_total == 0:
        # Nothing was predicted positive; avoid dividing by zero.
        return 0.0

    return float(recall * y_true.sum() / predicted_total)

In [76]:
# Nested cross-validation: the inner 3-fold grid search tunes C and the
# penalty, the outer 3-fold loop scores the tuned model.
# The solver is named explicitly because the grid contains the 'l1' penalty,
# which liblinear supports; relying on the library default is fragile.
lr = LogisticRegression(random_state = 13, solver = 'liblinear')
params = {'C': np.arange(0.5, 10, 0.5), 'penalty':['l1','l2']}
gscv = GridSearchCV(lr, params, cv = 3)

# NOTE(review): newer scikit-learn releases replace `needs_proba=True` with
# `response_method="predict_proba"` -- confirm against the installed version.
scoring = {
    'recall': sklearn.metrics.make_scorer(recall_cond, needs_proba = True, alpha=120),
    'precision': sklearn.metrics.make_scorer(precision_cond, recall=0.99),
    'accuracy': 'accuracy',
}

# One cross_validate call evaluates all three metrics on the same fitted
# models, instead of repeating the whole nested grid search once per metric.
cv_results = cross_validate(gscv, X, y, cv = 3, scoring = scoring, return_train_score = False)
cv_recall = cv_results['test_recall']
cv_precision = cv_results['test_precision']
cv_accuracy = cv_results['test_accuracy']

print(cv_recall, cv_precision, cv_accuracy)
print(np.mean(cv_recall), np.mean(cv_precision), np.mean(cv_accuracy))

[ 0.95798319  0.98319328  0.98319328] [ 0.96565574  0.95008065  1.02443478] [ 0.94210526  0.96315789  0.94708995]
0.974789915966 0.980057055158 0.950784368328


In [79]:
# Refit the grid search on the complete dataset and show the winning model
# as the cell output (fit() returns the search object itself).
gscv.fit(X, y).best_estimator_

LogisticRegression(C=9.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=13, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)