In [17]:
#Import statements
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.metrics import confusion_matrix
from itertools import product

In [21]:
def thresholdTuner(clf, sens_target, X, y, display=False):
    """Tune a probability threshold towards a target sensitivity over 10 splits.

    For each of 10 stratified 80/20 train/test splits (``random_state`` 1..10)
    the classifier is refit on the training part. The decision threshold is
    then raised from 0.01 to at most 0.50 in steps of 0.01 until the
    sensitivity on the test part lies within +/-0.003 of ``sens_target``.
    A run is recorded only if its final sensitivity is strictly within
    +/-0.01 of ``sens_target``.

    Parameters
    ----------
    clf : estimator exposing ``fit`` and ``predict_proba``
        Refit in place on every split, so it is left fitted on the last one.
    sens_target : float
        Desired sensitivity (true-positive rate).
    X, y : feature matrix and labels, as accepted by ``train_test_split``
        Predictions are encoded as ``'F'`` / ``'T'``, so ``y`` is expected to
        use those two labels, with ``'T'`` as the positive class.
        NOTE(review): assumes column 1 of ``predict_proba`` is the probability
        of ``'T'`` -- this holds for scikit-learn classifiers, whose columns
        follow the sorted class labels.
    display : bool, default False
        If true, print a progress line for each run.

    Returns
    -------
    sensArr, specArr, thresholds : numpy.ndarray
        Sensitivity, specificity and chosen threshold of each recorded run.
    conf_matrices : list
        One ``[tp, fp, fn, tn]`` list per recorded run.
    """
    n_runs = 10
    max_steps = 50       # thresholds 0.01, 0.02, ..., 0.50
    search_tol = 0.003   # stop the threshold search inside this band
    accept_tol = 0.01    # keep the run only inside this (wider) band
    labels = ['F', 'T']  # negative, positive

    sensArr, specArr, thresholds, conf_matrices = [], [], [], []
    for run in range(1, n_runs + 1):
        if display:
            print(f'Run: {run} / {n_runs}', end='\r')
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=run, stratify=y)
        clf.fit(X_train, y_train)
        # The probabilities do not depend on the threshold: compute them once.
        pos_proba = np.asarray(clf.predict_proba(X_test))[:, 1]

        threshold = 0.0
        sensitivity = 0
        specificity = 0
        conf_matrix = None  # stays None only if the search loop never runs
        step = 0
        while ((sensitivity < sens_target - search_tol
                or sensitivity > sens_target + search_tol)
               and step < max_steps):
            step += 1
            # Derive the threshold from an integer step so no floating-point
            # error accumulates and the search always stops at exactly 0.50.
            threshold = step / 100
            pred = np.where(pos_proba >= threshold, 'T', 'F')
            # scikit-learn orders a binary confusion matrix as
            # [[tn, fp], [fn, tp]] (rows = true label, columns = prediction).
            # Passing ``labels`` pins the 2x2 shape even if one class is absent.
            tn, fp, fn, tp = confusion_matrix(y_test, pred, labels=labels).ravel()
            conf_matrix = [tp, fp, fn, tn]
            if (tp + fn) != 0:
                sensitivity = tp / (tp + fn)
            if (tn + fp) != 0:
                specificity = tn / (tn + fp)

        if sens_target - accept_tol < sensitivity < sens_target + accept_tol:
            sensArr.append(sensitivity)
            specArr.append(specificity)
            thresholds.append(threshold)
            conf_matrices.append(conf_matrix)

    return np.array(sensArr), np.array(specArr), np.array(thresholds), conf_matrices

In [26]:
def gridSearchCustom(param_grid, X, y, display=False, sens_target=0.95):
    """Grid-search decision-tree hyperparameters for the best mean specificity.

    Every combination in ``param_grid`` is used to build a
    ``DecisionTreeClassifier(random_state=42, **params)``, which is then
    evaluated with ``thresholdTuner`` at the requested sensitivity target.
    The combination with the highest mean specificity wins. A combination
    for which ``thresholdTuner`` recorded no run scores 0 and can never win.

    Parameters
    ----------
    param_grid : dict
        Maps a hyperparameter name to the list of values to try.
    X, y : feature matrix and labels, forwarded to ``thresholdTuner``.
    display : bool, default False
        If true, print each parameter set and its mean specificity.
    sens_target : float, default 0.95
        Sensitivity target forwarded to ``thresholdTuner``.

    Returns
    -------
    best_clf, best_params, best_sensArr, best_specArr, best_thresholds, best_conf_matrices
        The winning classifier, its parameter dict, and the per-run results
        returned by ``thresholdTuner`` for it. If no combination achieved a
        mean specificity above 0, ``best_clf`` and ``best_params`` are
        ``None`` and the remaining items are empty.
    """
    best_specificity = 0
    # Defaults returned when no candidate ever beats a specificity of 0.
    best_clf = None
    best_params = None
    best_sensArr = np.array([])
    best_specArr = np.array([])
    best_thresholds = np.array([])
    best_conf_matrices = []

    names = list(param_grid)
    for combo in product(*param_grid.values()):
        params = dict(zip(names, combo))
        if display:
            print(params)
        clf = tree.DecisionTreeClassifier(random_state=42, **params)
        sensArr, specArr, thresholds, conf_matrices = thresholdTuner(
            clf, sens_target, X, y, display)
        specAvg = specArr.mean() if len(specArr) else 0
        if specAvg > best_specificity:
            best_specificity = specAvg
            best_clf = clf
            best_params = params
            best_sensArr = sensArr
            best_specArr = specArr
            best_thresholds = thresholds
            best_conf_matrices = conf_matrices
        if display:
            print(f'Specificity: {round(specAvg, 4)}\n')
    return best_clf, best_params, best_sensArr, best_specArr, best_thresholds, best_conf_matrices