# Label-Imbalanced Dataset

In [None]:
class MyDummyClassifier(BaseEstimator):
    """Rule-based baseline that predicts from the "Sex" column alone.

    Nothing is learned: rows whose "Sex" value equals 1 are predicted as 0
    and every other row is predicted as 1.
    """

    def fit(self, X, y = None):
        """Do nothing (there is nothing to learn) and return ``self``.

        Returning the estimator follows the scikit-learn ``fit`` convention,
        so calls can be chained as ``clf.fit(X, y).predict(X)``.
        """
        return self

    def predict(self, X):
        """Return an ``(n_rows, 1)`` float array holding 0.0 or 1.0 per row.

        ``X`` must support ``X["Sex"]`` column lookup returning a pandas
        Series (the comparison result is converted with ``to_numpy``).
        """
        # One vectorized comparison replaces the per-row ``iloc`` loop.
        is_one = (X["Sex"] == 1).to_numpy()
        return np.where(is_one, 0.0, 1.0).reshape(-1, 1)
    
class MyFakeClassifier(BaseEstimator):
    """Baseline that ignores its input and always predicts ``False``."""

    def fit(self, X, y):
        """Do nothing (there is nothing to learn) and return ``self``.

        Returning the estimator follows the scikit-learn ``fit`` convention,
        so calls can be chained as ``clf.fit(X, y).predict(X)``.
        """
        return self

    def predict(self, X):
        """Return a ``(len(X), 1)`` boolean array filled with ``False``."""
        return np.zeros((len(X), 1), dtype = bool)
# Build a binary target from the digits data: 1 where the label is 7, else 0.
digits = load_digits()
y = (digits.target == 7).astype(int)
X_train, X_test, y_train, y_test = train_test_split(digits.data, y, random_state = 42)
# MyFakeClassifier.predict returns all-False whatever it is given, so this is
# the constant "never 7" prediction for the test split.
fake = MyFakeClassifier()
fake.fit(X_train, y_train)
# The prediction is not stored; as the final expression it is only displayed
# when run as a notebook cell.
fake.predict(X_test)

# Recall and Precision

In [None]:
# Small 2-D toy input for experimenting with Binarizer.
X = [[1, -1, 2],
    [2, 0, 0],
    [0, 1.1, 1.2]]
# NOTE(review): this binarizer is rebound below before it is ever applied to X.
binarizer = Binarizer(threshold = 1.1)

# Binarize column 1 of pred_proba at 0.5.
# pred_proba is defined outside this cell; presumably the 2-D output of
# predict_proba -- confirm against the defining cell.
custom_threshold = 0.5
pred_proba_1 = pred_proba[:, 1].reshape(-1, 1)
binarizer = Binarizer(threshold = custom_threshold).fit(pred_proba_1)
# Fixed typo: was `binarizer.transfrom9pred_proba_1` (AttributeError).
custom_predict = binarizer.transform(pred_proba_1)

# Same column binarized again with a lower threshold of 0.4.
custom_threshold = 0.4
pred_proba_2 = pred_proba[:, 1].reshape(-1, 1)
binarizer = Binarizer(threshold = custom_threshold).fit(pred_proba_2)
# Fixed typo: was `binarizer.transfrom9pred_proba_2` (AttributeError).
custom_predict = binarizer.transform(pred_proba_2)

# Controlling Threshold

In [None]:
# Decision thresholds to try, in ascending order.
thresholds = [0.4, 0.45, 0.50, 0.55, 0.60]

def get_eval_by_threshold(y_test, pred_proba_c1, thresholds):
    """Evaluate binarized predictions once per threshold.

    For each value in ``thresholds``, a ``Binarizer`` with that threshold is
    fitted on ``pred_proba_c1`` and used to transform it; the result is
    handed to ``get_clf_eval`` together with ``y_test``. Returns ``None``.
    """
    for cutoff in thresholds:
        fitted = Binarizer(threshold = cutoff).fit(pred_proba_c1)
        get_clf_eval(y_test, fitted.transform(pred_proba_c1))
    
def precision_recall_curve_plot(y_test, pred_proba_c1):
    """Plot precision and recall as functions of the decision threshold.

    Args:
        y_test: True labels.
        pred_proba_c1: Scores for the positive class, passed straight to
            ``precision_recall_curve``.

    Draws precision (dashed) and recall on one 8x8 figure and shows it.
    Returns ``None``.
    """
    # Imported locally so the corrected name is in scope regardless of what
    # the notebook's import cell provides.
    from sklearn.metrics import precision_recall_curve

    # Fixed typo: the call was spelled `precision_revall_curve` (NameError).
    precisions, recalls, thresholds = precision_recall_curve(y_test, pred_proba_c1)

    # precision_recall_curve returns one more precision/recall value than
    # thresholds, so both are trimmed to the threshold count before plotting.
    # (`threshold_boundary` was previously used without being defined.)
    threshold_boundary = thresholds.shape[0]

    plt.figure(figsize = (8, 8))
    plt.plot(thresholds, precisions[0:threshold_boundary], linestyle = "--", label = "precision")
    plt.plot(thresholds, recalls[0:threshold_boundary], label = "recall")

    # Put x ticks every 0.1 across the current x-axis range.
    start, end = plt.xlim()
    plt.xticks(np.round(np.arange(start, end, 0.1), 2))

    plt.xlabel("Threshold Value"); plt.ylabel("Precision and Recall Curve")
    plt.legend(); plt.grid()
    plt.show()

# ROC and AUC Graph

In [None]:
def roc_curve_plot(y_test, pred_proba_c1):
    """Plot the ROC curve next to the diagonal of a random classifier.

    Args:
        y_test: True labels.
        pred_proba_c1: Scores for the positive class, passed straight to
            ``roc_curve``.

    Shows the figure and returns ``None``.
    """
    # The thresholds returned by roc_curve are not needed for the plot.
    fprs, tprs, _ = roc_curve(y_test, pred_proba_c1)

    plt.plot(fprs, tprs, label = "ROC")
    # Diagonal reference line labelled "Random".
    plt.plot([0, 1], [0, 1], "k--", label = "Random")

    # Ticks are derived from the auto-scaled x range before the axes are
    # clamped to [0, 1] (statement order kept from the original).
    start, end = plt.xlim()
    plt.xticks(np.round(np.arange(start, end, 0.1), 2))
    plt.xlim(0, 1); plt.ylim(0, 1)
    # Label fixes: FPR equals 1 - Specificity (not Sensitivity), and the
    # y axis is TPR (the original read "RPT").
    plt.xlabel("FPR(1 - Specificity)"); plt.ylabel("TPR(Recall)")
    plt.legend()
    plt.show()

# Pima Indian Example

In [None]:
# Features are every column except the last; the last column is the target.
X = diabetes_data.iloc[:, :-1]
y = diabetes_data.iloc[:, -1]

# Fixed typo: the third target was spelled `y_trian`, so the fit below used
# whatever `y_train` an earlier cell had left behind (or failed outright).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42, stratify = y)

lr_clf = LogisticRegression()
lr_clf.fit(X_train, y_train)
pred = lr_clf.predict(X_test)
# Keep the full 2-D probability matrix: every use below selects column 1
# itself, so slicing here as well made `pred_proba[:, 1]` raise IndexError.
pred_proba = lr_clf.predict_proba(X_test)

thresholds = [0.3, 0.33, 0.36, 0.39, 0.42, 0.45, 0.48, 0.5]
get_eval_by_threshold(y_test, pred_proba[:, 1].reshape(-1, 1), thresholds)

# thresholds[-2] is 0.48, matching the `pred_th_048` name.
binarizer = Binarizer(threshold = thresholds[-2])
pred_th_048 = binarizer.fit_transform(pred_proba[:, 1].reshape(-1, 1))
get_clf_eval(y_test, pred_th_048, pred_proba[:, 1])