In [None]:
import time
start_time = time.time()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.model_selection import train_test_split
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore') 

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.metrics import Recall
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold

In [None]:
# set appropriate style for all plots
sns.set(style='whitegrid')

In [None]:
# read the dataset from a csv file into a pandas dataframe
breast_cancer = pd.read_csv('wbcd.csv')

In [None]:
# look at the first 5 rows
breast_cancer.head(5)

In [None]:
cols = breast_cancer.columns

In [None]:
# Drop the first and the last column of the raw frame. The first one is the id
# column, which serves no purpose for classification.
# NOTE(review): the last column is dropped as well -- presumably an empty
# trailing column from the CSV export; confirm against wbcd.csv.
# The drop is in place, so re-running this cell removes two further columns.
breast_cancer.drop([cols[-1], cols[0]], inplace=True, axis=1)

In [None]:
cols = cols[1:-1]

In [None]:
breast_cancer.head(5)

According to the aggregate below, there are 357 benign records and 212 malignant records.

In [None]:
target_values = list(breast_cancer['diagnosis'].value_counts().keys())
breast_cancer['diagnosis'].value_counts()

In [None]:
# The target must be binary and use exactly the two labels 'M' and 'B'.
target_is_binary = len(target_values) == 2
has_both_labels = all(label in target_values for label in ('B', 'M'))
if not (target_is_binary and has_both_labels):
    print('Error! Please Verify The Dataset.')
else:
    print('Target Variable is in the Correct Format.')

In [None]:
data = pd.DataFrame(breast_cancer['diagnosis'].value_counts())

In [None]:
breast_cancer.shape

In [None]:
cols[1:]

In [None]:
breast_cancer.info()

In [None]:
breast_cancer['area_se'].mean()

In [None]:
# Every feature column is expected to be float64.
# A flag is used instead of for/else: without a `break`, the `else` branch of a
# for loop always runs, so the success message would be printed even when an
# error had been reported.
all_features_float = True
for i in cols[1:]:
    if breast_cancer[i].dtype != np.float64:
        print('Error! Please Verify The Dataset.')
        all_features_float = False
if all_features_float:
    print('All Features are in the Correct Format')

In [None]:
# No feature column may contain a negative value.
# A flag is used instead of for/else: without a `break`, the `else` branch of a
# for loop always runs, so the success message would be printed even when an
# error had been reported.
all_features_non_negative = True
for k in cols[1:]:
    if breast_cancer[k].min() < 0:
        print('Error! Please Verify The Dataset.')
        all_features_non_negative = False
if all_features_non_negative:
    print('All Features are in the Correct Format')

In [None]:
features = breast_cancer[cols[1:]]
features.head(2)

In [None]:
target = breast_cancer['diagnosis']
target[:5]

In [None]:
breast_cancer.describe().transpose()

In [None]:
# Histogram (with KDE) of columns 1..10 of the frame, split by diagnosis,
# laid out on a 5 x 2 grid.
hist_columns = breast_cancer.columns[1:11]
fig = plt.figure(figsize=(12, 15))

for position, column in enumerate(hist_columns, start=1):
    sns.histplot(data=breast_cancer, x=column, hue='diagnosis', kde=True,
                 ax=fig.add_subplot(5, 2, position))

fig.tight_layout()
plt.show()

In [None]:
fig = plt.figure(figsize=(12,15))
# At this point `diagnosis` still holds the raw 'B' / 'M' labels (it is only
# label-encoded in a later cell), so the per-class frames must be selected by
# string label; comparing against 0 / 1 matches no rows.
benign = breast_cancer[breast_cancer['diagnosis'] == 'B']
malignant = breast_cancer[breast_cancer['diagnosis'] == 'M']

# One box plot per feature (columns 1..10), grouped by diagnosis, on a 5 x 2 grid.
for i,j in enumerate(breast_cancer.columns[1:11], start=1):
    ax = fig.add_subplot(5,2,i)
    sns.boxplot(x=j, y='diagnosis', data=breast_cancer, ax=ax)

plt.tight_layout()
plt.show()

In [None]:
sns.countplot(x='diagnosis', data=breast_cancer, alpha=0.75)

In [None]:
# Encode the diagnosis labels as integers. LabelEncoder numbers the classes in
# sorted order, so 'B' becomes 0 and 'M' becomes 1.
le = LabelEncoder()
breast_cancer['diagnosis'] = le.fit_transform(breast_cancer['diagnosis'])
# `target` was taken from the column in an earlier cell, before the assignment
# above replaced it, so it is encoded separately here. After this line `target`
# is the array returned by the encoder, no longer a pandas Series.
target = le.fit_transform(target)
target[:5]

In [None]:
# Pairwise correlation of every column (including the encoded diagnosis).
# The upper triangle, diagonal included, is masked so each pair appears once.
breast_corr = breast_cancer.corr()
mask = np.triu(np.ones_like(breast_corr))
plt.figure(figsize=(15, 10))
sns.heatmap(breast_corr, cmap="Reds", linewidths=0.25, mask=mask)
plt.tight_layout()

In [None]:
# Standardize all feature columns with StandardScaler.
# NOTE(review): the scaler is fitted on the complete dataset, before the
# train/validation/test splits made further down, so statistics of the held-out
# rows influence the scaling. Consider fitting on the training split only.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

In [None]:
breast_cancer.isnull().sum() # checking for missing values/null cells

### Initial Data Splitting (80% Train and 20% Test)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(scaled_features, target, test_size=0.20, random_state=23)

### Secondary Data Splitting for Validation Set in NN (60% Train and 20% Validation)

In [None]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=23)

### Dummy Classifier Classification Code

In [None]:
from sklearn.dummy import DummyClassifier
# Create a baseline using a simple rule:
# predict labels at random, following the class distribution of the training set.
# The strategy is random, so it is seeded with the same random_state the other
# models in this notebook use to keep the baseline reproducible.
dummy = DummyClassifier(strategy='stratified', random_state=23)
dummy.fit(X_train, y_train)
# predict() expects the feature matrix, not the labels.
y_pred_dc = dummy.predict(X_test)

In [None]:
def perf_scores(y_true, y_hat):
    """Compute summary metrics for a binary classifier.

    Parameters
    ----------
    y_true : true labels (0 / 1).
    y_hat : predicted hard labels (0 / 1).

    Returns
    -------
    tuple
        (accuracy, ROC AUC, TPR, TNR, PPV). Accuracy and ROC AUC are fractions
        as returned by scikit-learn; TPR, TNR and PPV are percentages.
    """
    c_m = confusion_matrix(y_true, y_hat)
    acc_score = accuracy_score(y_true, y_hat)
    roc_score = roc_auc_score(y_true, y_hat)
    # Row index of the matrix is the true class, column index the predicted one.
    tn, fp = c_m[0, 0], c_m[0, 1]
    fn, tp = c_m[1, 0], c_m[1, 1]
    tpr = tp * 100 / (fn + tp)  # recall on the positive class
    tnr = tn * 100 / (tn + fp)  # recall on the negative class
    ppv = tp * 100 / (fp + tp)  # precision
    return (acc_score, roc_score, tpr, tnr, ppv)

In [None]:
def conf_mat(y_true, y_hat, model):
    """Print a performance summary for a binary classifier.

    Parameters
    ----------
    y_true : true labels (0 / 1).
    y_hat : predicted hard labels (0 / 1).
    model : str, display name of the model used in the report header.

    Prints accuracy, ROC AUC, the four confusion-matrix counts and
    TPR / TNR / PPV (the last three as percentages). Returns None.
    """
    c_m = confusion_matrix(y_true, y_hat)
    print("Performance Metrics for: {:s}".format(model))
    print("Accuracy Score: {:.3f}".format(accuracy_score(y_true, y_hat)))
    # ROC stands for "Receiver Operating Characteristic"; roc_auc_score returns
    # the area under that curve.
    print("Receiver Operating Characteristic (ROC AUC) Score: {:.3f}".format(roc_auc_score(y_true, y_hat)))
    # Row index of the matrix is the true class, column index the predicted one.
    print("True Negatives: {}".format(c_m[0,0]))
    print("False Negatives: {}".format(c_m[1,0]))
    print("True Positives: {}".format(c_m[1,1]))
    print("False Positives: {}".format(c_m[0,1]))
    print("TPR: {:.3f}".format(c_m[1,1]*100/np.sum(c_m[1,:])))
    print("TNR: {:.3f}".format(c_m[0,0]*100/np.sum(c_m[0,:])))
    print("PPV (Precision): {:.3f}".format(c_m[1,1]*100/np.sum(c_m[:,1])))

In [None]:
print('Confusion Matrix for Dummy Classifier:\n')
print(confusion_matrix(y_test, y_pred_dc))

In [None]:
conf_mat(y_test, y_pred_dc, 'Dummy Classifier')

In [None]:
print('Classification Report for Dummy Classifier:\n')
print(classification_report(y_test, y_pred_dc))

### Neural Network Classification Code with TF and Keras

#### First a quick understanding of the TF library

In [None]:
# Hyper-parameters of the hand-written TensorFlow training loop.
epochs = 25
batch_size = 20
# convert x_test to tensor to pass through model (train data will be converted to
# tensors on the fly)
# NOTE(review): neither X_train_tf nor X_test_tf is referenced by any other
# cell in this notebook; the training loop passes the NumPy arrays X_train and
# X_test directly.
X_train_tf = X_train.copy()
X_test_tf = tf.Variable(X_test)

In [None]:
def get_batch(x_data, y_data, batch_size):
    """Draw a random mini-batch.

    `batch_size` row indices are sampled uniformly from [0, len(y_data)) with
    replacement, and the matching rows of `x_data` and entries of `y_data`
    are returned as a (features, labels) pair.
    """
    picks = np.random.randint(low=0, high=len(y_data), size=batch_size)
    batch_features = x_data[picks, :]
    batch_labels = y_data[picks]
    return batch_features, batch_labels

In [None]:
# now declare the weights connecting the input to the hidden layer
# (30 inputs -> 15 hidden units; NOTE(review): 30 is hard-coded and presumably
# equals the number of feature columns -- confirm against scaled_features.shape[1])
W1 = tf.Variable(tf.random.normal([30, 15], stddev=0.03), name='W1')
b1 = tf.Variable(tf.random.normal([15]), name='b1')
# and the weights connecting the hidden layer to the output layer
# (15 hidden units -> 2 logits, one per class)
W2 = tf.Variable(tf.random.normal([15, 2], stddev=0.03), name='W2')
b2 = tf.Variable(tf.random.normal([2]), name='b2')

In [None]:
def nn_model(x_input, W1, b1, W2, b2):
    """Forward pass of a network with one ReLU hidden layer.

    The input is flattened to (rows, -1), cast to float32, passed through
    the hidden layer (W1, b1) with ReLU, then through the output layer
    (W2, b2). The raw logits are returned; no softmax is applied.

    NOTE(review): a later cell rebinds the name `nn_model` to a Keras
    Sequential model, so the manual training loop cannot be re-run after that
    cell unless this definition is executed again.
    """
    flat_input = tf.cast(tf.reshape(x_input, (x_input.shape[0], -1)), tf.float32)
    hidden = tf.nn.relu(tf.add(tf.matmul(flat_input, W1), b1))
    return tf.add(tf.matmul(hidden, W2), b2)

In [None]:
def loss_fn(logits, labels):
    """Return the softmax cross-entropy between `logits` and `labels`, averaged over all examples."""
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    return tf.reduce_mean(per_example_loss)

In [None]:
# setup the optimizer
optimizer = tf.keras.optimizers.Adam()

In [None]:
# Manual training loop for the hand-written network.
# Number of batches per epoch (any remainder is truncated by int()).
total_batch = int(len(y_train)/batch_size)
for epoch in range(epochs):
    avg_loss = 0
    for i in range(total_batch):
        # get_batch samples rows at random with replacement, so an "epoch" here
        # is total_batch random batches rather than one exact pass over the data.
        batch_x, batch_y = get_batch(X_train, y_train, batch_size=batch_size)
        # create tensors
        batch_x = tf.Variable(batch_x)
        batch_y = tf.Variable(batch_y)
        # create a one hot vector
        batch_y = tf.one_hot(batch_y, 2)
        # record the forward pass so gradients w.r.t. the four parameters can be taken
        with tf.GradientTape() as tape:
            logits = nn_model(batch_x, W1, b1, W2, b2)
            loss = loss_fn(logits, batch_y)
        gradients = tape.gradient(loss, [W1, b1, W2, b2])
        optimizer.apply_gradients(zip(gradients, [W1, b1, W2, b2]))
        # running mean of the batch losses over this epoch
        avg_loss += loss/total_batch
    # after each epoch, score the test set: predicted class = index of the largest logit
    test_logits = nn_model(X_test, W1, b1, W2, b2)
    max_idxs = tf.argmax(test_logits, axis=1)
    test_acc = np.sum(max_idxs.numpy() == y_test)/len(y_test)
    print('Epoch: {:d}, loss= {:.3f}, test set accuracy= {:.3f}'.format((epoch+1), (avg_loss), (test_acc*100)))
print("\nTraining complete!")

#### Implementation of actual NN Architecture with Keras

In [None]:
# Final Architecture Obtained Empirically. Start Point: Research Papers.
# Layers: Dense(30, relu) -> Dropout(0.1) -> Dense(15, relu) -> Dropout(0.1)
# -> Dense(1, sigmoid), i.e. a single probability output for binary classification.
# NOTE(review): this rebinds the name `nn_model`, which an earlier cell defined
# as the forward-pass function of the hand-written network; re-running the
# manual training loop after this cell will fail unless that function is
# redefined first.

# make float64 the Keras default dtype
tf.keras.backend.set_floatx('float64')
nn_model = Sequential()

nn_model.add(Dense(30, activation='relu'))
nn_model.add(Dropout(0.1))

nn_model.add(Dense(15, activation='relu'))
nn_model.add(Dropout(0.1))

nn_model.add(Dense(units=1, activation='sigmoid'))

In [None]:
nn_model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy', Recall()])

In [None]:
history = nn_model.fit(X_train, y_train, epochs=50, verbose=1, validation_data=(X_val, y_val))

In [None]:
nn_model.summary()

In [None]:
loss_accuracy = history.history

In [None]:
loss_accuracy.keys()

In [None]:
# Training vs. validation loss per epoch for the Keras model.
plt.figure(figsize=(12,5))
plt.plot(loss_accuracy['loss'], 'r-', label='Training Loss')
plt.plot(loss_accuracy['val_loss'], 'g-', label='Validation Loss')
plt.ylabel('Training and Validation Loss')
plt.xlabel('Number of Epochs')
plt.title('Examining Loss Data on Neural Net Model')
plt.legend()
# tight_layout must be called; the bare attribute reference does nothing.
plt.tight_layout()
plt.show()

In [None]:
print('Validation Accuracy: {:.3f}, Validation Loss: {:.3f}, Validation Recall: {:.3f}'.format(
    loss_accuracy['val_accuracy'][-1], loss_accuracy['val_loss'][-1],\
    loss_accuracy[list(loss_accuracy.keys())[-1]][-1]))

In [None]:
nn_model.evaluate(X_test,  y_test, verbose=1) # Examine testing accuracy and loss

In [None]:
nn_model.evaluate(X_train,  y_train, verbose=1) # Examine tendency to overfit

In [None]:
y_pred_nn = (nn_model.predict(X_test) > 0.5).astype("int32")
print('Confusion Matrix for NN with TF and Keras:\n')
print(confusion_matrix(y_test, y_pred_nn))

In [None]:
conf_mat(y_test, y_pred_nn, 'Neural Nets with TF and Keras')

In [None]:
print('Classification Report for NN with TF and Keras:\n')
print(classification_report(y_test, y_pred_nn))

### Neural Network Classification Code with sklearn MLP Classifier

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
# Fit scikit-learn's multi-layer perceptron on the training split and predict the test split.
# NOTE(review): hidden_layer_sizes lists HIDDEN layers only, so (15,1) builds two
# hidden layers of 15 units and 1 unit; the output layer is added automatically.
# If a single hidden layer of 15 units was intended, this should be (15,) -- confirm.
nn_mlp = MLPClassifier(max_iter=1000, hidden_layer_sizes= (15,1), 
                       alpha=0.001, activation='relu', solver='adam', random_state=23)
nn_mlp.fit(X_train, y_train)
y_pred_mlp = nn_mlp.predict(X_test)

In [None]:
print(confusion_matrix(y_test, y_pred_mlp))

In [None]:
conf_mat(y_test, y_pred_mlp, 'Neural Nets with MLP')

In [None]:
print('Classification Report for NN with MLP:\n')
print(classification_report(y_test, y_pred_mlp))

### Decision Tree Classification Code

In [None]:
dtc = DecisionTreeClassifier(criterion='entropy', random_state=23)
tree = dtc.fit(X_train, y_train)
y_pred_dtc = dtc.predict(X_test)
tree_plot = sklearn.tree.plot_tree(tree)

In [None]:
from sklearn.tree import export_text
r = export_text(dtc, feature_names=list(features.columns))
print(r)

In [None]:
print('Confusion Matrix for Decision Tree Classifier:\n')
print(confusion_matrix(y_test, y_pred_dtc))

In [None]:
conf_mat(y_test, y_pred_dtc, 'Decision Tree Classifier')

In [None]:
print('Classification Report for Decision Tree Classifier:\n')
print(classification_report(y_test, y_pred_dtc))

### Random Forest Classification Code

In [None]:
rfc = RandomForestClassifier(n_estimators=128, random_state=23)
rfc.fit(X_train, y_train)
y_pred_rfc = rfc.predict(X_test)

In [None]:
print('Confusion Matrix for Random Forest Classifier:\n')
print(confusion_matrix(y_test, y_pred_rfc))

In [None]:
conf_mat(y_test, y_pred_rfc, 'Random Forest Classifier')

In [None]:
print('Classification Report for Random Forest Classifier:\n')
print(classification_report(y_test, y_pred_rfc))

### Recursive Feature Elimination

In [None]:
from sklearn.feature_selection import RFE
from sklearn.svm import SVC
#SVC(kernel='linear')

rfe_features = scaled_features.copy()

# Explore the right number of features to select
def get_models():
    """Build one RFE -> decision-tree pipeline per candidate feature count.

    Returns
    -------
    dict
        Maps str(n) to an unfitted Pipeline whose 'RFE' step keeps n features
        and whose 'DTC' step is a DecisionTreeClassifier, for n in 3..29.

    The pipelines are deliberately returned unfitted: cross_val_score clones
    and fits the estimator on every fold, and Pipeline.fit refits every step,
    so fitting RFE on the full dataset here would only cost time (27 complete
    elimination runs) without affecting any result.
    """
    models = dict()
    for i in range(3, 30): # using a range of features found in various WBCD research papers
        rfe = RFE(estimator = DecisionTreeClassifier(), n_features_to_select = i)
        model = DecisionTreeClassifier()
        models[str(i)] = Pipeline(steps=[('RFE',rfe),('DTC',model)])
    return models

def evaluate_model(model, X, y):
    """Cross-validate `model` on (X, y) and return the per-fold accuracies.

    Uses stratified 10-fold cross-validation repeated 10 times (seeded with
    random_state=0), runs on all cores, and re-raises any fitting error.
    """
    return cross_val_score(
        model, X, y,
        scoring='accuracy',
        cv=RepeatedStratifiedKFold(n_splits=10, n_repeats=10, random_state=0),
        n_jobs=-1,
        error_score='raise',
    )

models = get_models()

# cross-validate every candidate pipeline, keeping scores and labels in step
names = []
results = []
for name, model in models.items():
    scores = evaluate_model(model, rfe_features, target)
    names.append(name)
    results.append(scores)
    summary = '{} Accuracy: {:.3f} and Standard Deviation: {:.3f}'
    print(summary.format(name, np.mean(scores), np.std(scores)))

# one box per feature count, with the mean marked, for side-by-side comparison
plt.figure(figsize=(12,10))
plt.boxplot(results, labels=names, showmeans=True)
plt.show()



In [None]:
# Fit the pipeline stored under key '27' on the full dataset and classify the
# first record (label 1 is reported as malignant, anything else as benign).
models['27'].fit(rfe_features, target)
pipe_predict = models['27'].predict([rfe_features[0]])
print("Prediction is Malignant" if pipe_predict == [1] else "Prediction is Benign")

### Automatic RFE using Decision Trees (RFECV)

In [None]:
from sklearn.feature_selection import RFECV

# Let RFECV pick the number of features itself, using a decision tree as the
# ranking estimator, then chain it with a fresh decision tree classifier.
# NOTE(review): the selector is fitted on the complete dataset (test rows
# included), and its support_ mask is later used to build the "with RFE"
# train/test matrices -- the held-out data therefore influences feature selection.
# NOTE(review): no random_state is set on the trees, so the selected features
# may differ between runs.
rfecv = RFECV(estimator=DecisionTreeClassifier())
rfecv.fit(rfe_features, target)
model = DecisionTreeClassifier()
pipeline_dtc = Pipeline(steps=[('RFECV',rfecv),('DTC',model)])

# Stratified 10-fold cross-validation repeated 10 times, scored by accuracy.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=10, random_state=0)
n_scores = cross_val_score(pipeline_dtc, rfe_features, target, scoring='accuracy', cv=cv,\
                           n_jobs=-1, error_score='raise')

print('Accuracy: {:.3f} and Standard Deviation: {:.3f}'.format(np.mean(n_scores), np.std(n_scores)))

In [None]:
# Fit the RFECV pipeline on the full dataset and classify the first record
# (label 1 is reported as malignant, anything else as benign).
pipeline_dtc.fit(rfe_features, target)
pipe_predict = pipeline_dtc.predict([rfe_features[0]])
print("Prediction is Malignant" if pipe_predict == [1] else "Prediction is Benign")

In [None]:
# List every feature with its RFECV verdict (kept or not) and elimination rank
selection_rows = zip(features.columns, rfecv.support_, rfecv.ranking_)
for feature_name, chosen, rank in selection_rows:
    print('{}, Chosen: {}, Rank: {}'.format(feature_name, chosen, rank))

In [None]:
num_rfe_features = len(features.columns[rfecv.support_])
features.columns[rfecv.support_]

### Decision Tree Classification with RFE

In [None]:
# Restrict the train / validation / test matrices to the columns RFECV kept.
# The arrays are wrapped in DataFrames only so the columns can be selected by name.
X_train_df = pd.DataFrame(X_train, columns=features.columns)
X_train_rfe = np.array(X_train_df[features.columns[rfecv.support_]])
X_val_df = pd.DataFrame(X_val, columns=features.columns)
X_val_rfe = np.array(X_val_df[features.columns[rfecv.support_]])
X_test_df = pd.DataFrame(X_test, columns=features.columns)
X_test_rfe = np.array(X_test_df[features.columns[rfecv.support_]])

dtc_rfe = DecisionTreeClassifier(criterion='entropy', random_state=23)
tree_rfe = dtc_rfe.fit(X_train_rfe, y_train)
# Predict on the reduced test matrix: the tree was trained on the selected
# columns only, so it must be given the same columns at prediction time.
y_pred_dtc_rfe = dtc_rfe.predict(X_test_rfe)

In [None]:
from sklearn.tree import export_text
r = export_text(dtc_rfe, feature_names=list(features.columns[rfecv.support_]))
print(r)

In [None]:
print('Confusion Matrix for Decision Tree Classifier with RFE:\n')
print(confusion_matrix(y_test, y_pred_dtc_rfe))

In [None]:
conf_mat(y_test, y_pred_dtc_rfe, 'Decision Tree Classifier with RFE')

In [None]:
print('Classification Report for Decision Tree Classifier with RFE:\n')
print(classification_report(y_test, y_pred_dtc_rfe))

### Random Forest Classification with RFE

In [None]:
rfc_rfe = RandomForestClassifier(n_estimators=32, random_state=23)
rfc_rfe.fit(X_train_rfe, y_train)
y_pred_rfc_rfe = rfc_rfe.predict(X_test_rfe)

In [None]:
# Title names the RFE variant so this output can be told apart from the
# confusion matrix of the random forest trained on all features.
print('Confusion Matrix for Random Forest Classifier with RFE:\n')
print(confusion_matrix(y_test, y_pred_rfc_rfe))

In [None]:
conf_mat(y_test, y_pred_rfc_rfe, 'Random Forest Classifier with RFE')

In [None]:
# Title names the RFE variant so this output can be told apart from the
# report of the random forest trained on all features.
print('Classification Report for Random Forest Classifier with RFE:\n')
print(classification_report(y_test, y_pred_rfc_rfe))

### Neural Networks Classification with RFE

In [None]:
tf.keras.backend.set_floatx('float64')
nn_model_rfe = Sequential()

nn_model_rfe.add(Dense(num_rfe_features, activation='relu'))
nn_model_rfe.add(Dropout(0.1))

nn_model_rfe.add(Dense(2, activation='relu'))
nn_model_rfe.add(Dropout(0.1))

nn_model_rfe.add(Dense(units=1, activation='sigmoid'))

In [None]:
nn_model_rfe.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy', Recall()])

In [None]:
history_rfe = nn_model_rfe.fit(X_train_rfe, y_train, epochs=50, verbose=1, validation_data=(X_val_rfe, y_val))

In [None]:
nn_model_rfe.summary()

In [None]:
loss_accuracy_rfe = history_rfe.history

In [None]:
loss_accuracy_rfe.keys()

In [None]:
# Training vs. validation loss per epoch for the network trained on the
# RFE-selected features. The curves must come from loss_accuracy_rfe (the
# history of nn_model_rfe), not from the history of the full-feature model.
plt.figure(figsize=(12,5))
plt.plot(loss_accuracy_rfe['loss'], 'r-', label='Training Loss')
plt.plot(loss_accuracy_rfe['val_loss'], 'g-', label='Validation Loss')
plt.ylabel('Training and Validation Loss', fontsize=16)
plt.xlabel('Number of Epochs', fontsize=16)
plt.title('Examining Loss Data on Neural Net Model with RFE', fontsize=20)
plt.legend(fontsize=18)
plt.show()

In [None]:
print('Validation Accuracy: {:.3f}, Validation Loss: {:.3f}, Validation Recall: {:.3f}'.format(
    loss_accuracy_rfe['val_accuracy'][-1], loss_accuracy_rfe['val_loss'][-1], \
    loss_accuracy_rfe[list(loss_accuracy_rfe.keys())[-1]][-1]))

In [None]:
nn_model_rfe.evaluate(X_test_rfe,  y_test, verbose=1) # Examine testing accuracy and loss

In [None]:
nn_model_rfe.evaluate(X_train_rfe,  y_train, verbose=1)

In [None]:
y_pred_nn_rfe = (nn_model_rfe.predict(X_test_rfe) > 0.5).astype("int32")
print('Confusion Matrix for NN with TF and RFE:\n')
print(confusion_matrix(y_test, y_pred_nn_rfe))

In [None]:
conf_mat(y_test, y_pred_nn_rfe, 'Neural Nets with TF and RFE')

In [None]:
print('Classification Report for NN with TF and RFE:\n')
print(classification_report(y_test, y_pred_nn_rfe))

In [None]:
(nn_tf_acc, nn_tf_roc, nn_tf_tpr, nn_tf_tnr, nn_tf_ppv) = perf_scores(y_test, y_pred_nn)
(dtc_acc, dtc_roc, dtc_tpr, dtc_tnr, dtc_ppv) = perf_scores(y_test, y_pred_dtc)
(rfc_acc, rfc_roc, rfc_tpr, rfc_tnr, rfc_ppv) = perf_scores(y_test, y_pred_rfc)
(nn_tf_rfe_acc, nn_tf_rfe_roc, nn_tf_rfe_tpr, nn_tf_rfe_tnr, nn_tf_rfe_ppv) = perf_scores(y_test, y_pred_nn_rfe)
(dtc_rfe_acc, dtc_rfe_roc, dtc_rfe_tpr, dtc_rfe_tnr, dtc_rfe_ppv) = perf_scores(y_test, y_pred_dtc_rfe)
(rfc_rfe_acc, rfc_rfe_roc, rfc_rfe_tpr, rfc_rfe_tnr, rfc_rfe_ppv) = perf_scores(y_test, y_pred_rfc_rfe)

In [None]:
model_names = ['Dec_Tree_RFE', 'Rand_Forest_RFE', 'Neural_Nets_RFE', 'Decision_Tree', 'Random_Forest', 'Neural_Nets']
# accuracy_score returns fractions; scale to percentages for the chart
test_acc = np.array([dtc_rfe_acc, rfc_rfe_acc, nn_tf_rfe_acc, dtc_acc, rfc_acc,
                     nn_tf_acc]).reshape(6,1) * 100

model_acc = pd.DataFrame(data=test_acc, index=model_names,
                         columns=['Testing Accuracies(%)'])

# Create the figure and axes once and hand the axes to pandas. Calling
# plt.figure() and then DataFrame.plot() without `ax` makes pandas open a second
# figure and leaves the first one empty in the output.
fig, ax = plt.subplots(figsize=(16,8))
model_acc.plot(kind='bar', ax=ax, fontsize=16, color='tab:green')
# write each bar's value just below its top edge
for p in ax.patches:
    ax.annotate("%.2f" % p.get_height(), (p.get_x() + p.get_width() / 2.,
    p.get_height()),ha='center', va='center', rotation=0, xytext=(0, -20),
    textcoords='offset points', fontsize=20, fontweight='bold', color='k')
plt.ylabel('Testing Accuracies (%)', fontsize=16)
plt.show()

In [None]:
# roc_auc_score returns fractions; scale to percentages for the chart
test_roc = np.array([dtc_rfe_roc, rfc_rfe_roc, nn_tf_rfe_roc, dtc_roc, rfc_roc,
                     nn_tf_roc]).reshape(6,1) * 100

model_roc = pd.DataFrame(data=test_roc, index=model_names,
                         columns=['Testing ROCs(%)'])

# Create the figure and axes once and hand the axes to pandas. Calling
# plt.figure() and then DataFrame.plot() without `ax` makes pandas open a second
# figure and leaves the first one empty in the output.
fig, ax = plt.subplots(figsize=(16,8))
model_roc.plot(kind='bar', ax=ax, fontsize=16, color='tab:blue')
# write each bar's value just below its top edge
for p in ax.patches:
    ax.annotate("%.2f" % p.get_height(), (p.get_x() + p.get_width() / 2.,
    p.get_height()),ha='center', va='center', rotation=0, xytext=(0, -20),
    textcoords='offset points', fontsize=20, fontweight='bold', color='k')
plt.ylabel('Testing ROCs (%)', fontsize=16)
plt.show()

In [None]:
model_names = ['Dec_Tree_RFE', 'Rand_Forest_RFE', 'Neural_Nets_RFE', 'Decision_Tree', 'Random_Forest', 'Neural_Nets']
# perf_scores already returns TPR as a percentage, so no scaling is applied
test_tpr = np.array([dtc_rfe_tpr, rfc_rfe_tpr, nn_tf_rfe_tpr, dtc_tpr, rfc_tpr,
                     nn_tf_tpr]).reshape(6,1)

model_tpr = pd.DataFrame(data=test_tpr, index=model_names,
                         columns=['Testing TPR(%)'])

# Create the figure and axes once and hand the axes to pandas. Calling
# plt.figure() and then DataFrame.plot() without `ax` makes pandas open a second
# figure and leaves the first one empty in the output.
fig, ax = plt.subplots(figsize=(16,8))
model_tpr.plot(kind='bar', ax=ax, fontsize=16, color='tab:olive')
# write each bar's value just below its top edge
for p in ax.patches:
    ax.annotate("%.2f" % p.get_height(), (p.get_x() + p.get_width() / 2.,
    p.get_height()),ha='center', va='center', rotation=0, xytext=(0, -20),
    textcoords='offset points', fontsize=20, fontweight='bold', color='k')
plt.ylabel('Testing TPR (%)', fontsize=16)
plt.show()

In [None]:
# perf_scores already returns TNR as a percentage, so no scaling is applied
test_tnr = np.array([dtc_rfe_tnr, rfc_rfe_tnr, nn_tf_rfe_tnr, dtc_tnr, rfc_tnr,
                     nn_tf_tnr]).reshape(6,1)

model_tnr = pd.DataFrame(data=test_tnr, index=model_names,
                         columns=['Testing TNR(%)'])

# Create the figure and axes once and hand the axes to pandas. Calling
# plt.figure() and then DataFrame.plot() without `ax` makes pandas open a second
# figure and leaves the first one empty in the output.
fig, ax = plt.subplots(figsize=(16,8))
model_tnr.plot(kind='bar', ax=ax, fontsize=16, color='tab:cyan')
# write each bar's value just below its top edge
for p in ax.patches:
    ax.annotate("%.2f" % p.get_height(), (p.get_x() + p.get_width() / 2.,
    p.get_height()),ha='center', va='center', rotation=0, xytext=(0, -20),
    textcoords='offset points', fontsize=20, fontweight='bold', color='k')
plt.ylabel('Testing TNR (%)', fontsize=16)
plt.show()

In [None]:
# perf_scores already returns PPV as a percentage, so no scaling is applied
test_ppv = np.array([dtc_rfe_ppv, rfc_rfe_ppv, nn_tf_rfe_ppv, dtc_ppv, rfc_ppv,
                     nn_tf_ppv]).reshape(6,1)

model_ppv = pd.DataFrame(data=test_ppv, index=model_names,
                         columns=['Testing PPV(%)'])

# Create the figure and axes once and hand the axes to pandas. Calling
# plt.figure() and then DataFrame.plot() without `ax` makes pandas open a second
# figure and leaves the first one empty in the output.
fig, ax = plt.subplots(figsize=(16,8))
model_ppv.plot(kind='bar', ax=ax, fontsize=16, color='tab:gray')
# write each bar's value just below its top edge
for p in ax.patches:
    ax.annotate("%.2f" % p.get_height(), (p.get_x() + p.get_width() / 2.,
    p.get_height()),ha='center', va='center', rotation=0, xytext=(0, -20),
    textcoords='offset points', fontsize=20, fontweight='bold', color='k')
plt.ylabel('Testing PPV (%)', fontsize=16)
plt.show()

In [None]:
print('--- {:.3f} seconds ---'.format(time.time() - start_time))