In [None]:
import os
import copy
import numpy as np
from pywt import wavedec, waverec
import matplotlib.pyplot as plt
from IPython import display
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.metrics import  accuracy_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import roc_auc_score, roc_curve, auc
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')
# Global Matplotlib defaults shared by all figures below.
plt.rcParams.update({"font.size": 12, "font.weight": 'bold', "lines.linewidth": 2, "lines.markersize": 7})
# Switch the working directory to the figure folder (machine-specific absolute path).
os.chdir(r'C:\Users\gs582\Dropbox\IITK_Cervical\Denoisig_RFclassification\figure')


In [None]:
# Write the functions here
def wavelet_denoising(data, wave, lev, tsr, tm):
    """Denoise ``data`` by thresholding its wavelet coefficient bands.

    The signal is decomposed with ``wavedec``; the last ``lev`` coefficient
    arrays are thresholded in place, walking from the end of the list
    backwards, and the signal is rebuilt with ``waverec``.

    NOTE(review): ``thvalue_vector`` and ``threshold_vector`` are not defined
    in the visible part of this file; they must be available before this
    function is called.

    Args:
        data: Signal to denoise.
        wave: Wavelet passed to ``wavedec`` and ``waverec``.
        lev: Decomposition level; also the number of bands thresholded.
        tsr: Forwarded to ``thvalue_vector`` when computing each threshold.
        tm: Forwarded to ``threshold_vector`` as ``tm``.

    Returns:
        The signal reconstructed from the thresholded coefficients.
    """
    coeffs = wavedec(data, wavelet=wave, level=lev)

    # Visit coeffs[-1], coeffs[-2], ..., coeffs[-lev] and overwrite each in place.
    for band in coeffs[:-lev - 1:-1]:
        cutoff = thvalue_vector(band[::], tsr)
        band[::] = threshold_vector(x=band[::], thr=cutoff, tm=tm)

    return waverec(coeffs, wavelet=wave)



def extract_noise(data, wave, lev):
    """Split ``data`` into a detail-only and an approximation-only reconstruction.

    The signal is decomposed once with ``wavedec`` at level ``lev``. Two
    signals are then rebuilt with ``waverec``:

    * noise    -- the approximation coefficients are zeroed, details kept;
    * denoised -- the last ``lev`` coefficient arrays (the detail bands)
      are zeroed, approximation kept.

    Usage:
        data_noise, data_denoised = extract_noise(data, wave, lev)

    Args:
        data: Signal of even length.
        wave: Mother wavelet used for decomposition and reconstruction.
        lev: Decomposition level. It is supplied by the caller; no
            sparsity-based level selection happens in this function.

    Returns:
        tuple: ``(data_noise, data_denoised)``.

    Raises:
        ValueError: If ``len(data)`` is odd.
    """
    if len(data) % 2:
        raise ValueError('Data length should be even.')

    detail_cf = wavedec(data, wavelet=wave, level=lev)
    # Independent copy so the two reconstructions do not share arrays.
    approx_cf = copy.deepcopy(detail_cf)

    # Noise: remove the approximation band, keep every detail band.
    detail_cf[0][::] = 0
    #detail_cf[-1][::] = 0
    data_noise = waverec(detail_cf, wavelet=wave)

    # Denoised: remove coefficient arrays -1 ... -lev, keep the approximation.
    for band in approx_cf[:-lev - 1:-1]:
        band[::] = 0
    data_denoised = waverec(approx_cf, wavelet=wave)

    return data_noise, data_denoised

def data_wcfs(data, wave, lev):
    """Collect the leading approximation and detail coefficients per level.

    For every level ``i`` from 1 to ``lev`` the data is decomposed with
    ``wavedec(level=i)`` and the first two arrays of the result are stored
    (per PyWavelets' ordering these are the level-``i`` approximation and
    the level-``i`` detail coefficients).

    Args:
        data (numpy.ndarray): 1D vector of the reference data.
        wave (str): Wavelet to be used for the wavelet decomposition.
        lev (int): Highest decomposition level to compute.

    Returns:
        list: ``[approximations, details]``; each is a list with one
        coefficient array per level, ordered from level 1 to ``lev``.
        Both lists are empty when ``lev`` is less than 1.
    """
    approximations, details = [], []
    for level in range(1, lev + 1):
        coeffs = wavedec(data=data, wavelet=wave, level=level)
        # Plain method calls; no need to route them through exec().
        approximations.append(coeffs[0])
        details.append(coeffs[1])
    return [approximations, details]


def Rf_classification(data, label, N_iter, CV, random_state=0):
    """Tune and fit a random forest with a randomized hyperparameter search.

    Args:
        data: Training samples passed to ``fit``.
        label: Training labels passed to ``fit``.
        N_iter (int): Number of parameter settings sampled by the search.
        CV: Cross-validation setting forwarded to ``RandomizedSearchCV``
            (``cv`` argument).
        random_state: Seed for the parameter sampling of the search, so that
            repeated runs pick the same candidates. The forest itself is
            always seeded with 0.

    Returns:
        The fitted ``RandomizedSearchCV`` object (exposes ``predict``,
        ``predict_proba``, ``score`` and ``best_params_``).
    """
    # Hyperparameter search space
    n_estimator = np.linspace(5, 100, 20, dtype=np.int64)
    max_depth = np.linspace(2, 30, 15, dtype=np.int64)
    # 'auto' is intentionally absent: it is no longer accepted by recent
    # scikit-learn releases and was an alias of 'sqrt' for classifiers.
    max_feature = ['sqrt', 'log2']
    min_samples_split = [2, 3, 5, 7, 10, 12, 15]
    min_samples_leaf = [1, 2, 5, 7, 10, 12, 15]

    grid_param = {'n_estimators': n_estimator, 'max_features': max_feature,
                  'max_depth': max_depth, 'min_samples_split': min_samples_split,
                  'min_samples_leaf': min_samples_leaf}

    forest = RandomForestClassifier(random_state=0)
    search = RandomizedSearchCV(estimator=forest, param_distributions=grid_param, n_iter=N_iter,
                                cv=CV, verbose=2, n_jobs=-1, random_state=random_state)

    # Train the model
    model = search.fit(data, label)

    return model

def eval_metrics(y_v, y_pred):
    """Compute per-class and micro-averaged classification metrics.

    Every class is evaluated one-vs-rest from the multiclass confusion
    matrix (rows are summed for false negatives, columns for false
    positives).

    Args:
        y_v: True labels.
        y_pred: Predicted labels.

    Returns:
        tuple: ``(Accuracy, Sensitivity, Specificity, precisson, recall)``.
        The first three are arrays with one entry per class, in the class
        order of the confusion matrix. ``precisson`` and ``recall`` are the
        micro-averaged precision and recall scores.
    """
    # Calculate the confusion matrix
    conf_mat = confusion_matrix(y_v, y_pred)
    tp = np.diag(conf_mat)
    fp = conf_mat.sum(axis=0) - tp
    fn = conf_mat.sum(axis=1) - tp
    # True negatives are everything left once tp, fn and fp are removed.
    # The parentheses matter: without them fn and fp were added back.
    tn = conf_mat.sum() - (tp + fn + fp)

    # Calculate accuracy, sensitivity and specificity for each class
    Accuracy = (tp + tn) / (tp + tn + fp + fn)
    Sensitivity = tp / (tp + fn)
    Specificity = tn / (tn + fp)

    precisson = precision_score(y_v, y_pred, average='micro')
    recall = recall_score(y_v, y_pred, average='micro')

    return Accuracy, Sensitivity, Specificity, precisson, recall


In [None]:
# Load the spectra and their class labels from disk (machine-specific absolute paths).
# Only the first 432 columns of the spectra array are kept.
dat = np.load(r'C:\Users\gs582\Dropbox\IITK_Cervical\Data\Exvivo\Nor_Pre_Can.npy')[:, :432]
lab = np.load(r'C:\Users\gs582\Dropbox\IITK_Cervical\Data\Exvivo\Label_NPC.npy')
# presumably the wavelength axis in nm (it is the x-axis of the spectra plots) -- confirm
lambd = np.loadtxt(r'C:\Users\gs582\Dropbox\IITK_Cervical\Data\lambd.txt')
# Delete the too noisy or corrupted data
idx = [9, 10, 11, 51, 52, 53, 54, 55, 56, 57, 58, 59, 62, 63, 64, 65, 66, 67, 68, 69, 117, 118, 153, 155, 161, 174, 175, 176, 177,
       181, 182, 183, 184, 185, 186, 187, 202, 203, 204, 207, 208, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 285]
# The same row indices are removed from spectra and labels so both stay aligned.
dat=np.delete(dat, idx, axis=0)
lab = np.delete(lab, idx, axis=0)


In [None]:
# Shape of the spectra array after the rejected rows were removed.
print(dat.shape)

In [None]:
# Denoise every spectrum with one fixed wavelet and decomposition level.
wave, dl = 'coif3', 3

data_denoised = np.empty(dat.shape, dtype=dat.dtype)
# NOTE(review): `lev` is allocated here but never written or read in the visible code.
lev = np.empty(dat.shape[0])
for i, spectrum in enumerate(dat):
    # extract_noise returns (noise, denoised); only the denoised part is stored.
    data_denoised[i, :] = extract_noise(data=spectrum, wave=wave, lev=dl)[1]

In [None]:
# Area normalization: divide each spectrum by its own trapezoidal integral (unit spacing).
# NOTE: np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid.
for i in range(dat.shape[0]):
    for spectra in (dat, data_denoised):
        spectra[i, :] = spectra[i, :] / np.trapz(spectra[i, :], dx=1)
    

In [None]:
# Figure: (a) one raw spectrum per class, (b) raw versus denoised version of one spectrum.
i, j, k, alp, alp1 = 8, 148, 244, 0.7, 0.8
fig, ax = plt.subplots(1, 2, figsize=(12, 4), sharex=False, sharey=False, constrained_layout=True, num=1, clear=True)

# (a) Three example rows, each scaled to its own maximum.
for row, colour, name in ((i, 'b', 'Normal'), (j, 'm', 'Precancer'), (k, 'r', 'Cancer')):
    ax[0].plot(lambd, dat[row, :]/np.max(dat[row, :]), colour, alpha=alp1, label=name)

# (b) Row i before and after denoising, each scaled to its own maximum.
for spectra, colour, name in ((dat, 'b', 'Noisy'), (data_denoised, 'r', 'Denoised')):
    ax[1].plot(lambd, spectra[i, :]/np.max(spectra[i, :]), colour, label=name)

# Shared cosmetics; only the panel tag and its vertical position differ.
for axis, tag, tag_y in ((ax[0], '(a)', -0.39), (ax[1], '(b)', -0.38)):
    lg = axis.legend(frameon=False, fontsize=10)
    for labels in lg.texts:
        labels.set_alpha(alp)
    axis.set_xlim([lambd[0], lambd[431]])
    axis.set_xlabel('Wavelength (nm)', fontweight='bold', alpha=alp)
    axis.set_ylabel('Intensity (a.u.)', fontweight='bold', alpha=alp)
    axis.text(x=570, y=tag_y, s=tag, fontsize=16)
# plt.savefig('int_den.png', dpi=400, bbox_inches='tight', transparent=True, pad_inches=0.02)
plt.show()

In [None]:
# Plot of noisy data
# i, j, k, alp = 8, 148, 244, 0.7
# plt.figure(figsize=(6, 4), dpi=100, constrained_layout=True)
# plt.plot(lambd, dat[i, :]/np.max(dat[i, :]), 'b', label='Normal')
# plt.plot(lambd, dat[j, :]/np.max(dat[j, :]), 'm', label='Precancer')
# plt.plot(lambd, dat[k, :]/np.max(dat[k, :]), 'r', label='Cancer')
# lg = plt.legend(frameon=False, fontsize=10)
# for labels in lg.texts:
#     labels.set_alpha(alp)
# plt.xlim([lambd[0], lambd[431]])
# plt.xlabel('Wavelength (nm)', fontweight='bold', alpha=alp)
# plt.ylabel('Intensity (a.u.)', fontweight='bold', alpha=alp)
# # plt.savefig('int.png', dpi=400, bbox_inches='tight', transparent=True, pad_inches=0.02)
# plt.show()

In [None]:
# # Plot of noisy and denoised data
# i, j, k, alp = 8, 148, 244, 0.7
# plt.figure(figsize=(6, 4), dpi=100, constrained_layout=True)
# plt.plot(lambd, dat[i, :]/np.max(dat[i, :]), 'b', label='Noisy')
# plt.plot(lambd, data_denoised[i, :]/np.max(data_denoised[i, :]), 'b', label='Denoised')
# # plt.plot(lambd, dat[j, :]/np.max(dat[j, :]), 'm', label='Noisy')
# # plt.plot(lambd, data_denoised[j, :]/np.max(data_denoised[j, :]), 'm', label='Denoised')
# # plt.plot(lambd, dat[k, :]/np.max(dat[k, :]), 'r', label='Noisy')
# # plt.plot(lambd, data_denoised[k, :]/np.max(data_denoised[k, :]), 'r', label='Denoised')

# lg = plt.legend(frameon=False, fontsize=10)
# for labels in lg.texts:
#     labels.set_alpha(alp)
# plt.xlim([lambd[0], lambd[431]])
# plt.xlabel('Wavelength (nm)', fontweight='bold', alpha=alp)
# plt.ylabel('Intensity (a.u.)', fontweight='bold', alpha=alp)
# # plt.savefig('den_int.png', dpi=400, bbox_inches='tight', transparent=True, pad_inches=0.02)
# plt.show()

##### Classification of imbalanced noisy data

In [None]:
# Hold out 20 % of the noisy spectra for testing; the fixed seed makes the split repeatable.
xtun, xvun, ytun, yvun = train_test_split(dat, lab, test_size=0.20, random_state=0)

In [None]:
# Tune and fit the random forest on the imbalanced noisy training set.
modelun = Rf_classification(data=xtun, label=ytun, N_iter=500, CV=3)

# Predict the class of the test data set
ypredun = modelun.predict(xvun)
# Scores of the fitted search object on the training and the test set
scoret, scorev = modelun.score(xtun, ytun), modelun.score(xvun, yvun)
# Per-class and micro-averaged evaluation metrics on the test set
accun, sensun, specun, precun, recalun = eval_metrics(y_pred=ypredun, y_v=yvun)


In [None]:
# Hyperparameters of the best candidate found by the randomized search.
modelun.best_params_

In [None]:
# Scores are printed as percentages; accuracy, sensitivity and specificity
# are arrays with one value per class, precision and recall are micro-averaged.
print('Training Score : %0.2f' % (scoret*100))
print('Testing Score : %0.2f' % (scorev*100))
print('Accuracy :', accun)
print('Sensitivity :', sensun)
print('Specificity :', specun)
print('Precission :', precun)
print('Recall :', recalun)

In [None]:
# Confusion matrix and one-vs-rest ROC curves for the imbalanced noisy data.
pred_probun = modelun.predict_proba(xvun)

fprun, tprun, roc_aucun = {}, {}, {}
n_cls = 3

# Column i of the predicted probabilities is scored against class label i.
for i in range(n_cls):
    fprun[i], tprun[i], _ = roc_curve(yvun, pred_probun[:, i], pos_label=i)
    roc_aucun[i] = auc(fprun[i], tprun[i])

fig = plt.figure(figsize=(8, 3), dpi=100, layout='constrained')
gs = fig.add_gridspec(1, 2, hspace=0, wspace=0)
ax = gs.subplots(sharex=False, sharey=False)
cm_ax, roc_ax = ax

# Left panel: confusion matrix with vertical tick labels on the y-axis.
ConfusionMatrixDisplay.from_predictions(yvun, ypredun, display_labels=('Normal', 'Precancer', 'Cancer'), colorbar=False, ax=cm_ax)
plt.setp(cm_ax.get_yticklabels(), rotation=90, verticalalignment='center')

# Right panel: one ROC curve per class, legend shows the area under the curve.
roc_styles = (('-b', 'Normal(Area = %0.2f)'), ('-m', 'Precancer(Area = %0.2f)'), ('-r', 'Cancer(Area = %0.2f)'))
for cls, (fmt, lbl) in enumerate(roc_styles):
    roc_ax.plot(fprun[cls], tprun[cls], fmt, label=lbl % roc_aucun[cls])
roc_ax.set(xlabel='False Positive Rate', ylabel='True Positive rate',
           xlim=[-0.02, 1.02], ylim=[-0.02, 1.02])
roc_ax.legend(frameon=False, loc='best')
plt.show()

In [None]:
# Stand-alone ROC figure for the imbalanced noisy data.
alp, alp1 = 0.7, 0.8
plt.figure(figsize=(6, 4), dpi=100, constrained_layout=True)
roc_styles = (('-b', 'Normal(Area = %0.2f)'), ('-m', 'Precancer(Area = %0.2f)'), ('-r', 'Cancer(Area = %0.2f)'))
for cls, (fmt, lbl) in enumerate(roc_styles):
    plt.plot(fprun[cls], tprun[cls], fmt, alpha=alp1, label=lbl % roc_aucun[cls])
plt.xlabel('False Positive Rate', fontweight='bold', alpha=alp)
plt.ylabel('True Positive rate', fontweight='bold', alpha=alp)
plt.xlim([-0.01, 1.01])
plt.ylim([-0.01, 1.01])
lg = plt.legend(frameon=False, fontsize=8, loc='lower right')
# Fade the legend text to match the axis labels.
for labels in lg.texts:
    labels.set_alpha(alp)
# plt.savefig('roc_noisy.png', dpi=400, bbox_inches='tight', transparent=True, pad_inches=0.02)

plt.show()

In [None]:
# Stand-alone confusion matrix for the imbalanced noisy data.
# The pyplot calls below style the axes created by from_predictions.
alp=0.7
ConfusionMatrixDisplay.from_predictions(yvun, ypredun, display_labels=('Normal', 'Precancer', 'Cancer'), colorbar=False)
plt.yticks(rotation=90, verticalalignment='center', alpha=0.6, fontweight='light', fontsize=10)
plt.xticks(alpha=0.6, fontweight='light', fontsize=10)
plt.xlabel('Predicted label', fontweight='bold', alpha=alp)
plt.ylabel('True label', fontweight='bold', alpha=alp)
# plt.savefig('conf_noisy.png', dpi=400, bbox_inches='tight', transparent=True, pad_inches=0.02)
plt.show()

##### Classification of balanced noisy data

In [None]:
# Split first, then balance ONLY the training part with SMOTE.
# Oversampling before the split leaks information: synthetic samples are
# interpolated from spectra that later land in the test set, and the test
# set itself would contain synthetic spectra.
xtn_raw, xvn, ytn_raw, yvn = train_test_split(dat, lab, test_size=0.20, random_state=0)
# Seeded so the synthetic samples are the same on every run.
datan, label1 = SMOTE(random_state=0).fit_resample(xtn_raw, ytn_raw)
# Balanced training set; the test set (xvn, yvn) stays untouched real data.
xtn, ytn = datan, label1

In [None]:
# Classification of the SMOTE-balanced noisy data
modeln = Rf_classification(data=xtn, label=ytn, N_iter=500, CV=3)

# Predict the class of the test data set
ypredn = modeln.predict(xvn)
# Scores of the fitted search object on the training and the test set
scoret, scorev = modeln.score(xtn, ytn), modeln.score(xvn, yvn)
# Per-class and micro-averaged evaluation metrics on the test set
accurn, sensn, specn, precisn, recaln = eval_metrics(y_pred=ypredn, y_v=yvn)


In [None]:
# Hyperparameters of the best candidate found by the randomized search.
modeln.best_params_

In [None]:
# Scores are printed as percentages; accuracy, sensitivity and specificity
# are arrays with one value per class, precision and recall are micro-averaged.
print('Training Score : %0.2f' % (scoret*100))
print('Testing Score : %0.2f' % (scorev*100))
print('Accuracy :', accurn)
print('Sensitivity :', sensn)
print('Specificity :', specn)
print('Precission :', precisn)
print('Recall :', recaln)
# print('AUC_ROC : %0.2f' % auc_roc)

In [None]:
# Confusion matrix and one-vs-rest ROC curves for the balanced noisy data.
pred_probn = modeln.predict_proba(xvn)

fprn, tprn, roc_aucn = {}, {}, {}
n_cls = 3

# Column i of the predicted probabilities is scored against class label i.
for i in range(n_cls):
    fprn[i], tprn[i], _ = roc_curve(yvn, pred_probn[:, i], pos_label=i)
    roc_aucn[i] = auc(fprn[i], tprn[i])

fig = plt.figure(figsize=(8, 3), dpi=100, layout='constrained')
gs = fig.add_gridspec(1, 2, hspace=0, wspace=0)
ax = gs.subplots(sharex=False, sharey=False)
cm_ax, roc_ax = ax

# Left panel: confusion matrix with vertical tick labels on the y-axis.
ConfusionMatrixDisplay.from_predictions(yvn, ypredn, display_labels=('Normal', 'Precancer', 'Cancer'), colorbar=False, ax=cm_ax)
plt.setp(cm_ax.get_yticklabels(), rotation=90, verticalalignment='center')

# Right panel: one ROC curve per class, legend shows the area under the curve.
roc_styles = (('-b', 'Normal(Area = %0.2f)'), ('-m', 'Precancer(Area = %0.2f)'), ('-r', 'Cancer(Area = %0.2f)'))
for cls, (fmt, lbl) in enumerate(roc_styles):
    roc_ax.plot(fprn[cls], tprn[cls], fmt, label=lbl % roc_aucn[cls])
roc_ax.set(xlabel='False Positive Rate', ylabel='True Positive rate',
           xlim=[-0.02, 1.02], ylim=[-0.02, 1.02])
roc_ax.legend(frameon=False, fontsize=8, loc='lower right')
plt.show()

In [None]:
# Stand-alone confusion matrix for the balanced noisy data.
# The pyplot calls below style the axes created by from_predictions.
alp=0.7
ConfusionMatrixDisplay.from_predictions(yvn, ypredn, display_labels=('Normal', 'Precancer', 'Cancer'), colorbar=False)
plt.yticks(rotation=90, verticalalignment='center', alpha=alp, fontweight='light', fontsize=10)
plt.xticks(alpha=alp, fontweight='light', fontsize=10)
plt.xlabel('Predicted label', fontweight='bold', alpha=alp)
plt.ylabel('True label', fontweight='bold', alpha=alp)
# plt.savefig('conf_noisy_bal.png', dpi=400, bbox_inches='tight', transparent=True, pad_inches=0.02)
plt.show()

In [None]:
# Stand-alone ROC figure for the balanced noisy data.
alp, alp1 = 0.7, 0.8
plt.figure(figsize=(6, 4), dpi=100, constrained_layout=True)
roc_styles = (('-b', 'Normal(Area = %0.2f)'), ('-m', 'Precancer(Area = %0.2f)'), ('-r', 'Cancer(Area = %0.2f)'))
for cls, (fmt, lbl) in enumerate(roc_styles):
    plt.plot(fprn[cls], tprn[cls], fmt, alpha=alp1, label=lbl % roc_aucn[cls])
plt.xlabel('False Positive Rate', fontweight='bold', alpha=alp)
plt.ylabel('True Positive rate', fontweight='bold', alpha=alp)
plt.xlim([-0.01, 1.01])
plt.ylim([-0.01, 1.01])
lg = plt.legend(frameon=False, fontsize=8, loc='lower right')
# Fade the legend text to match the axis labels.
for labels in lg.texts:
    labels.set_alpha(alp)
# plt.savefig('roc_noisy_bal.png', dpi=400, bbox_inches='tight', transparent=True, pad_inches=0.02)
plt.show()

In [None]:
# Class counts (labels 0, 1, 2) for the training set, the test set and label1,
# followed by the three set sizes.
for label_set in (ytn, yvn, label1):
    print(*(np.count_nonzero(label_set == cls) for cls in (0, 1, 2)))
print(len(label1), len(ytn), len(yvn))

##### Classification of imbalanced denoised data

In [None]:
# Hold out 20 % of the denoised spectra for testing (fixed seed), then tune
# and fit the random forest on the imbalanced denoised training set.
xtd, xvd, ytd, yvd = train_test_split(data_denoised, lab, test_size=0.20, random_state=0)
modeld = Rf_classification(data=xtd, label=ytd, N_iter=500, CV=3)

# Predict the class of the test data set
ypredd = modeld.predict(xvd)
# Scores of the fitted search object on the training and the test set
scoret, scorev = modeld.score(xtd, ytd), modeld.score(xvd, yvd)
# Per-class and micro-averaged evaluation metrics on the test set
accuracy, sensitivity, specificity, precision, recal = eval_metrics(y_pred=ypredd, y_v=yvd)


In [None]:
# Class counts (labels 0, 1, 2) for the training set, the test set and the
# full label vector, followed by the three set sizes.
for label_set in (ytd, yvd, lab):
    print(*(np.count_nonzero(label_set == cls) for cls in (0, 1, 2)))
print(len(lab), len(ytd), len(yvd))

In [None]:
# Hyperparameters of the best candidate found by the randomized search.
modeld.best_params_

In [None]:
# Recompute the test-set metrics for the imbalanced denoised model and print them.
# Scores are printed as percentages; accuracy, sensitivity and specificity
# are arrays with one value per class, precision and recall are micro-averaged.
accuracy, sensitivity, specificity, precision, recal = eval_metrics(y_pred=ypredd, y_v=yvd)
print('Training Score : %0.2f' % (scoret*100))
print('Testing Score : %0.2f' % (scorev*100))
print('Accuracy :', accuracy)
print('Sensitivity :', sensitivity)
print('Specificity :', specificity)
print('Precission :', precision)
print('Recall :', recal)

In [None]:
# Confusion matrix and one-vs-rest ROC curves for the imbalanced denoised data.
pred_probd = modeld.predict_proba(xvd)

fprd, tprd, roc_aucd = {}, {}, {}
n_cls = 3

# Column i of the predicted probabilities is scored against class label i.
for i in range(n_cls):
    fprd[i], tprd[i], _ = roc_curve(yvd, pred_probd[:, i], pos_label=i)
    roc_aucd[i] = auc(fprd[i], tprd[i])

fig = plt.figure(figsize=(10, 4), dpi=100, layout='constrained')
gs = fig.add_gridspec(1, 2, hspace=0, wspace=0)
ax = gs.subplots(sharex=False, sharey=False)
cm_ax, roc_ax = ax

# Left panel: confusion matrix with vertical tick labels on the y-axis.
ConfusionMatrixDisplay.from_predictions(yvd, ypredd, display_labels=('Normal', 'Precancer', 'Cancer'), colorbar=False, ax=cm_ax)
plt.setp(cm_ax.get_yticklabels(), rotation=90, verticalalignment='center')

# Right panel: one ROC curve per class, legend shows the area under the curve.
roc_styles = (('-b', 'Normal(Area = %0.2f)'), ('-m', 'Precancer(Area = %0.2f)'), ('-r', 'Cancer(Area = %0.2f)'))
for cls, (fmt, lbl) in enumerate(roc_styles):
    roc_ax.plot(fprd[cls], tprd[cls], fmt, label=lbl % roc_aucd[cls])
roc_ax.set(xlabel='False Positive Rate', ylabel='True Positive rate',
           xlim=[-0.02, 1.02], ylim=[-0.02, 1.02])
roc_ax.legend(frameon=False, loc='lower right')
plt.show()

In [None]:
# Stand-alone ROC figure for the imbalanced denoised data.
alp, alp1 = 0.7, 0.8
plt.figure(figsize=(6, 4), dpi=100, constrained_layout=True)
roc_styles = (('-b', 'Normal(Area = %0.2f)'), ('-m', 'Precancer(Area = %0.2f)'), ('-r', 'Cancer(Area = %0.2f)'))
for cls, (fmt, lbl) in enumerate(roc_styles):
    plt.plot(fprd[cls], tprd[cls], fmt, alpha=alp1, label=lbl % roc_aucd[cls])
plt.xlabel('False Positive Rate', fontweight='bold', alpha=alp)
plt.ylabel('True Positive rate', fontweight='bold', alpha=alp)
plt.xlim([-0.01, 1.01])
plt.ylim([-0.01, 1.01])
lg = plt.legend(frameon=False, fontsize=8, loc='lower right')
# Fade the legend text to match the axis labels.
for labels in lg.texts:
    labels.set_alpha(alp)
# plt.savefig('roc_den.png', dpi=400, bbox_inches='tight', transparent=True, pad_inches=0.02)

plt.show()

In [None]:
# Stand-alone confusion matrix for the imbalanced denoised data.
# The pyplot calls below style the axes created by from_predictions.
alp=0.7
ConfusionMatrixDisplay.from_predictions(yvd, ypredd, display_labels=('Normal', 'Precancer', 'Cancer'), colorbar=False)
plt.yticks(rotation=90, verticalalignment='center', alpha=0.6, fontweight='light', fontsize=10)
plt.xticks(alpha=0.6, fontweight='light', fontsize=10)
plt.xlabel('Predicted label', fontweight='bold', alpha=alp)
plt.ylabel('True label', fontweight='bold', alpha=alp)
# plt.savefig('conf_den.png', dpi=400, bbox_inches='tight', transparent=True, pad_inches=0.02)
plt.show()

##### Classification of balanced denoised data

In [None]:
# Split first, then balance ONLY the training part with SMOTE.
# Oversampling before the split leaks information: synthetic samples are
# interpolated from spectra that later land in the test set, and the test
# set itself would contain synthetic spectra.
xtdb_raw, xvdb, ytdb_raw, yvdb = train_test_split(data_denoised, lab, test_size=0.20, random_state=0)
# Seeded so the synthetic samples are the same on every run.
data_den, label2 = SMOTE(random_state=0).fit_resample(xtdb_raw, ytdb_raw)
# Balanced training set; the test set (xvdb, yvdb) stays untouched real data.
xtdb, ytdb = data_den, label2


In [None]:
# Tune and fit the random forest on the SMOTE-balanced denoised training set.
modeldb = Rf_classification(data=xtdb, label=ytdb, N_iter=500, CV=3)

# Predict the class of the test data set
ypreddb = modeldb.predict(xvdb)
# Scores of the fitted search object on the training and the test set
scoret, scorev = modeldb.score(xtdb, ytdb), modeldb.score(xvdb, yvdb)
# Per-class and micro-averaged evaluation metrics on the test set
accuracy, sensitivity, specificity, precision, recal = eval_metrics(y_pred=ypreddb, y_v=yvdb)


In [None]:
# Hyperparameters of the best candidate found by the randomized search.
modeldb.best_params_

In [None]:
# Recompute the test-set metrics for the balanced denoised model and print them.
# Scores are printed as percentages; accuracy, sensitivity and specificity
# are arrays with one value per class, precision and recall are micro-averaged.
accuracy, sensitivity, specificity, precision, recal = eval_metrics(y_pred=ypreddb, y_v=yvdb)
print('Training Score : %0.2f' % (scoret*100))
print('Testing Score : %0.2f' % (scorev*100))
print('Accuracy :', accuracy)
print('Sensitivity :', sensitivity)
print('Specificity :', specificity)
print('Precission :', precision)
print('Recall :', recal)

In [None]:
# Confusion matrix and one-vs-rest ROC curves for the balanced denoised data.
pred_probdb = modeldb.predict_proba(xvdb)

fprdb, tprdb, roc_aucdb = {}, {}, {}
n_cls = 3

# Column i of the predicted probabilities is scored against class label i.
for i in range(n_cls):
    fprdb[i], tprdb[i], _ = roc_curve(yvdb, pred_probdb[:, i], pos_label=i)
    roc_aucdb[i] = auc(fprdb[i], tprdb[i])

fig = plt.figure(figsize=(10, 4), dpi=100, layout='constrained')
gs = fig.add_gridspec(1, 2, hspace=0, wspace=0)
ax = gs.subplots(sharex=False, sharey=False)
cm_ax, roc_ax = ax

# Left panel: confusion matrix with vertical tick labels on the y-axis.
ConfusionMatrixDisplay.from_predictions(yvdb, ypreddb, display_labels=('Normal', 'Precancer', 'Cancer'), colorbar=False, ax=cm_ax)
plt.setp(cm_ax.get_yticklabels(), rotation=90, verticalalignment='center')

# Right panel: one ROC curve per class, legend shows the area under the curve.
roc_styles = (('-b', 'Normal(Area = %0.2f)'), ('-m', 'Precancer(Area = %0.2f)'), ('-r', 'Cancer(Area = %0.2f)'))
for cls, (fmt, lbl) in enumerate(roc_styles):
    roc_ax.plot(fprdb[cls], tprdb[cls], fmt, label=lbl % roc_aucdb[cls])
roc_ax.set(xlabel='False Positive Rate', ylabel='True Positive rate',
           xlim=[-0.02, 1.02], ylim=[-0.02, 1.02])
roc_ax.legend(frameon=False, loc='best')
plt.show()

In [None]:
# Stand-alone confusion matrix for the balanced denoised data.
# The pyplot calls below style the axes created by from_predictions.
alp=0.7
ConfusionMatrixDisplay.from_predictions(yvdb, ypreddb, display_labels=('Normal', 'Precancer', 'Cancer'), colorbar=False)
plt.yticks(rotation=90, verticalalignment='center', alpha=0.6, fontweight='light', fontsize=10)
plt.xticks(alpha=0.6, fontweight='light', fontsize=10)
plt.xlabel('Predicted label', fontweight='bold', alpha=alp)
plt.ylabel('True label', fontweight='bold', alpha=alp)
# plt.savefig('conf_den_bal.png', dpi=400, bbox_inches='tight', transparent=True, pad_inches=0.02)
plt.show()

In [None]:
# Stand-alone ROC figure for the balanced denoised data.
alp, alp1 = 0.7, 0.8
plt.figure(figsize=(6, 4), dpi=100, constrained_layout=True)
roc_styles = (('-b', 'Normal(Area = %0.2f)'), ('-m', 'Precancer(Area = %0.2f)'), ('-r', 'Cancer(Area = %0.2f)'))
for cls, (fmt, lbl) in enumerate(roc_styles):
    plt.plot(fprdb[cls], tprdb[cls], fmt, alpha=alp1, label=lbl % roc_aucdb[cls])
plt.xlabel('False Positive Rate', fontweight='bold', alpha=alp)
plt.ylabel('True Positive rate', fontweight='bold', alpha=alp)
plt.xlim([-0.01, 1.01])
plt.ylim([-0.01, 1.01])
lg = plt.legend(frameon=False, fontsize=8, loc='lower right')
# Fade the legend text to match the axis labels.
for labels in lg.texts:
    labels.set_alpha(alp)
# plt.savefig('roc_den_bal.png', dpi=400, bbox_inches='tight', transparent=True, pad_inches=0.02)
plt.show()

In [None]:
# Confusion matrices of all four experiments side by side:
# (a) imbalanced noisy, (b) imbalanced denoised, (c) balanced noisy, (d) balanced denoised.
alp=0.8
fig, ax = plt.subplots(1, 4, figsize=(16, 4), sharex=False, sharey=False, constrained_layout=True, num=1, clear=True)

conf_panels = (
    (yvun, ypredun, '(a)'),
    (yvd, ypredd, '(b)'),
    (yvn, ypredn, '(c)'),
    (yvdb, ypreddb, '(d)'),
)
for axis, (y_true, y_hat, tag) in zip(ax, conf_panels):
    ConfusionMatrixDisplay.from_predictions(y_true, y_hat, display_labels=('Normal', 'Precancer', 'Cancer'), colorbar=False, ax=axis)
    # Identical tick and label styling on every panel.
    plt.setp(axis.get_yticklabels(), rotation=90, verticalalignment='center', fontweight='light', alpha=alp, fontsize=12)
    plt.setp(axis.get_xticklabels(), verticalalignment='center', fontweight='light', alpha=alp, fontsize=12)
    axis.set_xlabel('Predicted label', fontsize=14, fontweight='bold', alpha=alp)
    axis.set_ylabel('True label', fontsize=14, fontweight='bold', alpha=alp)
    # Panel tag placed below the matrix.
    axis.text(x=0.85, y=3.1, s=tag, fontsize=16)

# plt.savefig('conf_all.png', dpi=400, bbox_inches='tight', transparent=True, pad_inches=0.02)
plt.show()

In [None]:
# ROC curves of all four experiments side by side:
# (a) imbalanced noisy, (b) imbalanced denoised, (c) balanced noisy, (d) balanced denoised.
alp, alp1 = 0.8, 0.9
fig, ax = plt.subplots(1, 4, figsize=(22, 4), sharex=False, sharey=False, constrained_layout=True, num=1, clear=True)

roc_styles = (('-b', 'Normal(Area = %0.2f)'), ('-m', 'Precancer(Area = %0.2f)'), ('-r', 'Cancer(Area = %0.2f)'))
roc_panels = (
    (fprun, tprun, roc_aucun, '(a)'),
    (fprd, tprd, roc_aucd, '(b)'),
    (fprn, tprn, roc_aucn, '(c)'),
    (fprdb, tprdb, roc_aucdb, '(d)'),
)
for axis, (fpr, tpr, area, tag) in zip(ax, roc_panels):
    # One curve per class; the legend entry carries the area under the curve.
    for cls, (fmt, lbl) in enumerate(roc_styles):
        axis.plot(fpr[cls], tpr[cls], fmt, alpha=alp1, label=lbl % area[cls])
    # Every axis label uses fontsize 14. Panel (a)'s x-label used to omit it
    # and therefore rendered smaller than all the others.
    axis.set_xlabel('False Positive Rate', fontsize=14, fontweight='bold', alpha=alp)
    axis.set_ylabel('True Positive rate', fontsize=14, fontweight='bold', alpha=alp)
    axis.set_xlim([-0.01, 1.01])
    axis.set_ylim([-0.01, 1.01])
    lg = axis.legend(frameon=False, fontsize=14, loc='lower right')
    # Fade the legend text to match the axis labels.
    for labels in lg.texts:
        labels.set_alpha(alp)
    # Panel tag placed below the x-axis.
    axis.text(x=0.5, y=-0.24, s=tag, fontsize=16)
# plt.savefig('roc_all.png', dpi=400, bbox_inches='tight', transparent=True, pad_inches=0.02)
plt.show()