In [None]:
# Importing Necessary Libraries
import cv2
import os
import shutil 
import math
import random
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import seaborn as sns 
import warnings

from tensorflow.keras.layers import AveragePooling2D,GlobalAveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ReduceLROnPlateau,EarlyStopping,ModelCheckpoint

warnings.filterwarnings("ignore")

In [None]:
# Function for Formatting Dataset
def _convert_bmp_folder(src_dir, dest_dir, size, border):
    """Convert every .bmp file in `src_dir` into a processed .jpg in `dest_dir`.

    Each image is resized to `size`, padded with a black border of `border`
    pixels on every side and smoothed with a 2x2 box blur.
    """
    for file in sorted(os.listdir(src_dir)):
        filename, file_ext = os.path.splitext(file)
        # Case-insensitive so that ".BMP" files are not silently dropped.
        if file_ext.lower() != '.bmp':
            continue
        img_path = os.path.join(src_dir, file)
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            print(f"Skipping unreadable image: {img_path}")
            continue
        img = cv2.resize(img, size)
        img = cv2.copyMakeBorder(img, border, border, border, border, cv2.BORDER_CONSTANT, value=0)
        img = cv2.blur(img, (2, 2))
        cv2.imwrite(os.path.join(dest_dir, filename + '.jpg'), img)


# Function for Formatting Dataset
def FormatDataset(dataset_src, dataset_dest, classes):
    """Create a pre-processed JPEG copy of the dataset.

    For every class `cls` the images are read from
    `<dataset_src>/im_<cls>/im_<cls>/CROPPED` (cropped images) and
    `<dataset_src>/im_<cls>/im_<cls>` (complete images) and written to
    `<dataset_dest>/<cls>/CROPPED` and `<dataset_dest>/<cls>/COMPLETE`.

    Cropped images are resized to 64x64 with a 1-pixel border (66x66 on disk);
    complete images are resized to 256x256 with a 2-pixel border (260x260 on disk).

    The function can be re-run safely: existing destination folders are reused
    and images with the same name are overwritten.

    Args:
        dataset_src: Root folder of the original dataset.
        dataset_dest: Root folder that receives the converted copy.
        classes: Iterable of class names.
    """
    for cls in classes:
        class_src = os.path.join(dataset_src, "im_" + cls, "im_" + cls)
        cropped_dest = os.path.join(dataset_dest, cls, 'CROPPED')
        complete_dest = os.path.join(dataset_dest, cls, 'COMPLETE')
        # exist_ok keeps a second run of the cell from failing.
        os.makedirs(cropped_dest, exist_ok=True)
        os.makedirs(complete_dest, exist_ok=True)

        # Formatting Cropped Images
        _convert_bmp_folder(os.path.join(class_src, "CROPPED"), cropped_dest, (64, 64), 1)
        # Formatting Complete Images
        _convert_bmp_folder(class_src, complete_dest, (256, 256), 2)

# Folder holding the original dataset
src = '../input/cervical-cancer-largest-dataset-sipakmed'
# Folder that receives the converted copy
dest = './CervicalCancer'
# Names of the image classes (one sub-folder per class)
classes = [
    "Dyskeratotic",
    "Koilocytotic",
    "Metaplastic",
    "Parabasal",
    "Superficial-Intermediate",
]
# Build the converted copy of the dataset
FormatDataset(src, dest, classes)

In [None]:
# Root folder of the converted dataset and the class sub-folders it contains
root_dir = "./CervicalCancer"
classes = [
    "Dyskeratotic",
    "Koilocytotic",
    "Metaplastic",
    "Parabasal",
    "Superficial-Intermediate",
]

def GetDatasetSize(path, classes, main = "CROPPED"):
    """Count the directory entries of every class folder.

    Args:
        path: Root folder of the dataset.
        classes: Iterable of class names (sub-folders of `path`).
        main: Sub-folder inside each class folder whose entries are counted.

    Returns:
        Dict mapping each class name to the number of entries found in
        `<path>/<class>/<main>`.
    """
    return {
        label: len(os.listdir(os.path.join(path, label, main)))
        for label in classes
    }

# Report how many files each class has in its COMPLETE folder
print(GetDatasetSize(root_dir, classes, "COMPLETE"));

### Split the Dataset such that we have
* 70% for Train Data
* 15% for Validation Data
* 15% for Testing Data

In [None]:
# Function for Creating Train / Validation / Test folders (One time use Only)

def TrainValTestSplit(root_dir, classes_dir, main = "CROPPED", val_ratio = 0.15, test_ratio = 0.15, seed = None):
    """Copy the images of every class into train/, val/ and test/ folders.

    For each class the files in `<root_dir>/<cls>/<main>` are shuffled and
    split: the first `1 - (val_ratio + test_ratio)` share goes to `train/<cls>`,
    the next `val_ratio` share to `val/<cls>` and the remainder to `test/<cls>`
    (all relative to the current working directory).

    Existing `train/<cls>`, `val/<cls>` and `test/<cls>` folders are removed
    first. Copying a fresh shuffle on top of an old split would otherwise put
    the same image into more than one subset.

    Args:
        root_dir: Root folder of the formatted dataset.
        classes_dir: Iterable of class names.
        main: Sub-folder of each class folder to copy images from.
        val_ratio: Fraction of the files used for validation.
        test_ratio: Fraction of the files used for testing.
        seed: Optional integer seed. When given, the split is reproducible;
            when None the global NumPy random state is used (original behaviour).

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more than 1.
    """
    if val_ratio < 0 or test_ratio < 0 or val_ratio + test_ratio > 1:
        raise ValueError("val_ratio and test_ratio must be non-negative and sum to at most 1")

    # A private generator keeps a seeded split from disturbing the global random state.
    rng = np.random if seed is None else np.random.RandomState(seed)

    for cls in classes_dir:
        # Creating Split Folders (starting from a clean state)
        split_dirs = {split: os.path.join(split, cls) for split in ('train', 'val', 'test')}
        for split_dir in split_dirs.values():
            if os.path.isdir(split_dir):
                shutil.rmtree(split_dir)
            os.makedirs(split_dir)

        # Folder to copy images from
        src = os.path.join(root_dir, cls, main)

        # Splitting the Files in the Given ratio.
        # os.listdir order is arbitrary, so sort first to make a seeded shuffle deterministic.
        allFileNames = sorted(os.listdir(src))
        rng.shuffle(allFileNames)
        train_end = int(len(allFileNames) * (1 - (val_ratio + test_ratio)))
        val_end = int(len(allFileNames) * (1 - test_ratio))
        split_files = {
            'train': [os.path.join(src, name) for name in allFileNames[:train_end]],
            'val': [os.path.join(src, name) for name in allFileNames[train_end:val_end]],
            'test': [os.path.join(src, name) for name in allFileNames[val_end:]],
        }

        # Printing the Split Details
        print(cls,':')
        print('Total images: ', len(allFileNames))
        print('Training: ', len(split_files['train']))
        print('Validation: ', len(split_files['val']))
        print('Testing: ', len(split_files['test']))

        # Copy-pasting images
        for split, file_paths in split_files.items():
            for file_path in file_paths:
                shutil.copy(file_path, split_dirs[split])
        print()
        

# Performing Train / Validation / Test Split
root_dir = "./CervicalCancer"   # Dataset Root Folder
classes_dir = [                 # Classes
    "Dyskeratotic",
    "Koilocytotic",
    "Metaplastic",
    "Parabasal",
    "Superficial-Intermediate",
]
TrainValTestSplit(root_dir, classes_dir)

### Building Model 


In [None]:
# Importing Keras for Image Classification
import keras
from keras.layers import Dense,Conv2D, Flatten, MaxPool2D, Dropout
from keras.models import Sequential
from keras.preprocessing import image
from keras.callbacks import ModelCheckpoint
from keras.models import load_model

### Preparing data using data generator 

In [None]:
# Training images are augmented on the fly (zoom, shear, horizontal flip) and
# scaled to [0, 1]; validation and test images are only scaled.
train_datagen = image.ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
val_datagen = image.ImageDataGenerator(rescale=1./255)
test_datagen = image.ImageDataGenerator(rescale=1./255)

In [None]:
# Batches of 100 augmented 64x64 training images with one-hot labels
train_data = train_datagen.flow_from_directory(
    directory="./train",
    target_size=(64, 64),
    batch_size=100,
    class_mode='categorical',
    shuffle=True,
)

In [None]:
# Show the class-name -> label-index mapping used by the training generator
train_data.class_indices

In [None]:
# Batches of 100 rescaled 64x64 validation images with one-hot labels
val_data = val_datagen.flow_from_directory(
    directory="./val",
    target_size=(64, 64),
    batch_size=100,
    class_mode='categorical',
    shuffle=True,
)

In [None]:
# Show the class-name -> label-index mapping used by the validation generator
val_data.class_indices

In [None]:
# The test generator must NOT shuffle: later cells compare model.predict(test_data)
# against test_data.classes / test_data.filenames, which are stored in directory
# order. With shuffle=True the predictions would be paired with the wrong labels.
test_data = test_datagen.flow_from_directory(
    directory="./test",
    target_size=(64, 64),
    batch_size=100,
    class_mode='categorical',
    shuffle=False,
)

## PRETRAINED MODEL Declaration

In [None]:
from tensorflow.keras.applications import EfficientNetB2,VGG19
from tensorflow.keras.applications.resnet_v2 import ResNet152V2
from tensorflow.keras.applications.densenet import DenseNet201,DenseNet169,DenseNet121

In [None]:
from tensorflow import keras
# Metrics passed to every model.compile() call in this notebook.
# "accuracy" stays first: working_model reads it as element [1] of the
# evaluation result (element [0] is the loss).
METRICS = [
    "accuracy",
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.FalseNegatives(name='fn'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc'),
]
def mymodel(image_size, num_classes=5):
    """Build the small custom CNN used as the non-pretrained baseline.

    Architecture: four 3x3 convolution layers (16, 32, 64, 128 filters) with
    2x2 max pooling after the last three, dropout, then dense layers of 128
    and 64 units and a softmax classification layer.

    Args:
        image_size: Height and width of the square RGB input images.
        num_classes: Number of output classes (defaults to the 5 cell classes).

    Returns:
        An uncompiled `Sequential` model.
    """
    model = Sequential()
    # Convolutional Layer with input shape (image_size, image_size, 3)
    model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu',
                     input_shape=(image_size, image_size, 3)))

    # Three conv + max-pool stages with a doubling number of filters
    for filters in (32, 64, 128):
        model.add(Conv2D(filters=filters, kernel_size=(3, 3), activation='relu'))
        model.add(MaxPool2D(pool_size=(2, 2)))

    model.add(Dropout(rate=0.25))

    model.add(Flatten())
    model.add(Dense(units=128, activation='relu'))
    model.add(Dense(units=64, activation='relu'))
    model.add(Dropout(rate=0.25))
    # Each image belongs to exactly one class and the model is trained with
    # categorical cross-entropy, so the output must be a probability
    # distribution over the classes: softmax, not independent sigmoids.
    model.add(Dense(units=num_classes, activation='softmax'))
    return model


def _backbone_factories():
    """Map every supported backbone name to its Keras application constructor."""
    return {
        'vgg19': VGG19,
        'dense121': DenseNet121,
        'dense169': DenseNet169,
        'dense201': DenseNet201,
        'res152': ResNet152V2,
        'effNetB2': EfficientNetB2,
    }


def _build_transfer_model(backbone, image_size, num_classes=5):
    """Attach a dense classification head to a frozen ImageNet backbone."""
    base_model = backbone(weights='imagenet', include_top=False,
                          input_shape=(image_size, image_size, 3))
    base_model.trainable = False
    head = GlobalAveragePooling2D()(base_model.output)
    for units in (512, 256, 128, 64):
        head = Dense(units, activation='relu')(head)
    # Single-label problem trained with categorical cross-entropy -> softmax output.
    head = Dense(num_classes, activation='softmax')(head)
    return Model(inputs=base_model.input, outputs=head)


def _plot_train_val_curve(history, metric, title, out_path):
    """Plot a training metric against its validation counterpart, then save and show it."""
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric], c="red")
    plt.legend(['Train', 'Val'])
    plt.title(title)
    plt.savefig(out_path, dpi=300)
    plt.show()


def _true_and_predicted_labels(model, generator):
    """Return (true, predicted) label indices for every image of `generator`.

    Labels and predictions are collected batch by batch, so they stay aligned
    even when the generator was created with shuffle=True.
    """
    true_labels, predicted_labels = [], []
    for batch_idx in range(len(generator)):
        batch_x, batch_y = generator[batch_idx]
        true_labels.append(np.argmax(batch_y, axis=1))
        predicted_labels.append(np.argmax(model.predict_on_batch(batch_x), axis=1))
    return np.concatenate(true_labels), np.concatenate(predicted_labels)


def _plot_confusion_matrix(model, generator, out_path):
    """Draw, save and show the confusion matrix of `model` on `generator`."""
    class_indices = generator.class_indices
    class_names = sorted(class_indices, key=class_indices.get)
    true_labels, predicted_labels = _true_and_predicted_labels(model, generator)
    # Explicit labels keep the matrix square even if a class is never predicted.
    matrix = confusion_matrix(true_labels, predicted_labels,
                              labels=list(range(len(class_names))))
    plt.figure(figsize=(16, 10))
    ax = plt.subplot()
    # fmt='d' prints plain integer counts instead of scientific notation.
    sns.heatmap(matrix, annot=True, fmt='d', ax=ax)

    ax.set_xlabel('Predicted Labels', size=20)
    ax.set_ylabel('True Labels', size=20)
    ax.set_title('Cervical Classification', size=20)
    ax.xaxis.set_ticklabels(class_names)
    ax.yaxis.set_ticklabels(class_names)
    plt.savefig(out_path, dpi=300)
    plt.show()


def _plot_metric_curves(history, model_name):
    """Plot train/validation AUC, precision and recall per epoch.

    The figure is saved after each metric is added, as
    `<Metric>_of_<model_name>.jpeg`.
    """
    plt.figure(figsize=(15, 10))
    for position, metric in enumerate(['auc', 'precision', 'recall'], start=1):
        label = metric.replace("_", " ").capitalize()
        plt.subplot(3, 3, position)
        plt.plot(history.epoch, history.history[metric], color='b', label='Train')
        plt.plot(history.epoch, history.history['val_' + metric],
                 color='r', linestyle="--", label='Val')
        plt.xlabel('Epoch')
        plt.ylabel(label)
        # AUC is zoomed to its upper range; the other metrics use the full [0, 1].
        plt.ylim([0.8, 1] if metric == 'auc' else [0, 1])
        plt.legend()
        plt.savefig(f"{label}_of_{model_name}.jpeg", dpi=300)
    plt.show()


def _print_average_counts(name, history):
    """Print epoch-averaged TP/FP/TN/FN counts and the scores derived from them.

    Done once for the training metrics and once for the `val_`-prefixed ones.
    """
    def calculate_matrix(tp, fp, tn, fn):
        accuracy = (tp+tn)/(tp+tn+fp+fn)
        precision = tp/(tp+fp)
        recall = tp/(tp+fn)
        specificity = tn/(tn+fp)
        f1_score = 2*((precision*recall)/(precision+recall))
        print(f"Accuracy is {accuracy}\nPrecision is {precision*100}\nRecall is {recall*100}\nSpecificity is {specificity*100}\nF1_score is {f1_score*100}")

    def epoch_average(key):
        values = history.history[key]
        return np.sum(values)/len(values)

    for prefix, section in (('', 'Train Section of '), ('val_', 'Test Section of ')):
        print('-'*20, section, name, '-'*20)
        avg_tp = epoch_average(prefix + 'tp')
        avg_fp = epoch_average(prefix + 'fp')
        avg_tn = epoch_average(prefix + 'tn')
        avg_fn = epoch_average(prefix + 'fn')
        print("Average True Positive : {}".format(avg_tp))
        print("Average False Positive : {}".format(avg_fp))
        print("Average True Negative : {}".format(avg_tn))
        print("Average False Negative : {}".format(avg_fn))
        calculate_matrix(avg_tp, avg_fp, avg_tn, avg_fn)


def working_model(name, image_size,EPOCHS=50,pretrain=False,save=True):
    """Build, train and evaluate one model, and produce all of its reports.

    Uses the notebook-level generators `train_data`, `val_data`, `test_data`
    and the notebook-level `METRICS` list.

    Args:
        name: 'mymodel' for the custom CNN, or one of the backbone names
            'vgg19', 'dense121', 'dense169', 'dense201', 'res152', 'effNetB2'.
        image_size: Height and width of the square RGB input images.
        EPOCHS: Number of training epochs.
        pretrain: Must be True for a backbone name (frozen ImageNet backbone
            plus a new dense head) and False for 'mymodel'.
        save: When True, validate on `val_data` and checkpoint the best model
            (by `val_accuracy`) to `_<name>_modelcervical_cancer_best.hdf5`.
            When False, validate on `test_data` and save nothing.

    Returns:
        Tuple `(model, history)`: the trained Keras model and the `History`
        object returned by `fit`.

    Raises:
        ValueError: If `name` is not listed, or if `pretrain` does not match
            the kind of model selected by `name`.
    """
    # NOTE(review): METRICS holds metric *instances* that are shared by every
    # model compiled through this function - confirm this is intended.
    # NOTE(review): the generators rescale pixels to [0, 1]; confirm that this
    # matches the preprocessing each backbone expects.
    factories = _backbone_factories()
    if name == 'mymodel':
        if pretrain:
            raise ValueError("'mymodel' has no pretrained backbone; call it with pretrain=False")
        eff_model = mymodel(image_size)
    elif name in factories:
        if not pretrain:
            raise ValueError(f"'{name}' is a pretrained backbone; call it with pretrain=True")
        eff_model = _build_transfer_model(factories[name], image_size)
    else:
        raise ValueError("Not Listed")

    eff_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=METRICS)

    call_back = []
    if save:
        call_back.append(ModelCheckpoint(filepath=f"_{name}_modelcervical_cancer_best.hdf5",
                                         monitor='val_accuracy', verbose=1,
                                         save_best_only=True, mode='auto'))
        validation_data = val_data
    else:
        validation_data = test_data
    # No validation_steps: Keras then runs over the whole validation generator,
    # whatever its size, instead of a hard-coded 6 batches.
    eff_cnn = eff_model.fit(train_data,
                            epochs=EPOCHS,
                            validation_data=validation_data,
                            callbacks=call_back)

    # Checking the Accuracy of the Model
    print('-'*20,name,'-'*20)
    # evaluate() replaces the deprecated evaluate_generator(); index 1 is the
    # first compiled metric ("accuracy"), index 0 is the loss.
    accuracy = eff_model.evaluate(val_data)[1]
    print(f"The accuracy of {name} is = {accuracy*100} %")
    print('-'*20)

    print('-'*20,name,'-'*20)
    # Plotting Accuracy In Training Set & Validation Set
    _plot_train_val_curve(eff_cnn, 'accuracy', "acc vs v-acc", f"{name}_acc_val_acc.jpeg")
    # Plotting Loss In Training Set & Validation Set
    _plot_train_val_curve(eff_cnn, 'loss', "loss vs v-loss", f"{name}_acc_val_loss.jpeg")

    print('-'*20,name,'-'*20)
    # Confusion Matrix on the test set
    _plot_confusion_matrix(eff_model, test_data, f"{name}_confusion_matrix.jpeg")

    print('-'*20,name,'-'*20)
    _plot_metric_curves(eff_cnn, name)

    print('-'*20,name,'-'*20)
    _print_average_counts(name, eff_cnn)

    return (eff_model, eff_cnn)

In [None]:
# Backbones trained in this run. Other names accepted by working_model:
# 'vgg19', 'res152', 'effNetB2'.
pretrained_model_list = ['dense121', 'dense169', 'dense201']
for model_name in pretrained_model_list:
    working_model(
        model_name,
        image_size=64,
        EPOCHS=75,
        pretrain=True,
        save=True,
    )
    

In [None]:
# Train the custom CNN; the returned pair is unpacked into the model and its history
model, history = working_model(
    'mymodel',
    image_size=64,
    EPOCHS=50,
    pretrain=False,
    save=False,
)

In [None]:
def calculate_matrix(tp,fp,tn,fn):
    """Print precision, recall, specificity and F1 score as percentages.

    A ratio whose denominator is zero is reported as 0 instead of raising
    ZeroDivisionError (e.g. precision when nothing was predicted positive).

    Args:
        tp: Number of true positives.
        fp: Number of false positives.
        tn: Number of true negatives.
        fn: Number of false negatives.
    """
    def _ratio(numerator, denominator):
        return numerator / denominator if denominator else 0.0

    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    specificity = _ratio(tn, tn + fp)
    f1_score = 2 * _ratio(precision * recall, precision + recall)
    print(f"Precision is {precision*100}\nRecall is {recall*100}\nSpecificity is {specificity*100}\nF1_score is {f1_score*100}")
    
def _epoch_average(metric_key):
    """Average one recorded metric of `history` over all epochs."""
    series = history.history[metric_key]
    return np.sum(series) / len(series)


# Training metrics, averaged over the epochs
print('-'*20, 'Train Section', '-'*20)
avg_tp = _epoch_average('tp')
avg_fp = _epoch_average('fp')
avg_tn = _epoch_average('tn')
avg_fn = _epoch_average('fn')
avg_precision = _epoch_average('precision')
avg_recall = _epoch_average('recall')
print(f"Average True Positive : {avg_tp}")
print(f"Average False Positive : {avg_fp}")
print(f"Average True Negative : {avg_tn}")
print(f"Average False Negative : {avg_fn}")
print(f"Average Precision : {avg_precision}")
print(f"Average Recall : {avg_recall}")
calculate_matrix(avg_tp, avg_fp, avg_tn, avg_fn)

# Metrics of the validation_data passed to fit ("val_" keys), averaged over the epochs
print('-'*20, 'Test Section', '-'*20)
avg_tp = _epoch_average('val_tp')
avg_fp = _epoch_average('val_fp')
avg_tn = _epoch_average('val_tn')
avg_fn = _epoch_average('val_fn')
avg_precision = _epoch_average('val_precision')
avg_recall = _epoch_average('val_recall')
print(f"Average True Positive : {avg_tp}")
print(f"Average False Positive : {avg_fp}")
print(f"Average True Negative : {avg_tn}")
print(f"Average False Negative : {avg_fn}")
print(f"Average Precision : {avg_precision}")
print(f"Average Recall : {avg_recall}")

calculate_matrix(avg_tp, avg_fp, avg_tn, avg_fn)

In [None]:
# Display the recorded metric values, keyed by metric name
history.history

In [None]:
# Epoch axis derived from the recorded history, so the plot still works when
# the model was trained for a number of epochs other than 50.
epochs = list(range(1, len(history.history['tp']) + 1))
for metric_key in ('tp', 'val_tp', 'tn', 'val_tn', 'fp', 'val_fp',
                   'fn', 'val_fn', 'precision', 'val_precision'):
    # A label per curve is required for plt.legend() to show anything.
    plt.plot(epochs, history.history[metric_key], label=metric_key)

plt.xlabel('Epoch')
plt.legend()
plt.show()

In [None]:
## AUC CURVE
colors = ['b', 'r']   # [train colour, validation colour]


def plot_metrics(H):
    """Plot train vs. validation AUC, precision and recall per epoch.

    Args:
        H: Keras `History` object holding `auc`, `precision`, `recall` and
            their `val_`-prefixed counterparts.
    """
    fig = plt.figure(figsize=(15, 10))
    train_colour, val_colour = colors[0], colors[1]

    for slot, metric in enumerate(['auc', 'precision', 'recall'], start=1):
        label = metric.replace("_", " ").capitalize()
        ax = fig.add_subplot(3, 3, slot)
        ax.plot(H.epoch, H.history[metric], color=train_colour, label='Train')
        ax.plot(H.epoch, H.history['val_' + metric],
                color=val_colour, linestyle="--", label='Val')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(label)
        if metric == 'loss':
            ax.set_ylim([0, ax.get_ylim()[1]])
        elif metric == 'auc':
            ax.set_ylim([0.8, 1])
        else:
            ax.set_ylim([0, 1])

        ax.legend()


plot_metrics(history)

In [None]:
"""
from sklearn.metrics import confusion_matrix
confusion_matrix(
test_data.classes,
  np.argmax(model.predict(test_data),axis=1),
#     num_classes=len(test_data.class_indices)
)
"""

import seaborn as sns 
# plt.style.use('fivethirtyeight')
# Class names ordered by their label index
class_names = sorted(test_data.class_indices, key=test_data.class_indices.get)

# Collect labels and predictions batch by batch so that each prediction is
# compared with the label of the same image, even if test_data shuffles.
true_labels, predicted_labels = [], []
for batch_idx in range(len(test_data)):
    batch_x, batch_y = test_data[batch_idx]
    true_labels.append(np.argmax(batch_y, axis=1))
    predicted_labels.append(np.argmax(model.predict_on_batch(batch_x), axis=1))

matrix = confusion_matrix(
    np.concatenate(true_labels),
    np.concatenate(predicted_labels),
    labels=list(range(len(class_names))),   # keeps the matrix square if a class is never predicted
)
plt.figure(figsize=(16,10))
ax = plt.subplot()
# fmt='d' shows plain integer counts instead of scientific notation
sns.heatmap(matrix, annot=True, fmt='d', ax=ax)

ax.set_xlabel('Predicted Labels',size=20)
ax.set_ylabel('True Labels',size=20)
ax.set_title('Cervical Cancer Classification',size=20)
ax.xaxis.set_ticklabels(class_names)
ax.yaxis.set_ticklabels(class_names)
plt.show()

In [None]:
print("[INFO] evaluating network...")
# Class names ordered by their label index. target_names must be these names,
# not test_data.classes (which is the per-image label array).
class_names = sorted(test_data.class_indices, key=test_data.class_indices.get)

# Collect labels and predictions batch by batch so they stay aligned even if
# test_data shuffles.
trueIdxs, predIdxs = [], []
for batch_idx in range(len(test_data)):
    batch_x, batch_y = test_data[batch_idx]
    trueIdxs.append(np.argmax(batch_y, axis=1))
    predIdxs.append(np.argmax(model.predict_on_batch(batch_x), axis=1))
trueIdxs = np.concatenate(trueIdxs)
predIdxs = np.concatenate(predIdxs)

print(classification_report(trueIdxs, predIdxs,
                            labels=list(range(len(class_names))),
                            target_names=class_names))

In [None]:
# Predictions and file names must come from the SAME generator, and that
# generator must not shuffle, otherwise row i of the predictions does not
# belong to filenames[i]. A dedicated unshuffled test generator guarantees both.
eval_data = test_datagen.flow_from_directory(
    directory="./test",
    target_size=(64, 64),
    batch_size=100,
    class_mode='categorical',
    shuffle=False,
)
preds = model.predict(eval_data)
preds_cls_idx = preds.argmax(axis=-1)
# Invert class name -> index into index -> class name
idx_to_cls = {v: k for k, v in eval_data.class_indices.items()}
preds_cls = np.vectorize(idx_to_cls.get)(preds_cls_idx)
filenames_to_cls = list(zip(eval_data.filenames, preds_cls))