In [None]:
pip install tensorflow==2.7.4

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow==2.7.4
  Downloading tensorflow-2.7.4-cp39-cp39-manylinux2010_x86_64.whl (496.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m496.1/496.1 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting keras<2.8,>=2.7.0rc0
  Downloading keras-2.7.0-py2.py3-none-any.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m83.2 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf<3.20,>=3.9.2
  Downloading protobuf-3.19.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m81.0 MB/s[0m eta [36m0:00:00[0m
Collecting flatbuffers<3.0,>=1.12
  Downloading flatbuffers-2.0.7-py2.py3-none-any.whl (26 kB)
Collecting keras-preprocessing>=1.1.1
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[2K     [90m━━━

In [24]:
import tensorflow as tf
from tensorflow.keras.models import Model
import numpy as np
import scipy
import os
import pandas as pd
from sklearn.utils import shuffle
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
import argparse
from torch import optim, cuda

In [25]:
from google.colab import drive
drive.mount('/content/drive')

# Whether to train on a gpu
train_on_gpu = cuda.is_available()
print(f'Train on gpu: {train_on_gpu}')
multi_gpu = False
# Number of gpus
if train_on_gpu:
    gpu_count = cuda.device_count()
    print(f'{gpu_count} gpus detected.')
    if gpu_count > 1:
        multi_gpu = True
    else:
        multi_gpu = False

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Train on gpu: True
1 gpus detected.


In [33]:
def generate_csv(path):
    print("CSV being generated")
    uniques = ["Dyskeratotic", "Koilocytotic", "Metaplastic", "Parabasal", "SuperficialIntermediate"]
    dirs = ["train", "test"]

    """
            +-- train
            |   +-- Dyskeratotic
            |   +-- Koilocytotic
            |   +-- Metaplastic
            |   +-- Parabasal
            |   +-- SuperficialIntermediate

            +-- test
            |   +-- Dyskeratotic
            |   +-- Koilocytotic
            |   +-- Metaplastic
            |   +-- Parabasal
            |   +-- SuperficialIntermediate

    
    """
    # Above is the expected directory structure

    data = []
    for d in dirs:
        for unique in uniques:
            directory = path + "/" + d + "/" + unique  # required path

            for filename in os.listdir(directory):
                paths = directory + "/" + filename  # required path
                data.append([filename, paths, unique])

    df = pd.DataFrame(data, columns=["filename", "path", "class"])
    df = shuffle(df)
    name = "/content/drive/MyDrive/Colab Notebooks/CSC413/final/csv_files/" + "Data-full"  # required path
    df.to_csv(name, index=False)
    print("Generation Complete")
    return df

In [34]:
def k_fold_splits(x, y, files_for_train_x, files_for_validation_x, files_for_train_y, files_for_validation_y,
                  n_splits=5):
    kf = KFold(n_splits=n_splits)
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html
    # fold_no = 1
    for train_index, val_index in kf.split(x):
        # it will split the entire data into 5 folds
        x_train, x_val = x[train_index], x[val_index]
        y_train, y_val = y[train_index], y[val_index]

        # split the into 5 folds

        files_for_train_x.append(x_train)
        files_for_validation_x.append(x_val)
        files_for_train_y.append(y_train)
        files_for_validation_y.append(y_val)
        # fold_no += 1

In [35]:
def create_model(model_name, IMG_SIZE=256, output=5):
    IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)  # IMG_SIZE = 256
    if (model_name == "MobileNetV2"):

        model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                                  include_top=False,
                                                  weights='imagenet')
    elif (model_name == "InceptionV3"):
        model = tf.keras.applications.inception_v3.InceptionV3(input_shape=IMG_SHAPE,
                                                               include_top=False,
                                                               weights='imagenet')

    elif (model_name == "InceptionResNetV2"):
        model = tf.keras.applications.inception_resnet_v2.InceptionResNetV2(input_shape=IMG_SHAPE,
                                                                            include_top=False,
                                                                            weights='imagenet')
    else:
        return

    x = tf.keras.layers.Conv2D(128, (3, 3), activation='relu')(model.output)
    x = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(100, activation='relu')(x)
    x = tf.keras.layers.Dense(output, activation='softmax')(x)

    model = Model(inputs=model.input, outputs=x)

    my_model = tf.keras.models.clone_model(model)
    return my_model

In [45]:
def fuzzy_dist(classifier1, classifier2, classifier3, verbose=True):
    out = np.empty(len(classifier1))
    for i in range(len(classifier1)):
        if np.argmax(classifier1[i]) == np.argmax(classifier2[i]) == np.argmax(classifier3[i]):
            out[i] = np.argmax(classifier2[i])
        else:
            measure = np.zeros(len(classifier1[i]))
            for j in range(len(classifier1[i])):
                scores = np.array(
                    [classifier1[i, j], classifier2[i, j], classifier3[i, j]])
                measure[j] = scipy.spatial.distance.cosine(np.ones(3), scores) * scipy.spatial.distance.euclidean(
                    np.ones(3), scores) * scipy.spatial.distance.cityblock(np.ones(3), scores)
                if verbose:
                    print(measure)
            out[i] = np.argmin(measure)
    return out

In [46]:
# "Dyskeratotic" , "Koilocytotic" , "Metaplastic" , "Parabasal" , "SuperficialIntermediate"
# this function will encode the labels
# for five classes
def encode_y(y):
    Y = []
    for i in y:
        if i == "Dyskeratotic":
            Y.append(0)
        elif i == "Koilocytotic":
            Y.append(1)
        if i == "Metaplastic":
            Y.append(2)
        if i == "Parabasal":
            Y.append(3)
        if i == "SuperficialIntermediate":
            Y.append(4)

    return np.array(Y).astype("float32")


# convert file paths info nums 
# then normalize
def process_x(x):
    lis = []
    for i in x:
        img = cv2.imread(i)
        resize = cv2.resize(img, (256, 256))
        lis.append(resize)
    return np.array(lis).astype("float32") / 255.0



def k_fold_separate(x_train, y_train, x_val, y_val, model_name1, model_name2, model_name3, fold_no, NUM_EPOCHS=70,
                    train_batch=16, validation_batch=16, lr=1e-4):
    train_datagen = ImageDataGenerator(rotation_range=40,
                                       width_shift_range=0.2,
                                       height_shift_range=0.2,
                                       shear_range=0.2,
                                       zoom_range=0.2,
                                       horizontal_flip=True)

    val_datagen = ImageDataGenerator()

    x_train, y_train, x_val, y_val = process_x(x_train), encode_y(y_train), process_x(x_val), encode_y(y_val)

    train = train_datagen.flow(x_train, y_train, batch_size=train_batch)
    validation = val_datagen.flow(x_val, y_val,
                                  batch_size=validation_batch)
    test = x_val
    print('------------------------------------------------------------------------')
    print()
    print("fold no --- ", fold_no)
    print()
    print('------------------------------------------------------------------------')

    y_preds = []
    print()
    print(model_name1)
    print()

    model1 = create_model(model_name1)

    # Compile the model
    model1.compile(loss='sparse_categorical_crossentropy',
                   optimizer=tf.keras.optimizers.Adam(learning_rate=lr, beta_1=0.9, beta_2=0.999, decay=0.0001),
                   metrics=['accuracy'])

    # Generate a print

    # Fit data to model
    history1 = model1.fit(x=train,
                          validation_data=validation,
                          epochs=NUM_EPOCHS

                          )

    # model save..
    model_saved_name = model_name1 + "_weights"+ "_" + str(fold_no) + ".h5"

    model1.save_weights("require path" + model_saved_name)

    hist_df = pd.DataFrame(history1.history) 
    hist_csv_file =  "history_" + model_name1 + "_weights" + "_" + str(fold_no) + ".csv"
    filepath = "require path" + hist_csv_file 
    with open(filepath, mode='w') as f:
        hist_df.to_csv(f)

    print(f'{model_saved_name} saved')
    print(f'{hist_csv_file} saved')

    # Generate generalization metrics
    scores = model1.evaluate(validation)
    print(
        f'Score for fold {fold_no}: {model1.metrics_names[0]} of {scores[0]}; {model1.metrics_names[1]} of {scores[1] * 100}%')
    # predictions = model.predict()
    preds1 = model1.predict(test, batch_size=validation_batch)
    for pred in preds1:
        y_preds.append(np.argmax(pred))
    print('Accuracy Score: ', accuracy_score(y_val, y_preds))
    n = len(precision_score(y_val, y_preds, average=None))
    print('Precision Score(Class wise): ', precision_score(y_val, y_preds, average=None), " mean- ",
          sum(precision_score(y_val, y_preds, average=None)) / n)
    print('Recall Score(Class wise): ', recall_score(y_val, y_preds, average=None), " mean- ",
          sum(recall_score(y_val, y_preds, average=None)) / n)
    print('F1 Score(Class wise): ', f1_score(y_val, y_preds, average=None), " mean- ",
          sum(f1_score(y_val, y_preds, average=None)) / n)
    print('Conf Matrix Score(Class wise):\n ', confusion_matrix(y_val, y_preds))

    y_preds = []
    print()
    print(model_name2)
    print()

    model2 = create_model(model_name2)

    # Compile the model
    model2.compile(loss='sparse_categorical_crossentropy',
                   optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, decay=0.0001),
                   metrics=['accuracy'])

    # Generate a print

    # Fit data to model
    history2 = model2.fit(x=train,
                          validation_data=validation,
                          epochs=NUM_EPOCHS

                          )

    # model save..
    model_saved_name = model_name2 + "_weights" + "_" + str(fold_no) + ".h5"

    model2.save("require path" + model_saved_name)

    hist_df = pd.DataFrame(history2.history) 
    hist_csv_file =  "history_" + model_name2 + "_weights" + "_" + str(fold_no) + ".csv"
    filepath = "require path" + hist_csv_file 
    with open(filepath, mode='w') as f:
        hist_df.to_csv(f)

    print(f'{model_saved_name} saved')
    print(f'{hist_csv_file} saved')

    # Generate generalization metrics
    scores = model2.evaluate(validation)
    print(
        f'Score for fold {fold_no}: {model2.metrics_names[0]} of {scores[0]}; {model2.metrics_names[1]} of {scores[1] * 100}%')
    # predictions = model.predict()
    preds2 = model2.predict(test, batch_size=validation_batch)
    for pred in preds2:
        y_preds.append(np.argmax(pred))

    print('Accuracy Score: ', accuracy_score(y_val, y_preds))

    print('Precision Score(Class wise): ', precision_score(y_val, y_preds, average=None), " mean- ",
          sum(precision_score(y_val, y_preds, average=None)) / n)
    print('Recall Score(Class wise): ', recall_score(y_val, y_preds, average=None), " mean- ",
          sum(recall_score(y_val, y_preds, average=None)) / n)
    print('F1 Score(Class wise): ', f1_score(y_val, y_preds, average=None), " mean- ",
          sum(f1_score(y_val, y_preds, average=None)) / n)
    print('Conf Matrix Score(Class wise):\n ', confusion_matrix(y_val, y_preds))

    y_preds = []
    print()
    print(model_name3)
    print()

    model3 = create_model(model_name3)

    # Compile the model
    model3.compile(loss='sparse_categorical_crossentropy',
                   optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, decay=0.0001),
                   metrics=['accuracy'])

    # Generate a print

    # Fit data to model
    history3 = model3.fit(x=train,
                          validation_data=validation,
                          epochs=NUM_EPOCHS

                          )

    # model save..
    model_saved_name = model_name3 + "_weights" + "_" + str(fold_no) + ".h5"

    model3.save("require path" + model_saved_name)

    hist_df = pd.DataFrame(history2.history)
    hist_csv_file =  "history_" + model_name3 + "_weights" + "_" + str(fold_no) + ".csv"
    filepath = "require path" + hist_csv_file 
    with open(filepath, mode='w') as f:
        hist_df.to_csv(f)

    print(f'{model_saved_name} saved')
    print(f'{hist_csv_file} saved')

    # Generate generalization metrics
    scores = model3.evaluate(validation)
    print(
        f'Score for fold {fold_no}: {model3.metrics_names[0]} of {scores[0]}; {model3.metrics_names[1]} of {scores[1] * 100}%')
    # predictions = model.predict()
    preds3 = model3.predict(test, batch_size=validation_batch)
    for pred in preds3:
        y_preds.append(np.argmax(pred))

    print('Accuracy Score: ', accuracy_score(y_val, y_preds))

    print('Precision Score(Class wise): ', precision_score(y_val, y_preds, average=None), " mean- ",
          sum(precision_score(y_val, y_preds, average=None)) / n)
    print('Recall Score(Class wise): ', recall_score(y_val, y_preds, average=None), " mean- ",
          sum(recall_score(y_val, y_preds, average=None)) / n)
    print('F1 Score(Class wise): ', f1_score(y_val, y_preds, average=None), " mean- ",
          sum(f1_score(y_val, y_preds, average=None)) / n)
    print('Conf Matrix Score(Class wise):\n ', confusion_matrix(y_val, y_preds))

    ensem_pred = fuzzy_dist(preds1, preds2, preds3)
    print('Post Ensemble Accuracy Score: ', accuracy_score(y_val, ensem_pred))

    print('Post Ensemble Precision Score(Class wise): ', precision_score(y_val, ensem_pred, average=None), " mean- ",
          sum(precision_score(y_val, ensem_pred, average=None)) / n)
    print('Post Ensemble Recall Score(Class wise): ', recall_score(y_val, ensem_pred, average=None), " mean- ",
          sum(recall_score(y_val, ensem_pred, average=None)) / n)
    print('Post Ensemble F1 Score(Class wise): ', f1_score(y_val, ensem_pred, average=None), " mean- ",
          sum(f1_score(y_val, ensem_pred, average=None)) / n)
    print('Post Ensemble Conf Matrix Score(Class wise):\n ', confusion_matrix(y_val, ensem_pred))

In [None]:
df = generate_csv('/content/drive/MyDrive/Colab Notebooks/CSC413/final')

y = np.array(list(df["class"]))
x = np.array(list(df["path"]))

files_for_train_x = []
files_for_validation_x = []
files_for_train_y = []
files_for_validation_y = []

k_fold_splits(x, y, files_for_train_x, files_for_validation_x,
              files_for_train_y, files_for_validation_y, n_splits=5)  # n_splits = 5


# N is the number of folds
N = len(files_for_train_x)
for i in range(0, N):
    k_fold_separate(files_for_train_x[i], files_for_train_y[i],
                    files_for_validation_x[i], files_for_validation_y[i],
                    "InceptionV3", "MobileNetV2", "InceptionResNetV2", i + 1,
                    NUM_EPOCHS=70, train_batch=16,
                    validation_batch=16, lr=1e-4)

CSV being generated
Generation Complete
------------------------------------------------------------------------

fold no ---  1

------------------------------------------------------------------------

InceptionV3

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70




Accuracy Score:  0.9469135802469136
Precision Score(Class wise):  [0.95705521 0.92810458 0.9010989  0.99310345 0.96407186]  mean-  0.9486867991099027
Recall Score(Class wise):  [0.95121951 0.84023669 0.97619048 0.99310345 0.98170732]  mean-  0.9484914880250326
F1 Score(Class wise):  [0.95412844 0.88198758 0.93714286 0.99310345 0.97280967]  mean-  0.9478343982198318
Conf Matrix Score(Class wise):
  [[156   8   0   0   0]
 [  6 142  17   0   4]
 [  1   1 164   0   2]
 [  0   0   1 144   0]
 [  0   2   0   1 161]]

MobileNetV2

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/7

  layer_config = serialize_layer_fn(layer)


MobileNetV2_weights_1.h5 saved
history_MobileNetV2_weights_1.csv saved
Score for fold 1: loss of 0.23699422180652618; accuracy of 92.34567880630493%
Accuracy Score:  0.9234567901234568
Precision Score(Class wise):  [0.92485549 0.92307692 0.86740331 0.97902098 0.93529412]  mean-  0.9259301651983136
Recall Score(Class wise):  [0.97560976 0.78106509 0.93452381 0.96551724 0.9695122 ]  mean-  0.9252456181760058
F1 Score(Class wise):  [0.9495549  0.84615385 0.89971347 0.97222222 0.95209581]  mean-  0.9239480479900891
Conf Matrix Score(Class wise):
  [[160   3   1   0   0]
 [ 13 132  18   0   6]
 [  0   5 157   3   3]
 [  0   0   3 140   2]
 [  0   3   2   0 159]]

InceptionResNetV2

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70