In [1]:
import warnings
warnings.filterwarnings("ignore")

import tensorflow as tf
import os
import numpy as np
import random
from tqdm.auto import tqdm
from skimage.io import imread, imshow
from skimage.transform import resize
import matplotlib.pyplot as plt
from glob import glob
import cv2
from skimage.io import imread, imshow, show

from tensorflow import keras
from keras import layers
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input

seed = 42
np.random.seed = seed  # To get same random seed everytime we run the whole thing.
tf.random.set_seed(42)
random.seed(seed)

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!pip install -U tensorflow-addons
import tensorflow_addons as tfa

Collecting tensorflow-addons
  Downloading tensorflow_addons-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (612 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m612.1/612.1 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.7 (from tensorflow-addons)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow-addons
Successfully installed tensorflow-addons-0.21.0 typeguard-2.13.3


In [4]:
H1_path = "/content/drive/MyDrive/ICCIT 2023/Selected_data/H1/*"
H2_path = "/content/drive/MyDrive/ICCIT 2023/Selected_data/H2/*"
H3_path = "/content/drive/MyDrive/ICCIT 2023/Selected_data/H3/*"
H5_path = "/content/drive/MyDrive/ICCIT 2023/Selected_data/H5/*"
H6_path = "/content/drive/MyDrive/ICCIT 2023/Selected_data/H6/*"

In [5]:
def sorted_file_paths(path):
    file_paths = sorted(glob(path))
    print("Total image files:")
    print(f"{len(file_paths)} \n")
    return file_paths

H1_700_paths = sorted_file_paths(H1_path)
H2_700_paths = sorted_file_paths(H2_path)
H3_700_paths = sorted_file_paths(H3_path)
H5_700_paths = sorted_file_paths(H5_path)
H6_700_paths = sorted_file_paths(H6_path)

Total image files:
700 

Total image files:
700 

Total image files:
700 

Total image files:
700 

Total image files:
700 



In [6]:
print(len(H1_700_paths),len(H2_700_paths),len(H3_700_paths),len(H5_700_paths),len(H6_700_paths) )

all_3500_paths = H1_700_paths + H2_700_paths + H3_700_paths + H5_700_paths + H6_700_paths

print(type(all_3500_paths))
print(len(all_3500_paths))
print(all_3500_paths[50])
print(all_3500_paths[750])
print(all_3500_paths[3400])

700 700 700 700 700
<class 'list'>
3500
/content/drive/MyDrive/ICCIT 2023/Selected_data/H1/H1_105d_5.jpg
/content/drive/MyDrive/ICCIT 2023/Selected_data/H2/H2_122e_4.jpg
/content/drive/MyDrive/ICCIT 2023/Selected_data/H6/H6_70a_6.jpg


In [8]:
def image_preprocess(paths):
    total = len(paths)
    im_array = np.zeros((total, 256, 256, 3), dtype = np.float32)
    for i in tqdm(range(total)):
        img = imread(paths[i])
        # print(image.shape)
        img = cv2.resize(img, (256, 256))
        # img = img / 255.0
        im_array[i, :, :, :] = img
    return im_array

# [0, 255] and (128, 128, 3) shaped float32 images.
X = image_preprocess(all_3500_paths)
print(X.shape)
print(X.dtype)

  0%|          | 0/3500 [00:00<?, ?it/s]

(3500, 256, 256, 3)
float32


In [13]:
labels = ['H1']*700 + ['H2']*700 + ['H3']*700 + ['H5']*700 + ['H6']*700
print(type(labels))
print(len(labels))
print(labels[3300])
print(labels)


<class 'list'>
3500
H6
['H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1', 'H1'

In [14]:
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras

label_enconder = LabelEncoder()
y = label_enconder.fit_transform(labels)
print(y.shape)
print(y)
print('\n')


classes = list(label_enconder.classes_)
print(classes)
print('\n')

(3500,)
[0 0 0 ... 4 4 4]


['H1', 'H2', 'H3', 'H5', 'H6']




In [15]:
X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.float32)
print(X.shape, y.shape)
print(X.dtype, y.dtype)
# All are 4D and 1D array.

(3500, 256, 256, 3) (3500,)
float32 float32


## Model Building

In [16]:
image_size = 256
no_class = 5
batch_size = 32
learning_rate = 0.001
num_epochs = 100
weight_decay = 0.0001

In [17]:

def creat_model(layer):

    input = keras.Input(shape=(image_size, image_size, 3))
    x = layer(input)

    base_model = tf.keras.applications.vgg16.VGG16(
                                                include_top=False,
                                                weights='imagenet',
                                                input_shape=(image_size, image_size, 3),
                                                pooling=None)

    base_model.trainable = False

    x = base_model(x, training=False)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.2)(x)
    x = tf.keras.layers.Dense(1024,activation='relu')(x)
    x = tf.keras.layers.Dropout(0.6)(x)
    x = tf.keras.layers.Dense(512,activation='relu')(x)
    x = tf.keras.layers.Dropout(0.5)(x)
    output = tf.keras.layers.Dense(no_class, activation="softmax")(x)

    model = keras.Model(inputs = input, outputs = output)

    return model


data_augmentation = keras.Sequential([layers.RandomFlip("horizontal_and_vertical"),
                                          layers.RandomRotation(factor=0.02)],
                                          name="data_augmentation")
model = creat_model(data_augmentation)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [18]:
print(model.summary())

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 data_augmentation (Sequent  (None, 256, 256, 3)       0         
 ial)                                                            
                                                                 
 vgg16 (Functional)          (None, 8, 8, 512)         14714688  
                                                                 
 global_average_pooling2d (  (None, 512)               0         
 GlobalAveragePooling2D)                                         
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense (Dense)               (None, 1024)              525312

## Training

In [None]:
from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

fold_no = 1
acc_per_fold = [] #save accuracy from each fold
history_all = []

for train, test in cv.split(X, y):

    print('   ')
    print(f'Training for fold {fold_no} ...', '\n')

    x_train = X[train]
    x_test = X[test]
    y_train = y[train]
    y_test = y[test]

    y_train = tf.keras.utils.to_categorical(y_train, num_classes=5)
    y_test = tf.keras.utils.to_categorical(y_test, num_classes=5)

    data_augmentation = keras.Sequential([layers.RandomFlip("horizontal_and_vertical"),
                                          layers.RandomRotation(factor=0.02)],
                                          name="data_augmentation")

    model = creat_model(data_augmentation)

    optimizer = tfa.optimizers.AdamW(learning_rate=learning_rate,
                                     weight_decay=weight_decay)

    model.compile(optimizer=optimizer,
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                  metrics=[tf.keras.metrics.CategoricalAccuracy(name="accuracy")])

    history = model.fit(
        x=x_train,
        y=y_train,
        batch_size=batch_size,
        epochs=num_epochs,
        validation_data = (x_test, y_test))

    _, accuracy = model.evaluate(x_test, y_test)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%", '\n')

    acc_per_fold.append(accuracy*100)
    history_all.append(history)

    fold_no = fold_no + 1


   
Training for fold 1 ... 

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100

In [None]:
print(acc_per_fold, '\n')

mean = np.mean(np.array(acc_per_fold))
std = np.std(np.array(acc_per_fold))

print(f"Overall Accuracy is : {mean}+-{std}")