Final MobileNetV2 Model Training Code

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import os
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
import seaborn as sns
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import classification_report, roc_auc_score, roc_curve, auc
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Render a Markdown-formatted string as rich notebook output
def printmd(string):
    """Show ``string`` rendered as Markdown through IPython's ``display``."""
    rendered = Markdown(string)
    display(rendered)



# Load and preprocess dataset
image_dir = Path(r'E:\Abroad period research\new idea implementation codes\First part of the Covid paper\Original dataset performance\Original dataset')

# Sort the glob result: directory traversal order depends on the filesystem,
# and a stable starting order is needed for the seeded shuffle/split below to
# give the same partition on every run.
filepaths = sorted(image_dir.glob(r'**/*.png'))
if not filepaths:
    # Fail early with a clear message instead of an opaque error in the split
    raise FileNotFoundError(f"No .png images found under {image_dir}")

# The class label is the name of the folder that directly contains the image
labels = [path.parent.name for path in filepaths]

filepaths = pd.Series(filepaths, name='Filepath').astype(str)
labels = pd.Series(labels, name='Label')

image_df = pd.concat([filepaths, labels], axis=1)
# Seeded shuffle so the whole pipeline (shuffle + both splits) is reproducible
image_df = image_df.sample(frac=1, random_state=1).reset_index(drop=True)

# Split data into training (70%), validation (15%), and test (15%) sets
train_df, temp_df = train_test_split(image_df, train_size=0.7, shuffle=True, random_state=1)
val_df, test_df = train_test_split(temp_df, train_size=0.5, shuffle=True, random_state=1)

# Create data generators with augmentation
def create_gen():
    """Build the training, validation and test image iterators.

    Reads the module-level ``train_df``, ``val_df`` and ``test_df`` frames
    (columns ``Filepath`` and ``Label``). Only the training iterator applies
    random augmentation and shuffling; the validation and test iterators
    apply the MobileNetV2 preprocessing only and keep the frame order.

    Returns:
        tuple: ``(train_images, val_images, test_images)``.
    """
    preprocess = tf.keras.applications.mobilenet_v2.preprocess_input

    train_generator = ImageDataGenerator(
        preprocessing_function=preprocess,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    val_test_generator = ImageDataGenerator(preprocessing_function=preprocess)

    # Give all three iterators the same explicit class list. Left to infer the
    # classes on its own, an iterator whose split happens to miss a class
    # would build a different label -> index mapping (and one-hot width) than
    # the others, silently misaligning labels with the model outputs.
    class_names = sorted(
        set(train_df['Label']) | set(val_df['Label']) | set(test_df['Label'])
    )

    # Settings shared by every iterator
    common = dict(
        x_col='Filepath',
        y_col='Label',
        target_size=(224, 224),
        color_mode='rgb',
        class_mode='categorical',
        classes=class_names,
        batch_size=32,
    )

    train_images = train_generator.flow_from_dataframe(
        dataframe=train_df,
        shuffle=True,
        seed=0,
        **common
    )

    # shuffle=False keeps predictions aligned with the row order of the frame
    val_images = val_test_generator.flow_from_dataframe(
        dataframe=val_df,
        shuffle=False,
        **common
    )

    test_images = val_test_generator.flow_from_dataframe(
        dataframe=test_df,
        shuffle=False,
        **common
    )

    return train_images, val_images, test_images

# Load pre-trained MobileNetV2 model (ImageNet weights, no classifier head,
# global-average-pooled output)
pretrained_model = tf.keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
    pooling='avg'
)

# Unfreeze some layers for fine-tuning: everything except the last 50 layers
# stays frozen
pretrained_model.trainable = True
for layer in pretrained_model.layers[:-50]:
    layer.trainable = False

# Create data generators
train_images, val_images, test_images = create_gen()

# Size the softmax from the training iterator itself so the output width
# always equals the width of the one-hot targets the model is trained on.
num_classes = len(train_images.class_indices)

# Custom Model with additional layers: two Dense -> BatchNorm -> Dropout
# stages on top of the pooled backbone features, then the softmax classifier
inputs = pretrained_model.input
x = pretrained_model.output
x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
outputs = Dense(num_classes, activation='softmax')(x)

# Compile the model using Adam optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Cyclical Learning Rate Callback
class CyclicLR(tf.keras.callbacks.Callback):
    """Triangular cyclical learning-rate schedule, updated after every batch.

    The rate rises linearly from ``base_lr`` to ``max_lr`` over ``step_size``
    batches, falls back to ``base_lr`` over the next ``step_size`` batches,
    and repeats. The computed rate is written to the model's optimizer and
    recorded in ``self.history['lr']``.

    Args:
        base_lr: Lower bound of the cycle.
        max_lr: Upper bound of the cycle.
        step_size: Number of batches in half a cycle; must be positive.
        mode: Stored for reference only; the triangular policy is the only
            one implemented, whatever value is passed.

    Raises:
        ValueError: If ``step_size`` is not positive.
    """

    def __init__(self, base_lr=1e-4, max_lr=1e-2, step_size=2000., mode='triangular'):
        super().__init__()
        if step_size <= 0:
            raise ValueError("step_size must be a positive number of iterations")
        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.clr_iterations = 0  # batches seen since training started
        self.history = {}

    def clr(self):
        """Return the learning rate for the current iteration count."""
        cycle = np.floor(1 + self.clr_iterations / (2 * self.step_size))
        # x runs 1 -> 0 -> 1 across one full cycle
        x = np.abs(self.clr_iterations / self.step_size - 2 * cycle + 1)
        return float(self.base_lr + (self.max_lr - self.base_lr) * max(0.0, 1 - x))

    def _apply_lr(self, lr):
        """Write ``lr`` to the optimizer of the model being trained."""
        # Without this assignment the schedule would only be logged and the
        # optimizer would keep training at its compile-time learning rate.
        # NOTE: assumes the optimizer was built with a plain numeric learning
        # rate, not a LearningRateSchedule object.
        self.model.optimizer.learning_rate = lr

    def on_train_begin(self, logs=None):
        # Restart the cycle; at iteration 0 the schedule evaluates to base_lr
        self.clr_iterations = 0
        self._apply_lr(self.clr())

    def on_batch_end(self, epoch, logs=None):
        # NOTE: Keras passes the batch index as the first argument here; the
        # parameter keeps its original name and is not used.
        logs = logs if logs is not None else {}
        self.clr_iterations += 1
        lr = self.clr()
        self._apply_lr(lr)
        logs['lr'] = lr
        self.history.setdefault('lr', []).append(lr)

# Callbacks
clr = CyclicLR(base_lr=1e-4, max_lr=1e-3, step_size=2000.)
# NOTE(review): ReduceLROnPlateau and CyclicLR both target the learning rate;
# confirm which of the two is meant to be authoritative.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=10, min_lr=1e-6),
    clr
]

# Train the model
# The iterators already yield batches of their own size, so `batch_size` is
# not passed to fit() (Keras asks for it to be omitted with generator input).
history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=50,
    callbacks=callbacks
)

# Save the trained model
model.save('Mobilenetv2_on_Original_Dataset.h5')

# Plot accuracy and loss curves (one row per epoch)
history_df = pd.DataFrame(history.history)

history_df[['accuracy', 'val_accuracy']].plot()
plt.title("Training and Validation Accuracy")
plt.show()

history_df[['loss', 'val_loss']].plot()
plt.title("Training and Validation Loss")
plt.show()

# Plot Cyclical Learning Rate (one point per training batch)
plt.plot(clr.history['lr'])
plt.title('Cyclical Learning Rate Schedule')
plt.xlabel('Iterations')
plt.ylabel('Learning Rate')
plt.show()

# Evaluate the model on the test set; results = [loss, accuracy]
results = model.evaluate(test_images, verbose=0)
printmd(f"## Test Loss: {results[0]:.5f}")
printmd(f"## Accuracy on the test set: {results[1] * 100:.2f}%")




In [None]:
from sklearn.metrics import roc_curve, auc
import numpy as np
import matplotlib.pyplot as plt

# Compute AUC-ROC for each class
y_true = np.asarray(test_images.classes)  # Actual labels (integer class indices)
y_pred_proba = model.predict(test_images, verbose=0)  # Predicted probabilities for each class
# Take the class count from the prediction width so the column indexing below
# is always valid
n_classes = y_pred_proba.shape[1]

# Binarize the output labels for each class (one-vs-rest)
from sklearn.preprocessing import label_binarize
y_true_binarized = label_binarize(y_true, classes=np.arange(n_classes))
if y_true_binarized.shape[1] == 1:
    # For exactly two classes label_binarize returns a single column (the
    # indicator of class 1); expand it so every class has its own column.
    y_true_binarized = np.hstack([1 - y_true_binarized, y_true_binarized])

fpr = {}
tpr = {}
roc_auc = {}

for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_true_binarized[:, i], y_pred_proba[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plotting ROC Curve for each class
plt.figure(figsize=(10, 8))
for i in range(n_classes):
    plt.plot(fpr[i], tpr[i], label=f"Class {i} (area = {roc_auc[i]:.2f})")

# Diagonal = performance of a random classifier
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve for each class")
plt.legend(loc="lower right")
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

# Make predictions (class index with the highest probability per image)
pred = model.predict(test_images)
pred = np.argmax(pred, axis=1)

# Map predicted indices back to label names using the training iterator's
# label -> index mapping
labels = (train_images.class_indices)
labels = dict((v, k) for k, v in labels.items())  # index -> label name
pred = [labels[k] for k in pred]

# Label names in index order; used for both the matrix and its tick labels
class_names = [labels[i] for i in sorted(labels)]

# True labels, in the same row order the (unshuffled) test iterator reads them
y_test = list(test_df['Label'])

# Evaluation Metrics
print(classification_report(y_test, pred))

# Confusion matrix
# Pin the row/column order explicitly: by default the matrix covers the labels
# found in y_test OR pred, which need not equal sorted(set(y_test)) and would
# then leave the tick labels misaligned with the cells.
cf_matrix = confusion_matrix(y_test, pred, labels=class_names)

# Create the confusion matrix plot
plt.figure(figsize=(10, 6))
sns.heatmap(cf_matrix, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names, cmap="Blues")
plt.title('Confusion Matrix')
plt.tight_layout()
plt.show()


Test-Time Augmentation (TTA) is a technique in which several augmented versions of each test image are generated and the model's predictions are averaged across those versions to improve robustness and generalization.

In [None]:
# Function for Test-Time Augmentation (TTA)
def predict_with_tta(model, test_images, augmentations=5):
    """Average the model's predictions over repeated passes of ``test_images``.

    NOTE: the passes only differ if ``test_images`` applies random
    augmentation; with a deterministic iterator every pass is identical and
    the average equals a single prediction. The iterator must also yield
    samples in a fixed order, otherwise rows from different passes do not
    refer to the same image.

    Args:
        model: Object exposing ``predict(data, verbose=...)``.
        test_images: Iterator exposing ``reset()``; rewound before each pass.
        augmentations: Number of passes to average; must be at least 1.

    Returns:
        Element-wise mean of the per-pass prediction arrays.

    Raises:
        ValueError: If ``augmentations`` is less than 1 (the mean of zero
            passes would otherwise silently come back as NaN).
    """
    if augmentations < 1:
        raise ValueError("augmentations must be at least 1")

    tta_predictions = []

    for _ in range(augmentations):
        # Rewind the iterator so each pass starts from the first batch
        test_images.reset()

        # Get predictions for all test images
        tta_predictions.append(model.predict(test_images, verbose=0))

    # Average the predictions across passes
    return np.mean(tta_predictions, axis=0)

# Test-Time Augmentation (TTA) prediction
tta_preds = predict_with_tta(model, test_images, augmentations=5)
tta_class_preds = np.argmax(tta_preds, axis=1)
true_classes = np.asarray(test_images.classes)  # true labels as integer class indices
tta_accuracy = np.mean(tta_class_preds == true_classes)

# Print the TTA accuracy
print(f"TTA Accuracy on the test set: {tta_accuracy * 100:.2f}%")

# ----------------------------------------------
# Additional Visualizations for the Paper
# ----------------------------------------------

# 1. Confusion Matrix with TTA
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

num_classes = len(test_images.class_indices)
class_names = list(test_images.class_indices)  # label names in index order

# Pin the matrix to every class index so its shape always matches the tick
# labels, even if some class never occurs among the true or predicted labels
cf_matrix = confusion_matrix(true_classes, tta_class_preds, labels=np.arange(num_classes))

# Plot confusion matrix
plt.figure(figsize=(10, 6))
sns.heatmap(cf_matrix, annot=True, fmt='d', cmap="Blues",
            xticklabels=class_names,
            yticklabels=class_names)
plt.title('Confusion Matrix - Test-Time Augmentation (TTA)')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

# 2. ROC and AUC Curves for Each Class
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize

y_true = label_binarize(true_classes, classes=[i for i in range(num_classes)])
if y_true.shape[1] == 1:
    # For exactly two classes label_binarize returns a single column (the
    # indicator of class 1); expand it so every class has its own column.
    y_true = np.hstack([1 - y_true, y_true])
y_score = tta_preds

# Compute ROC curve and ROC area for each class (one-vs-rest)
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(num_classes):
    fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC curves for all classes
plt.figure(figsize=(10, 8))
for i in range(num_classes):
    plt.plot(fpr[i], tpr[i], label=f'Class {i} (AUC = {roc_auc[i]:.2f})')
plt.plot([0, 1], [0, 1], 'k--', label='Random Guessing')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve - Test-Time Augmentation (TTA)')
plt.legend(loc='lower right')
plt.show()

# 3. Accuracy Comparison Table
accuracy_without_tta = results[1]  # accuracy entry of the earlier model.evaluate() result
print(f"Accuracy without TTA: {accuracy_without_tta * 100:.2f}%")
print(f"TTA Accuracy: {tta_accuracy * 100:.2f}%")

# 4. Distribution of Predicted Probabilities with TTA
# (highest averaged class probability per test image)
plt.figure(figsize=(10, 6))
plt.hist(np.max(tta_preds, axis=1), bins=20, alpha=0.7, label='TTA')
plt.title('Distribution of Max Predicted Probabilities - TTA')
plt.xlabel('Max Probability')
plt.ylabel('Frequency')
plt.legend()
plt.show()

# 5. Learning Curves for Training with TTA
# Uses the `history` object returned by model.fit()
pd.DataFrame(history.history)[['accuracy', 'val_accuracy']].plot()
plt.title("Training and Validation Accuracy with TTA")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.show()


Grad Cam Visualization code 

Saving model

Deep feature extraction (equivalent to the features I previously extracted in MATLAB)

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

# Paths
dataset_dir = r"E:\Abroad period research\new idea implementation codes\First part of the Covid paper\Original dataset"
model_path = 'Mobilenetv2.h5'

# Load the trained MobileNetV2-based model from disk
model = tf.keras.models.load_model(model_path)

# Define the layer from which to extract features
# For example, we can extract from the last convolutional layer 'Conv_1'
feature_layer = model.get_layer('Conv_1')  # Modify the layer name as per your model's architecture

# Create a model that outputs the activations from the feature extraction layer
feature_extractor_model = Model(inputs=model.input, outputs=feature_layer.output)

# Image preprocessing using ImageDataGenerator
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Load images from the dataset
batch_size = 32  # Adjust based on available memory
img_size = (224, 224)  # Size expected by MobileNetV2

train_generator = datagen.flow_from_directory(
    dataset_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',  # Assuming you have multiple classes
    shuffle=False  # No shuffling so that order of labels and features matches
)

# Extract features from the dataset
num_samples = train_generator.samples
if num_samples == 0:
    raise ValueError(f"No images found under {dataset_dir}")
num_batches = len(train_generator)  # batches needed to cover the dataset once

# One predict() call walks the whole iterator in order and returns the stacked
# activations; this replaces a hand-written loop that called predict() once
# per batch and needed a manual break to stop the endless iterator.
features = feature_extractor_model.predict(train_generator, steps=num_batches, verbose=1)

# Get the labels corresponding to the features
labels = train_generator.classes  # These are the true labels for each image

# Save the extracted features and labels
np.save('MobileNetV2_features.npy', features)
np.save('MobileNetV2_labels.npy', labels)

print("Feature extraction completed. Features and labels saved.")


Classification of features using saved extracted features

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import IncrementalPCA

# Load features and labels with memory mapping to avoid loading the entire dataset into memory
features = np.load('MobileNetV2_features.npy', mmap_mode='r')  # expected 4-D: (num_samples, height, width, channels)
labels = np.load('MobileNetV2_labels.npy')

# Reshape features to 2D (num_samples, num_features)
num_samples = features.shape[0]
num_features = features.shape[1] * features.shape[2] * features.shape[3]
if num_samples == 0:
    raise ValueError("MobileNetV2_features.npy contains no samples")

pca_batch_size = 1000  # rows handled per chunk in every loop below

# Use memory mapping for reshaped features
features_reshaped = np.memmap('MobileNetV2_reshaped_features.dat', dtype='float32', mode='w+', shape=(num_samples, num_features))

# Copy chunk by chunk: converting the whole array at once would materialise
# every feature in RAM and defeat the purpose of the memory maps. Assignment
# into the float32 memmap performs the down-cast.
features_2d = features.reshape(num_samples, num_features)
for start in range(0, num_samples, pca_batch_size):
    stop = start + pca_batch_size
    features_reshaped[start:stop] = features_2d[start:stop]

# Use IncrementalPCA to reduce memory consumption by processing in batches
# n_components cannot exceed the number of samples or of features
n_components = min(500, num_samples, num_features)  # Adjust this based on your memory constraints
ipca = IncrementalPCA(n_components=n_components, batch_size=pca_batch_size)

# Chunk boundaries for PCA. partial_fit() rejects a chunk with fewer rows than
# n_components, so a short final chunk is merged into the one before it.
batch_starts = list(range(0, num_samples, pca_batch_size))
if len(batch_starts) > 1 and num_samples - batch_starts[-1] < n_components:
    batch_starts.pop()
batch_bounds = list(zip(batch_starts, batch_starts[1:] + [num_samples]))

# Create a memory-mapped file for the reduced features to avoid using RAM
features_reduced = np.memmap('MobileNetV2_features_reduced.dat', dtype='float32', mode='w+', shape=(num_samples, n_components))

# IncrementalPCA has no combined "partial_fit_transform"; fit on every chunk
# first, then project every chunk with the finished model so that all rows
# are transformed by the same components.
# NOTE(review): PCA is fitted on all samples, including those that end up in
# the test split below; fit on the training rows only if that matters.
for start, stop in batch_bounds:
    ipca.partial_fit(features_reshaped[start:stop])
for start, stop in batch_bounds:
    features_reduced[start:stop] = ipca.transform(features_reshaped[start:stop])

# Save the reduced features (optional, if you want to use them later)
np.save('MobileNetV2_features_reduced.npy', features_reduced)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features_reduced, labels, test_size=0.2, random_state=42)

# Define a list of classifiers to use
classifiers = {
    'SVM': SVC(kernel='linear', random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Naive Bayes': GaussianNB(),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42)
}

# Draw a confusion matrix as an annotated heatmap
def plot_confusion_matrix(cm, classifier_name):
    """Show ``cm`` as a blue heatmap with integer cell annotations.

    Args:
        cm: Confusion matrix to draw (rows are labelled 'True', columns
            'Predicted').
        classifier_name: Name inserted into the figure title.
    """
    plt.figure(figsize=(6, 4))
    axes = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
    axes.set_title(f'Confusion Matrix for {classifier_name}')
    axes.set_xlabel('Predicted')
    axes.set_ylabel('True')
    plt.show()

# Fit each classifier on the training split and report its test-split metrics
for classifier_name, clf in classifiers.items():
    # Train, then predict the held-out split (fit() returns the estimator)
    y_pred = clf.fit(X_train, y_train).predict(X_test)

    # Confusion matrix of true vs. predicted labels, shown as a heatmap
    cm = confusion_matrix(y_test, y_pred)
    plot_confusion_matrix(cm, classifier_name)

    # Accuracy plus support-weighted precision, recall and F1
    weighted = {'average': 'weighted'}
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, **weighted)
    recall = recall_score(y_test, y_pred, **weighted)
    f1 = f1_score(y_test, y_pred, **weighted)

    # Emit the summary followed by a separator line
    report_lines = [
        f'Classifier: {classifier_name}',
        f'Accuracy: {accuracy:.4f}',
        f'Precision: {precision:.4f}',
        f'Recall: {recall:.4f}',
        f'F1 Score: {f1:.4f}',
        '-' * 40,
    ]
    print('\n'.join(report_lines))


Training Decision tree using statistical features and rule extraction using decision tree

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import matplotlib.cm as cm
import os
from pathlib import Path

# Read an image file into a single-item batch array
def get_img_array(img_path, size=(224, 224)):
    """Load ``img_path`` resized to ``size`` and return it with a leading batch axis.

    Args:
        img_path: Path of the image file to read.
        size: Target size passed to the Keras image loader.

    Returns:
        NumPy array holding the image, with one extra leading axis of
        length 1.
    """
    image_utils = tf.keras.preprocessing.image
    pil_image = image_utils.load_img(img_path, target_size=size)
    pixels = image_utils.img_to_array(pil_image)
    return pixels[np.newaxis, ...]

# Function to create heatmap using Grad-CAM
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for the first image in ``img_array``.

    Args:
        img_array: Input batch fed to ``model``; only the first sample's
            activations and prediction are used.
        model: Keras model that contains a layer named
            ``last_conv_layer_name``.
        last_conv_layer_name: Name of the layer whose output feature maps are
            weighted to form the heatmap.
        pred_index: Index of the model output to explain. Defaults to the
            arg-max of the first sample's prediction.

    Returns:
        NumPy array with the rectified heatmap scaled so that its maximum is
        1, or all zeros when no location has a positive response.
    """
    # Model that returns both the chosen layer's activations and the prediction
    grad_model = tf.keras.models.Model(
        inputs=model.inputs,
        outputs=[model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the selected output w.r.t. the layer activations, averaged
    # over the batch and spatial axes to give one weight per channel
    grads = tape.gradient(class_channel, last_conv_layer_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Channel-weighted sum of the first sample's feature maps
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep positive evidence only, then scale to [0, 1]. The floor on the
    # divisor prevents a 0/0 -> NaN heatmap when nothing is positive.
    heatmap = tf.maximum(heatmap, 0)
    peak = tf.math.reduce_max(heatmap)
    heatmap = heatmap / tf.maximum(peak, 1e-10)
    return heatmap.numpy()

# Function to save Grad-CAM visualization
def save_gradcam(img_path, heatmap, output_dir, alpha=0.4):
    """Save the original image and its Grad-CAM overlay into ``output_dir``.

    Two PNG files are written, named after the stem of ``img_path``:
    ``<stem>.png`` (the image as loaded) and ``<stem>_XAI.png`` (the image
    with the jet-coloured heatmap blended on top).

    Args:
        img_path: Path of the source image.
        heatmap: Array of heatmap intensities, expected in [0, 1].
        output_dir: Destination folder; created if it does not exist.
        alpha: Weight of the coloured heatmap added to the image.
    """
    # Function-scope import: the file only binds matplotlib.cm and pyplot
    import matplotlib

    img = tf.keras.preprocessing.image.load_img(img_path)
    img = tf.keras.preprocessing.image.img_to_array(img)

    # Sanitise before the uint8 cast: NaN/inf or out-of-range values would
    # otherwise turn into arbitrary colour indices.
    heatmap = np.clip(np.nan_to_num(heatmap, nan=0.0, posinf=1.0, neginf=0.0), 0.0, 1.0)
    heatmap = np.uint8(255 * heatmap)

    # cm.get_cmap() is deprecated and removed in recent Matplotlib releases;
    # prefer the colormap registry and fall back only on old versions.
    try:
        jet = matplotlib.colormaps["jet"]
    except AttributeError:
        jet = cm.get_cmap("jet")

    # Map each intensity to an RGB colour, then resize to the image size
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]
    jet_heatmap = tf.keras.preprocessing.image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
    jet_heatmap = tf.keras.preprocessing.image.img_to_array(jet_heatmap)

    # Blend the coloured heatmap onto the original image
    superimposed_img = jet_heatmap * alpha + img
    superimposed_img = tf.keras.preprocessing.image.array_to_img(superimposed_img)

    # Save original and XAI image
    os.makedirs(output_dir, exist_ok=True)
    original_img_name = Path(img_path).stem
    original_img_path = os.path.join(output_dir, f"{original_img_name}.png")
    cam_img_path = os.path.join(output_dir, f"{original_img_name}_XAI.png")

    tf.keras.preprocessing.image.save_img(original_img_path, img)
    superimposed_img.save(cam_img_path)

# Paths and model loading
dataset_dir = r"E:\Abroad period research\new idea implementation codes\First part of the Covid paper\Cropped augmented dataset performance\Augmented-Dataset"
model_path = r"E:\Abroad period research\new idea implementation codes\First part of the Covid paper\Final codes\Mobilenetv2_on_Cropped_augmented_dataset.h5"
output_base_dir = r"E:\GradCAM_Results"

# Load the model and specify layer name
model = tf.keras.models.load_model(model_path)
last_conv_layer_name = "Conv_1"
img_size = (224, 224)

# Preprocess Input
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

# Loop through each subfolder and apply Grad-CAM
# Directory listings are sorted because os.listdir() returns entries in
# arbitrary order; without sorting, the 50 images picked per class could
# change from run to run.
for class_folder in sorted(os.listdir(dataset_dir)):
    class_folder_path = os.path.join(dataset_dir, class_folder)

    # Only sub-directories are treated as class folders
    if not os.path.isdir(class_folder_path):
        continue

    print(f"Processing class: {class_folder}")

    # Create output folders
    output_class_dir = os.path.join(output_base_dir, class_folder)
    os.makedirs(output_class_dir, exist_ok=True)

    # Limit to the first 50 images (by file name)
    image_files = sorted(
        f for f in os.listdir(class_folder_path)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )[:50]

    # Process each image
    for img_file in image_files:
        img_path = os.path.join(class_folder_path, img_file)
        img_array = preprocess_input(get_img_array(img_path, size=img_size))

        # Generate Grad-CAM heatmap
        heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name)

        # Save original and Grad-CAM images
        save_gradcam(img_path, heatmap, output_class_dir)

print("Grad-CAM visualizations generated successfully!")
