In [None]:
import os
import cv2
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [None]:
# Kaggle API credentials are secrets and must never be stored in the notebook.
# Values already exported in the environment are reused; anything missing is
# requested interactively so it does not end up in the saved file.
# Any API key that was ever committed in plain text should be revoked and
# regenerated in the Kaggle account settings.
from getpass import getpass

for _credential_name in ("KAGGLE_USERNAME", "KAGGLE_KEY"):
    if not os.environ.get(_credential_name):
        os.environ[_credential_name] = getpass(f"{_credential_name}: ")

In [None]:
# Download the three chest X-ray dataset archives with the Kaggle CLI.
# The CLI needs Kaggle credentials (e.g. the KAGGLE_USERNAME / KAGGLE_KEY
# environment variables); each archive is saved as <dataset-name>.zip.
!kaggle datasets download tawsifurrahman/covid19-radiography-database
!kaggle datasets download artyomkolas/3-kinds-of-pneumonia
!kaggle datasets download darshan1504/covid19-detection-xray-dataset

Dataset URL: https://www.kaggle.com/datasets/tawsifurrahman/covid19-radiography-database
License(s): copyright-authors
Downloading covid19-radiography-database.zip to /content
 98% 764M/778M [00:08<00:00, 88.6MB/s]
100% 778M/778M [00:08<00:00, 101MB/s] 
Dataset URL: https://www.kaggle.com/datasets/artyomkolas/3-kinds-of-pneumonia
License(s): Attribution 4.0 International (CC BY 4.0)
Downloading 3-kinds-of-pneumonia.zip to /content
100% 3.48G/3.49G [00:41<00:00, 86.6MB/s]
100% 3.49G/3.49G [00:41<00:00, 90.0MB/s]
Dataset URL: https://www.kaggle.com/datasets/darshan1504/covid19-detection-xray-dataset
License(s): Attribution 4.0 International (CC BY 4.0)
Downloading covid19-detection-xray-dataset.zip to /content
 95% 177M/186M [00:02<00:00, 60.0MB/s]
100% 186M/186M [00:02<00:00, 77.0MB/s]


In [None]:
!unzip covid19-radiography-database
!unzip 3-kinds-of-pneumonia
!unzip covid19-detection-xray-dataset

In [None]:
# Image folders for every diagnostic class, collected from the extracted datasets.
normal_paths = [
    'COVID-19_Radiography_Dataset/Normal/images',
    'Curated X-Ray Dataset/Normal',
    'NonAugmentedTrain/Normal',
]
covid_paths = [
    'COVID-19_Radiography_Dataset/COVID/images',
    'Curated X-Ray Dataset/COVID-19',
    'NonAugmentedTrain/COVID-19',
]
viralPneumonia_paths = [
    'COVID-19_Radiography_Dataset/Viral Pneumonia/images',
    'Curated X-Ray Dataset/Pneumonia-Viral',
    'NonAugmentedTrain/ViralPneumonia',
]
# Lung opacity images are taken from a single folder.
Lung_Opacity_paths = [
    'COVID-19_Radiography_Dataset/Lung_Opacity/images',
]
BacterialPneumonia_paths = [
    'Curated X-Ray Dataset/Pneumonia-Bacterial',
    'NonAugmentedTrain/BacterialPneumonia',
]

In [None]:
# Group the per-class folder lists. The position of each list is used as the
# integer class label when the images are loaded with enumerate():
# 0 = normal, 1 = COVID, 2 = viral pneumonia, 3 = bacterial pneumonia, 4 = lung opacity.
all_paths = [normal_paths, covid_paths, viralPneumonia_paths, BacterialPneumonia_paths, Lung_Opacity_paths]

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def load_images(paths, target_label, target_size=(100, 100), Max=3413):
    """
    Load up to ``Max`` images from a list of folders and give them all one label.

    Args:
        paths (list[str]): Folders holding the images of a single class.
        target_label: Label stored once for every image that is loaded.
        target_size (tuple): Size handed to ``load_img`` for resizing.
        Max (int): Upper bound on the number of images loaded across all folders.

    Returns:
        tuple: ``(images, labels)`` as NumPy arrays. Pixel values are divided
        by 255.0; both arrays are empty when nothing was loaded.
    """
    images = []
    labels = []

    for folder_path in paths:
        # Once the cap is reached there is no point in listing further folders.
        if len(images) >= Max:
            break
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # subset that survives the cap reproducible between runs.
        for image_name in sorted(os.listdir(folder_path)):
            if len(images) >= Max:
                break
            image_path = os.path.join(folder_path, image_name)
            # Skip sub-directories and other non-file entries instead of
            # letting the image loader fail on them.
            if not os.path.isfile(image_path):
                continue
            image = load_img(image_path, target_size=target_size)
            images.append(img_to_array(image) / 255.0)  # Normalize the image
            labels.append(target_label)

    return np.array(images), np.array(labels)


In [None]:
# Preprocess image data
# Load every class in turn; enumerate() supplies the integer label, so a
# class's label is the position of its folder list inside `all_paths`.
data_images = []
data_targets = []
for target_label, db_images in enumerate(all_paths):
  images, target = load_images(db_images, target_label)
  # extend() adds the individual images / labels, keeping both lists aligned
  data_images.extend(images)
  data_targets.extend(target)


In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

def encode_labels(labels):
    """
    One-hot encode class labels.

    The labels are first mapped to integers with a freshly fitted
    ``LabelEncoder`` and then expanded with ``to_categorical``.

    Args:
        labels: Sequence of class labels.

    Returns:
        tuple: ``(one_hot_encoded, label_encoder)`` - the one-hot encoded
        labels and the fitted encoder.
    """
    label_encoder = LabelEncoder()
    class_indices = label_encoder.fit_transform(labels)
    return to_categorical(class_indices), label_encoder


In [None]:
# Encode labels
# One-hot encode the integer class labels; the fitted encoder is kept as well.
labels_one_hot, label_encoder = encode_labels(data_targets)

In [None]:
from sklearn.model_selection import train_test_split

def split_dataset(images, labels, test_size=0.2, val_size=0.1, random_state=42):
    """
    Split dataset into training, validation, and test sets.

    Args:
        images (np.array): Array of images.
        labels (np.array): Array of labels.
        test_size (float): Proportion of the data to include in the test split.
        val_size (float): Proportion of the data to include in the validation split.
        random_state (int): Seed used by the random number generator.

    Returns:
        tuple: Split data (X_train, X_val, X_test, y_train, y_val, y_test).
    """
    # First carve off the combined validation + test hold-out.
    holdout_size = test_size + val_size
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        images, labels, test_size=holdout_size, random_state=random_state
    )

    # train_test_split gives its `test_size` fraction to the SECOND pair it
    # returns (X_test / y_test here), so the fraction has to be the test share
    # of the hold-out. Passing the validation share instead swaps the sizes of
    # the validation and test sets.
    test_ratio = test_size / holdout_size
    X_val, X_test, y_val, y_test = train_test_split(
        X_holdout, y_holdout, test_size=test_ratio, random_state=random_state
    )

    return (
        np.asarray(X_train), np.asarray(X_val), np.asarray(X_test),
        np.asarray(y_train), np.asarray(y_val), np.asarray(y_test),
    )


In [None]:
# Split dataset
# Uses the default proportions and seed of split_dataset.
X_train, X_val, X_test, y_train, y_val, y_test = split_dataset(data_images, labels_one_hot)

In [None]:
# Rebind the names to 0 so the full image/label collections are no longer
# referenced from these variables (the split copies are used from here on).
data_images, labels_one_hot = 0, 0

In [None]:
def shuffle_data(X, y):
    """
    Reorder images and labels with one shared random permutation.

    The same index order is applied to both arrays, so every image stays
    paired with its label.

    Args:
        X (np.array): Array of images.
        y (np.array): Array of labels.

    Returns:
        tuple: Shuffled data (X_shuffled, y_shuffled).
    """
    order = np.arange(X.shape[0])
    np.random.shuffle(order)  # in place, drawn from NumPy's global random state
    X_shuffled = X[order]
    y_shuffled = y[order]
    return X_shuffled, y_shuffled

In [None]:
# Shuffle data
# Each split is shuffled independently; images and labels stay paired.
X_train, y_train = shuffle_data(X_train, y_train)
X_val, y_val = shuffle_data(X_val, y_val)
X_test, y_test = shuffle_data(X_test, y_test)

In [None]:
# Display the number of training images and training labels.
len(X_train), len(y_train)

(11945, 11945)

In [None]:
# Display the number of validation images and validation labels.
len(X_val), len(y_val)

(3413, 3413)

In [None]:
# Display the number of test images and test labels.
len(X_test), len(y_test)

(1707, 1707)

In [None]:
import tensorflow as tf

def prepare_batches(X, y, batch_size=32):
    """
    Wrap images and labels in a batched ``tf.data.Dataset``.

    Args:
        X (np.array): Array of images.
        y (np.array): Array of labels.
        batch_size (int): Number of (image, label) pairs per batch.

    Returns:
        tf.data.Dataset: Dataset yielding ``(X, y)`` batches in input order.
    """
    return tf.data.Dataset.from_tensor_slices((X, y)).batch(batch_size)


In [None]:
# Prepare batches
# All three splits use the default batch size of prepare_batches.
train_dataset = prepare_batches(X_train, y_train)
val_dataset = prepare_batches(X_val, y_val)
test_dataset = prepare_batches(X_test, y_test)

In [None]:
# Rebind the split variables to 0; from here on only the tf.data datasets
# built from them are used.
X_train, X_val, X_test, y_train, y_val, y_test = 0,0,0, 0,0,0

In [None]:
def cache_and_prefetch(dataset, cache=True, buffer_size=tf.data.AUTOTUNE):
    """
    Optionally cache a dataset and always add prefetching.

    Args:
        dataset (tf.data.Dataset): Input dataset.
        cache (bool): When true, ``dataset.cache()`` is applied before prefetching.
        buffer_size (int): Buffer size passed to ``prefetch``.

    Returns:
        tf.data.Dataset: The (optionally cached) dataset with prefetching applied.
    """
    pipeline = dataset.cache() if cache else dataset
    return pipeline.prefetch(buffer_size)


In [None]:
# Cache and prefetch
# Training and validation data are iterated every epoch, so they are cached;
# the test dataset only gets prefetching.
train_dataset = cache_and_prefetch(train_dataset)
val_dataset = cache_and_prefetch(val_dataset)
test_dataset = cache_and_prefetch(test_dataset, cache=False)  # Typically don't cache test set

In [None]:
# Display the number of batches in the training dataset.
len(train_dataset)

374

In [None]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define the CNN model with regularization:
# an ImageNet-pretrained ResNet50 backbone (without its classification top)
# followed by a new, L2-regularized dense head for the 5 classes.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(100, 100, 3))

# Freeze the backbone BEFORE compiling. Keras documents that the `trainable`
# state is captured when compile() is called, so freezing only after compile()
# is not guaranteed to apply to the first training phase.
for layer in base_model.layers:
    layer.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)  # collapse the spatial feature map into one vector per image
x = Dense(1024, activation='relu', kernel_regularizer=l2(0.01))(x)
x = BatchNormalization()(x)
x = Dense(512, activation='relu', kernel_regularizer=l2(0.01))(x)
x = BatchNormalization()(x)
predictions = Dense(5, activation='softmax')(x)  # one probability per class

image_model = Model(inputs=base_model.input, outputs=predictions)
# categorical_crossentropy matches the one-hot encoded labels.
image_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Freeze the base model layers
# Marks every ResNet50 layer as non-trainable; the dense head added on top is
# not part of `base_model.layers` and stays trainable.
# NOTE(review): Keras expects compile() to be called after `trainable` is
# changed - confirm the model is compiled with these flags already in place.
for layer in base_model.layers:
    layer.trainable = False


In [None]:
# Callbacks for early stopping and saving the best model.
# Both watch the validation loss: training stops after 5 epochs without
# improvement (restoring the best weights), and the checkpoint file is only
# rewritten when the validation loss improves.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
model_checkpoint = ModelCheckpoint(
    'best_model.h5',
    monitor='val_loss',
    save_best_only=True,
)


In [None]:
# Class weights to balance the contributions of each class.
# All five classes (labels 0-4) currently get the same weight of 1.0.
class_weights = {class_index: 1.0 for class_index in range(5)}


In [None]:
# Number of epochs for initial training
initial_epochs = 10

# Train the model
# First phase: fit on the training batches and validate on the validation
# batches after every epoch. Early stopping may end training before
# `initial_epochs`; the checkpoint callback saves the best model seen so far.
history = image_model.fit(train_dataset,
                          validation_data=val_dataset,
                          epochs=initial_epochs,
                          class_weight=class_weights,
                          callbacks=[early_stopping, model_checkpoint])


Epoch 1/10

  saving_api.save_model(


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Unfreeze the top layers of the base model
# Only the last 20 entries of `base_model.layers` become trainable; the slice
# selects by position, so every layer type in that range is included.
# NOTE(review): this range presumably contains BatchNormalization layers too -
# confirm that updating them during fine-tuning is intended.
for layer in base_model.layers[-20:]:  # Adjusted number of layers to unfreeze
    layer.trainable = True


In [None]:
# Recompile the model with a lower learning rate for fine-tuning
# Recompiling after changing the `trainable` flags makes the new flags take
# effect; loss and metric are the same as in the first phase.
image_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])


In [None]:
# Number of epochs for fine-tuning
fine_tuning_epochs = 20  # Adjusted based on expected complexity

# Continue training the model
# Second phase: same data, class weights and callback objects as the first
# phase; early stopping may end training before `fine_tuning_epochs`.
fine_tuning_history = image_model.fit(train_dataset,
                                      validation_data=val_dataset,
                                      epochs=fine_tuning_epochs,
                                      class_weight=class_weights,
                                      callbacks=[early_stopping, model_checkpoint])


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20


In [None]:
# Evaluate the model on the test set
# evaluate() returns the loss followed by the compiled metric (accuracy).
test_loss, test_accuracy = image_model.evaluate(test_dataset)
print(f'Test accuracy: {test_accuracy * 100:.2f}%')

Test accuracy: 86.12%


In [None]:
# Mount Google Drive at /content/drive (Colab only) so files can be written to it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Save the trained model to the mounted Drive.
# NOTE(review): 'path_to_save' looks like a placeholder folder name - confirm
# the target directory exists before running.
image_model.save('/content/drive/My Drive/path_to_save/model.h5')

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix

def TFNP(cm):
    """
    Derive a one-vs-rest 2x2 table for every class of a confusion matrix.

    Entry ``i`` of the result is ``[[TP, FN], [FP, TN]]`` for class ``i``:
    the diagonal cell is TP, the rest of row ``i`` is FN, the rest of column
    ``i`` is FP, and everything else is TN.
    """
    total = np.sum(cm)

    def one_vs_rest(k):
        # Counts for class k against all other classes combined.
        hits = cm[k, k]
        misses = np.sum(cm[k, :]) - hits
        false_alarms = np.sum(cm[:, k]) - hits
        rejections = total - hits - false_alarms - misses
        return [[hits, misses], [false_alarms, rejections]]

    return np.asarray([one_vs_rest(k) for k in range(len(cm))])


In [None]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    if denominator == 0:
        return 0.0
    return numerator / denominator


def Report(cm_classes):
    """
    Generate performance metrics report for each class.

    Args:
        cm_classes: Indexable as ``cm_classes[i, r, c]``, holding one
            ``[[TP, FN], [FP, TN]]`` table per class.

    Returns:
        np.array: One dict per class with the keys 'IOU', 'DSC', 'ACC',
        'Specificity', 'Precision', 'Recall' and 'F1-Score'. A metric whose
        denominator is zero (for example precision of a class that was never
        predicted) is reported as 0.0 instead of NaN.
    """
    repo = []
    for i in range(len(cm_classes)):
        tp = cm_classes[i, 0, 0]
        fn = cm_classes[i, 0, 1]
        fp = cm_classes[i, 1, 0]
        tn = cm_classes[i, 1, 1]

        iou = _safe_ratio(tp, tp + fn + fp)  # Intersection over Union
        dsc = _safe_ratio(2 * tp, (2 * tp) + fp + fn)  # Dice Similarity Coefficient
        acc = _safe_ratio(tp + tn, tp + tn + fp + fn)  # Accuracy
        precision = _safe_ratio(tp, tp + fp)  # Precision (Positive Predictive Value)
        recall = _safe_ratio(tp, tp + fn)  # Recall (Sensitivity)
        spec = _safe_ratio(tn, tn + fp)  # Specificity
        f1_score = _safe_ratio(2 * (precision * recall), precision + recall)  # F1-Score

        info = {
            'IOU': iou,
            'DSC': dsc,
            'ACC': acc,
            'Specificity': spec,
            'Precision': precision,
            'Recall': recall,
            'F1-Score': f1_score
        }

        repo.append(info)

    return np.asarray(repo)


In [None]:
def model_performance_report(model, test_dataset, class_labels):
    """
    Generate a performance report for a given model and test data.

    Args:
        model: Object with a ``predict`` method returning per-class scores.
        test_dataset: Iterable of ``(x_batch, y_batch)`` pairs with one-hot
            encoded ``y_batch``.
        class_labels: Class names, ordered by integer class index.

    Returns:
        tuple: ``(report, cm)`` - the per-class metrics from ``Report`` and the
        confusion matrix with one row/column per entry of ``class_labels``.
    """
    y_pred_all = []
    y_test_all = []

    for x_test, y_test in test_dataset:
        y_pred = model.predict(x_test)
        # Turn score vectors / one-hot rows into integer class indices.
        y_pred_classes = np.argmax(y_pred, axis=1)
        y_test_classes = np.argmax(y_test, axis=1)

        y_pred_all.extend(y_pred_classes)
        y_test_all.extend(y_test_classes)

    y_pred_all = np.array(y_pred_all)
    y_test_all = np.array(y_test_all)

    # Fix the label set explicitly. Without `labels`, a class that appears in
    # neither the true nor the predicted labels is dropped from the matrix,
    # which shifts the rows against `class_labels` and makes `report[i]` below
    # point at the wrong class or run out of range.
    cm = confusion_matrix(y_test_all, y_pred_all, labels=np.arange(len(class_labels)))
    cm_classes = TFNP(cm)
    report = Report(cm_classes)

    for i, label in enumerate(class_labels):
        print(f"The report for {label} is : \n {report[i]} \n")

    return report, cm



In [None]:
# Example usage:
# Evaluate the model trained in this notebook (`image_model`) on `test_dataset`.
# The order of `disease_labels` must follow the integer class labels assigned
# while loading the images (the position of each class in `all_paths`).
disease_labels = ["Normal", "Covid-19", "Viral Pneumonia", "Bacterial Pneumonia", "Lung Opacity"]
report, cm = model_performance_report(image_model, test_dataset, disease_labels)

In [None]:
# Display the confusion matrix returned by model_performance_report.
cm

array([[288,   6,  10,   4,  19],
       [ 10, 315,   0,   0,  12],
       [  0,   2, 313,  49,   0],
       [  0,   2,  81, 247,   0],
       [ 30,  10,   1,   1, 307]])

In [None]:
# Map each integer class index to its display name and print the per-class
# metrics dictionary stored at that index of `report`.
disease = {0:"Normal", 1:"Covid-19", 2:"Viral Pneumonia", 3:"Bacterial Pneumonia", 4:"Lung Opacity"}
for i in range(len(report)):
  print(f"The report for {disease[i]} is : \n {report[i]} \n")

The report for Normal is : 
 {'IOU': 0.784741144414169, 'DSC': 0.8793893129770992, 'ACC': 0.9537199765670767, 'Specificity': 0.9710144927536232, 'Precision': 0.8780487804878049, 'Recall': 0.8807339449541285, 'F1-Score': 0.8793893129770993} 

The report for Covid-19 is : 
 {'IOU': 0.8823529411764706, 'DSC': 0.9375, 'ACC': 0.9753954305799648, 'Specificity': 0.9854014598540146, 'Precision': 0.9402985074626866, 'Recall': 0.9347181008902077, 'F1-Score': 0.9375} 

The report for Viral Pneumonia is : 
 {'IOU': 0.6864035087719298, 'DSC': 0.8140442132639792, 'ACC': 0.9162272993555947, 'Specificity': 0.9314966492926284, 'Precision': 0.7728395061728395, 'Recall': 0.8598901098901099, 'F1-Score': 0.8140442132639792} 

The report for Bacterial Pneumonia is : 
 {'IOU': 0.6432291666666666, 'DSC': 0.7828843106180665, 'ACC': 0.9197422378441711, 'Specificity': 0.9607843137254902, 'Precision': 0.8205980066445183, 'Recall': 0.7484848484848485, 'F1-Score': 0.7828843106180666} 

The report for Lung Opacity i