In [1]:
import os
import cv2
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense


In [2]:
os.environ["KAGGLE_USERNAME"] = "magdhndi"
os.environ["KAGGLE_KEY"] = "9b6c8953fb75d807a407f863ae22edc6"

In [3]:
!kaggle datasets download tawsifurrahman/covid19-radiography-database
!kaggle datasets download artyomkolas/3-kinds-of-pneumonia
!kaggle datasets download darshan1504/covid19-detection-xray-dataset

Dataset URL: https://www.kaggle.com/datasets/tawsifurrahman/covid19-radiography-database
License(s): copyright-authors
Downloading covid19-radiography-database.zip to /content
100% 776M/778M [00:06<00:00, 171MB/s]
100% 778M/778M [00:06<00:00, 131MB/s]
Dataset URL: https://www.kaggle.com/datasets/artyomkolas/3-kinds-of-pneumonia
License(s): Attribution 4.0 International (CC BY 4.0)
Downloading 3-kinds-of-pneumonia.zip to /content
 99% 3.47G/3.49G [00:40<00:00, 76.7MB/s]
100% 3.49G/3.49G [00:40<00:00, 92.6MB/s]
Dataset URL: https://www.kaggle.com/datasets/darshan1504/covid19-detection-xray-dataset
License(s): Attribution 4.0 International (CC BY 4.0)
Downloading covid19-detection-xray-dataset.zip to /content
 91% 169M/186M [00:02<00:00, 72.0MB/s]
100% 186M/186M [00:02<00:00, 82.8MB/s]


In [None]:
!unzip covid19-radiography-database
!unzip 3-kinds-of-pneumonia
!unzip covid19-detection-xray-dataset

In [5]:
normal_paths = ['COVID-19_Radiography_Dataset/Normal/images', 'Curated X-Ray Dataset/Normal', 'NonAugmentedTrain/Normal']
covid_paths = ['COVID-19_Radiography_Dataset/COVID/images', 'Curated X-Ray Dataset/COVID-19', 'NonAugmentedTrain/COVID-19']
viralPneumonia_paths = ['COVID-19_Radiography_Dataset/Viral Pneumonia/images', 'Curated X-Ray Dataset/Pneumonia-Viral', 'NonAugmentedTrain/ViralPneumonia']
Lung_Opacity_paths = ['COVID-19_Radiography_Dataset/Lung_Opacity/images']
BacterialPneumonia_paths = ['Curated X-Ray Dataset/Pneumonia-Bacterial', 'NonAugmentedTrain/BacterialPneumonia']

In [6]:
all_paths = [normal_paths, covid_paths, viralPneumonia_paths, BacterialPneumonia_paths, Lung_Opacity_paths]

In [7]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def load_images(paths, target_label, target_size=(100, 100), Max=3413):
    images = []
    labels = []

    for folder_path in paths:
        images_names = os.listdir(folder_path)
        for image_name in images_names:
          if len(images) >= Max:
            break
          image_path = os.path.join(folder_path, image_name)
          image = load_img(image_path, target_size=target_size)
          image_array = img_to_array(image) / 255.0  # Normalize the image
          images.append(image_array)
          labels.append(target_label)  # Use the corresponding label from 'target' list

    images = np.array(images)
    labels = np.array(labels)

    return images, labels


In [8]:
# Preprocess image data
data_images = []
data_targets = []
for target_label, db_images in enumerate(all_paths):
  images, target = load_images(db_images, target_label)
  data_images.extend(images)
  data_targets.extend(target)


In [9]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

def encode_labels(labels):
    """
    Encode categorical labels into a numerical format using one-hot encoding.

    Args:
        labels (np.array): Array of categorical labels.

    Returns:
        np.array: One-hot encoded labels.
    """
    label_encoder = LabelEncoder()
    integer_encoded = label_encoder.fit_transform(labels)
    one_hot_encoded = to_categorical(integer_encoded)

    return one_hot_encoded, label_encoder


In [10]:
# Encode labels
labels_one_hot, label_encoder = encode_labels(data_targets)

In [11]:
from sklearn.model_selection import train_test_split

def split_dataset(images, labels, test_size=0.2, val_size=0.1, random_state=42):
    """
    Split dataset into training, validation, and test sets.

    Args:
        images (np.array): Array of images.
        labels (np.array): Array of labels.
        test_size (float): Proportion of the data to include in the test split.
        val_size (float): Proportion of the data to include in the validation split.
        random_state (int): Seed used by the random number generator.

    Returns:
        tuple: Split data (X_train, X_val, X_test, y_train, y_val, y_test).
    """
    X_train, X_temp, y_train, y_temp = train_test_split(images, labels, test_size=(test_size + val_size), random_state=random_state)
    val_ratio = val_size / (test_size + val_size)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=val_ratio, random_state=random_state)

    return np.asarray(X_train), np.asarray(X_val), np.asarray(X_test), np.asarray(y_train), np.asarray(y_val), np.asarray(y_test)


In [12]:
# Split dataset
X_train, X_val, X_test, y_train, y_val, y_test = split_dataset(data_images, labels_one_hot)

In [13]:
data_images, labels_one_hot = 0, 0
X_train, y_train, X_val, y_val = 0, 0, 0, 0

In [14]:
def shuffle_data(X, y):
    """
    Shuffle the data to ensure randomness.

    Args:
        X (np.array): Array of images.
        y (np.array): Array of labels.

    Returns:
        tuple: Shuffled data (X_shuffled, y_shuffled).
    """
    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)
    return X[indices], y[indices]


In [15]:
# Shuffle data
#X_train, y_train = shuffle_data(X_train, y_train)
#X_val, y_val = shuffle_data(X_val, y_val)
X_test, y_test = shuffle_data(X_test, y_test)

In [16]:
#print(len(X_train), len(y_train))
#print(len(X_val), len(y_val))
print(len(X_test), len(y_test))

1707 1707


#Skip

In [27]:
import tensorflow as tf

def prepare_batches(X, y, batch_size=32):
    """
    Prepare data batches for training.

    Args:
        X (np.array): Array of images.
        y (np.array): Array of labels.
        batch_size (int): Size of the batches.

    Returns:
        tf.data.Dataset: Batched dataset.
    """
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    dataset = dataset.batch(batch_size)
    return dataset


In [29]:
# Prepare batches
#train_dataset = prepare_batches(X_train, y_train)
#val_dataset = prepare_batches(X_val, y_val)
test_dataset = prepare_batches(X_test, y_test)

In [30]:
X_train, X_val, X_test, y_train, y_val, y_test = 0,0,0, 0,0,0

In [31]:
def cache_and_prefetch(dataset, cache=True, buffer_size=tf.data.AUTOTUNE):
    """
    Cache and prefetch the dataset for performance optimization.

    Args:
        dataset (tf.data.Dataset): Input dataset.
        cache (bool): Whether to cache the dataset.
        buffer_size (int): Buffer size for prefetching.

    Returns:
        tf.data.Dataset: Optimized dataset.
    """
    if cache:
        dataset = dataset.cache()
    dataset = dataset.prefetch(buffer_size)
    return dataset


In [32]:
# Cache and prefetch
#train_dataset = cache_and_prefetch(train_dataset)
#val_dataset = cache_and_prefetch(val_dataset)
test_dataset = cache_and_prefetch(test_dataset, cache=False)  # Typically don't cache test set

In [34]:
#print(len(train_dataset))
#print(len(val_dataset))
print(len(test_dataset))


54


#Come Back

In [17]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [18]:
# File: Path to Save
pts_all_diseases_v3 = tf.keras.models.load_model('/content/drive/My Drive/path_to_save/all_diseases_v3.h5')

pts_new_best_model_v2 = tf.keras.models.load_model('/content/drive/My Drive/path_to_save/new_best_model_v2.h5')
pts_best_model_v2 = tf.keras.models.load_model('/content/drive/My Drive/path_to_save/best_model_v2.h5')

pts_best_model_v1 = tf.keras.models.load_model('/content/drive/My Drive/path_to_save/best_model_v1.h5')
pts_model = tf.keras.models.load_model('/content/drive/My Drive/path_to_save/model.h5')


In [19]:
# File: My ResNet50 Models
mr_ResNet50_all_diseasesimage_model = tf.keras.models.load_model('/content/drive/My Drive/My_ResNet50_Models/ResNet50_all_diseasesimage_model.h5')
mr_best_model_v2 = tf.keras.models.load_model('/content/drive/My Drive/My_ResNet50_Models/best_model_v2.h5')
mr_best_model_v1 =tf.keras.models.load_model('/content/drive/My Drive/My_ResNet50_Models/best_model_v1.h5')


In [20]:
print(pts_all_diseases_v3.input_shape)
print(pts_new_best_model_v2.input_shape)
print(pts_best_model_v2.input_shape)
print(pts_best_model_v1.input_shape)
print(pts_model.input_shape)
print(mr_ResNet50_all_diseasesimage_model.input_shape)
print(mr_best_model_v2.input_shape)
print(mr_best_model_v1.input_shape)

(None, 100, 100, 3)
(None, 100, 100, 3)
(None, 100, 100, 3)
(None, 100, 100, 3)
(None, 100, 100, 3)
(None, 100, 100, 3)
(None, 100, 100, 3)
(None, 100, 100, 3)


In [21]:
print(pts_all_diseases_v3.output_shape)
print(pts_new_best_model_v2.output_shape)
print(pts_best_model_v2.output_shape)
print(pts_best_model_v1.output_shape)
print(pts_model.output_shape)
print(mr_ResNet50_all_diseasesimage_model.output_shape)
print(mr_best_model_v2.output_shape)
print(mr_best_model_v1.output_shape)

(None, 5)
(None, 5)
(None, 5)
(None, 5)
(None, 5)
(None, 5)
(None, 5)
(None, 5)


In [23]:
models = [

          pts_all_diseases_v3,

          pts_new_best_model_v2,

          pts_best_model_v2,

          pts_best_model_v1,

          pts_model,

          mr_ResNet50_all_diseasesimage_model,

          mr_best_model_v2,

          mr_best_model_v1

          ]

In [25]:
import numpy as np
from sklearn.metrics import confusion_matrix

def TFNP(cm):
    """
    Calculate True Positives, False Negatives, False Positives, and True Negatives for each class.
    """
    all_samples = np.sum(cm)
    cm_classes = []

    for i in range(len(cm)):
        tp = cm[i, i]
        fn = np.sum(cm[i, :]) - tp
        fp = np.sum(cm[:, i]) - tp
        tn = all_samples - tp - fp - fn
        cm_classes.append([[tp, fn], [fp, tn]])

    return np.asarray(cm_classes)


def Report(cm_classes):
    """
    Generate performance metrics report for each class.
    """
    repo = []
    for i in range(len(cm_classes)):
        tp = cm_classes[i, 0, 0]
        fn = cm_classes[i, 0, 1]
        fp = cm_classes[i, 1, 0]
        tn = cm_classes[i, 1, 1]

        iou = tp / (tp + fn + fp)  # Intersection over Union
        dsc = (2 * tp) / ((2 * tp) + fp + fn)  # Dice Similarity Coefficient
        acc = (tp + tn) / (tp + tn + fp + fn)  # Accuracy
        precision = tp / (tp + fp)  # Precision (Positive Predictive Value)
        recall = tp / (tp + fn)  # Recall (Sensitivity)
        spec = tn / (tn + fp)  # Specificity
        f1_score = 2 * (precision * recall) / (precision + recall)  # F1-Score

        info = {
     #       'IOU': iou,
     #      'DSC': dsc,
     #       'ACC': acc,
     #       'Specificity': spec,
     #       'Precision': precision,
     #       'Recall': recall,
            'F1-Score': f1_score
        }

        repo.append(info)

    return np.asarray(repo)


def model_performance_report(model, test_dataset, class_labels):
    """
    Generate a performance report for a given model and test data.
    """
    y_pred_all = []
    y_test_all = []

    for x_test, y_test in test_dataset:
        y_pred = model.predict(x_test)
        y_pred_classes = np.argmax(y_pred, axis=1)
        y_test_classes = np.argmax(y_test, axis=1)

        y_pred_all.extend(y_pred_classes)
        y_test_all.extend(y_test_classes)

    y_pred_all = np.array(y_pred_all)
    y_test_all = np.array(y_test_all)

    cm = confusion_matrix(y_test_all, y_pred_all)
    cm_classes = TFNP(cm)
    report = Report(cm_classes)

    for i, label in enumerate(class_labels):
        print(f"The report for {label} is : \n {report[i]} \n")

    return report, cm



In [None]:
# Example usage:
# Assuming you have a trained model `image_model`, test dataset `test_dataset`, and class labels `disease_labels`
disease_labels = ["Normal", "Covid-19", "Viral Pneumonia", "Bacterial Pneumonia", "Lung Opacity"]
conf = []
report = []
for model in models:
  # Generate performance report for each model
  rep, cm = model_performance_report(model, test_dataset, disease_labels)
  report.append(rep)
  conf.append(cm)


In [38]:
for i in range(len(report)):
#  print(conf[i])
  print(report[i])
  print('\n')

[{'F1-Score': 0.8524096385542168} {'F1-Score': 0.9159663865546218}
 {'F1-Score': 0.801186943620178} {'F1-Score': 0.0031545741324921135}
 {'F1-Score': nan}]


[{'F1-Score': 0.8967642526964561} {'F1-Score': 0.9505813953488373}
 {'F1-Score': 0.8669301712779974} {'F1-Score': 0.8499210110584517}
 {'F1-Score': 0.9138686131386862}]


[{'F1-Score': 0.8014311270125224} {'F1-Score': 0.9329268292682928}
 {'F1-Score': 0.8342541436464089} {'F1-Score': 0.8583690987124463}
 {'F1-Score': 0.8762886597938144}]


[{'F1-Score': 0.8014311270125224} {'F1-Score': 0.9329268292682928}
 {'F1-Score': 0.8342541436464089} {'F1-Score': 0.8583690987124463}
 {'F1-Score': 0.8762886597938144}]


[{'F1-Score': 0.8967642526964561} {'F1-Score': 0.9505813953488373}
 {'F1-Score': 0.8669301712779974} {'F1-Score': 0.8499210110584517}
 {'F1-Score': 0.9138686131386862}]


[{'F1-Score': 0.8967642526964561} {'F1-Score': 0.9505813953488373}
 {'F1-Score': 0.8669301712779974} {'F1-Score': 0.8499210110584517}
 {'F1-Score': 0.91386861

In [43]:
for i in range(len(report)):
  print(conf[i])
#  print(report[i])
  print('\n')

[[283  17   4  20   3]
 [  4 327   0   6   0]
 [  0   6 270   1  87]
 [  1   3  36   1 289]
 [ 49  24   0 276   0]]


[[291  16   3   0  17]
 [  4 327   1   0   5]
 [  0   2 329  33   0]
 [  0   0  60 269   1]
 [ 27   6   2   1 313]]


[[224  10  30   8  55]
 [  1 306   3   0  27]
 [  0   0 302  61   1]
 [  0   1  25 300   4]
 [  7   2   0   0 340]]


[[224  10  30   8  55]
 [  1 306   3   0  27]
 [  0   0 302  61   1]
 [  0   1  25 300   4]
 [  7   2   0   0 340]]


[[291  16   3   0  17]
 [  4 327   1   0   5]
 [  0   2 329  33   0]
 [  0   0  60 269   1]
 [ 27   6   2   1 313]]


[[291  16   3   0  17]
 [  4 327   1   0   5]
 [  0   2 329  33   0]
 [  0   0  60 269   1]
 [ 27   6   2   1 313]]


[[291  16   3   0  17]
 [  4 327   1   0   5]
 [  0   2 329  33   0]
 [  0   0  60 269   1]
 [ 27   6   2   1 313]]


[[224  10  30   8  55]
 [  1 306   3   0  27]
 [  0   0 302  61   1]
 [  0   1  25 300   4]
 [  7   2   0   0 340]]


