#Import Libraries

In [1]:
import os
import cv2
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

#Kaggle api

In [2]:
os.environ["KAGGLE_USERNAME"] = "magdhndi"
os.environ["KAGGLE_KEY"] = "9b6c8953fb75d807a407f863ae22edc6"

#Download and unzip dataset

In [3]:
!kaggle datasets download tawsifurrahman/covid19-radiography-database
!kaggle datasets download artyomkolas/3-kinds-of-pneumonia
!kaggle datasets download darshan1504/covid19-detection-xray-dataset

Dataset URL: https://www.kaggle.com/datasets/tawsifurrahman/covid19-radiography-database
License(s): copyright-authors
Downloading covid19-radiography-database.zip to /content
 98% 764M/778M [00:11<00:00, 71.4MB/s]
100% 778M/778M [00:11<00:00, 68.8MB/s]
Dataset URL: https://www.kaggle.com/datasets/artyomkolas/3-kinds-of-pneumonia
License(s): Attribution 4.0 International (CC BY 4.0)
Downloading 3-kinds-of-pneumonia.zip to /content
100% 3.48G/3.49G [00:46<00:00, 96.3MB/s]
100% 3.49G/3.49G [00:46<00:00, 81.2MB/s]
Dataset URL: https://www.kaggle.com/datasets/darshan1504/covid19-detection-xray-dataset
License(s): Attribution 4.0 International (CC BY 4.0)
Downloading covid19-detection-xray-dataset.zip to /content
100% 186M/186M [00:02<00:00, 82.6MB/s]
100% 186M/186M [00:02<00:00, 88.5MB/s]


In [4]:
!unzip covid19-radiography-database
!unzip 3-kinds-of-pneumonia
!unzip covid19-detection-xray-dataset

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: NonAugmentedTrain/BacterialPneumonia/164.jpeg  
  inflating: NonAugmentedTrain/BacterialPneumonia/165.jpeg  
  inflating: NonAugmentedTrain/BacterialPneumonia/166.jpeg  
  inflating: NonAugmentedTrain/BacterialPneumonia/167.jpeg  
  inflating: NonAugmentedTrain/BacterialPneumonia/168.jpeg  
  inflating: NonAugmentedTrain/BacterialPneumonia/169.jpeg  
  inflating: NonAugmentedTrain/BacterialPneumonia/17.jpeg  
  inflating: NonAugmentedTrain/BacterialPneumonia/170.jpeg  
  inflating: NonAugmentedTrain/BacterialPneumonia/171.jpeg  
  inflating: NonAugmentedTrain/BacterialPneumonia/172.jpeg  
  inflating: NonAugmentedTrain/BacterialPneumonia/173.jpeg  
  inflating: NonAugmentedTrain/BacterialPneumonia/174.jpeg  
  inflating: NonAugmentedTrain/BacterialPneumonia/175.jpeg  
  inflating: NonAugmentedTrain/BacterialPneumonia/176.jpeg  
  inflating: NonAugmentedTrain/BacterialPneumonia/177.jpeg  
  inflating: NonAugme

#Initialize the paths of images in dataset

In [5]:
normal_paths = ['COVID-19_Radiography_Dataset/Normal/images', 'Curated X-Ray Dataset/Normal', 'NonAugmentedTrain/Normal']
covid_paths = ['COVID-19_Radiography_Dataset/COVID/images', 'Curated X-Ray Dataset/COVID-19', 'NonAugmentedTrain/COVID-19']
viralPneumonia_paths = ['COVID-19_Radiography_Dataset/Viral Pneumonia/images', 'Curated X-Ray Dataset/Pneumonia-Viral', 'NonAugmentedTrain/ViralPneumonia']
Lung_Opacity_paths = ['COVID-19_Radiography_Dataset/Lung_Opacity/images']
BacterialPneumonia_paths = ['Curated X-Ray Dataset/Pneumonia-Bacterial', 'NonAugmentedTrain/BacterialPneumonia']

#Preprocessing images

In [6]:
def loadImages(paths, target, Max):
  images = []
  labels = []
  for p in paths:
    urls = os.listdir(p)
    for i in range(len(urls)):
      if len(images) >= Max:
        break
      img_path = p + '/'+ urls[i]
      img = cv2.imread(img_path)
      img = img / 255.0
      img = cv2.resize(img, (100, 100))
      images.append(img)
      labels.append(target)
  return np.asarray(images), np.asarray(labels)

#Load images

In [7]:
# there are 5 category so 5 * 3,413 = 17,065 that too large
# RAM limited on 17,065 images
images_number = 3413

In [8]:
normal_images, normal_target = loadImages(normal_paths, 0, images_number)
covid_images, covid_target = loadImages(covid_paths, 1, images_number)
viralPneumonia_images, viralPneumonia_target = loadImages(viralPneumonia_paths, 2, images_number)
Lung_Opacity_images, Lung_Opacity_target = loadImages(Lung_Opacity_paths, 3, images_number)
BacterialPneumonia_images, BacterialPneumonia_target = loadImages(BacterialPneumonia_paths, 4, images_number)


In [9]:
data = [normal_images, covid_images, viralPneumonia_images, Lung_Opacity_images, BacterialPneumonia_images]
normal_images, covid_images, viralPneumonia_images, Lung_Opacity_images, BacterialPneumonia_images = 0,0,0,0,0

target = [normal_target, covid_target, viralPneumonia_target, Lung_Opacity_target, BacterialPneumonia_target]
normal_target, covid_target, viralPneumonia_target, Lung_Opacity_target, BacterialPneumonia_target = 0,0,0,0,0

#Split images to train, validation and test images

In [10]:
from math import floor
def split(data, target, train, validation, test):
  x_train, x_val, x_test = [], [], []
  y_train, y_val, y_test = [], [], []

  for i in range(len(data)):
    a = floor(len(data[i])*train)
    b = a + floor(len(data[i])*validation)
    c = a + floor(len(data[i])*test)

    x_train.extend(data[i][0:a])
    y_train.extend(target[i][0:a])

    x_val.extend(data[i][a:b])
    y_val.extend(target[i][a:b])

    x_test.extend(data[i][b:])
    y_test.extend(target[i][b:])

  return np.asarray(x_train), np.asarray(y_train), np.asarray(x_val), np.asarray(y_val), np.asarray(x_test), np.asarray(y_test)

In [11]:
x_train, y_train, x_val, y_val, x_test, y_test = split(data, target, train=0.6, validation=0.2, test=0.2)

In [12]:
data, target = 0, 0

#Build model

In [13]:
model = Sequential([
    Conv2D(32, 2, input_shape=(100, 100, 3), activation='relu'),
    MaxPooling2D(),
    Conv2D(16, 2, activation='relu'),
    MaxPooling2D(),
    Conv2D(16, 2, activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dense(5, activation='sigmoid')
])

In [14]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 99, 99, 32)        416       
                                                                 
 max_pooling2d (MaxPooling2  (None, 49, 49, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 48, 48, 16)        2064      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 24, 24, 16)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 23, 23, 16)        1040      
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 11, 11, 16)        0

In [15]:
model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

#Early Stopping and Model Check Point

In [16]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Callbacks for early stopping and saving the best model
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_tabular_model.h5', monitor='val_loss', save_best_only=True)


#Train model

In [17]:
model.fit(x_train, y_train,
          batch_size=32,
          epochs=25,
          validation_data=(x_val, y_val),
          callbacks=[early_stopping, model_checkpoint])


Epoch 1/25


  output, from_logits = _get_logits(


Epoch 2/25
 15/320 [>.............................] - ETA: 2s - loss: 0.5959 - accuracy: 0.7729

  saving_api.save_model(


Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25


<keras.src.callbacks.History at 0x7a65077bca90>

#Evaluate model

In [18]:
y_pred = model.predict(np.asarray(x_test))



In [19]:
y_pred

array([[9.9728322e-01, 8.4211725e-01, 2.4718143e-05, 9.9633527e-01,
        1.1866191e-05],
       [9.9957401e-01, 2.3157638e-01, 9.9556040e-05, 9.5179647e-01,
        2.3284068e-05],
       [9.9822193e-01, 1.7150618e-01, 7.9154881e-05, 9.9698621e-01,
        2.9734723e-05],
       ...,
       [5.2160436e-01, 6.8682414e-01, 4.0688206e-02, 6.8942517e-01,
        1.7263968e-01],
       [1.8571258e-02, 2.3415677e-02, 9.4864994e-01, 2.0650465e-03,
        9.9533892e-01],
       [3.4638613e-02, 5.2223173e-03, 5.3139204e-01, 1.3789976e-01,
        9.8505557e-01]], dtype=float32)

In [20]:
def post_processing(y_pred):
  y = []
  for i in range(len(y_pred)):
    y.append(np.argmax(y_pred[i]))

  return np.asarray(y)

In [21]:
y_pred = post_processing(y_pred)

In [22]:
y_pred

array([0, 0, 0, ..., 3, 4, 4])

In [23]:
y_test

array([0, 0, 0, ..., 4, 4, 4])

In [24]:
# cm is the confusion_matrix
def TFNP(cm):
  #Make confusion matrix for each disease

  all = 0

  for i in range(len(cm)):
    for j in range(len(cm)):
      all += cm[i,j]

  cm_classes = []

  for i in range(len(cm)):

    #True Positive
    tp = cm[i,i]
    #print("TP", tp)

    #False Negative
    fn = -tp
    for j in range(len(cm[i])):
      fn += cm[i,j]

    #False Positive
    fp = -tp
    for j in range(len(cm[i])):
      fp += cm[j,i]

    #True Negative
    tn = all - tp - fp - fn

    cm_classes.append([[tp,fn],[fp,tn]])

  return np.asarray(cm_classes)

#Calculate measures for each disease

In [25]:
def Report(cm_classes):
  repo = []
  for i in range(len(cm_classes)):
    tp = cm_classes[i,0,0]
    fn = cm_classes[i,0,1]
    fp = cm_classes[i,1,0]
    tn = cm_classes[i,1,1]

    iou = tp / (tp + fn + fp) #Instersection over union

    dsc = (2*tp) / ((2*tp) + fp + fn) #Doce Similarity Coefficient

    acc = (tp + tn) / (tp + tn + fp + fn) #Accuracy

    precision = tp / (tp + fp) #PPV

    recall = tp / (tp + fn) #Sensitivity

    spec = tn / (tn + fp) #Specificity

    f1_score = 2 * (precision * recall) / (precision + recall) #F1-Score

    info = {'IOU' : iou, 'DSC' : dsc, 'ACC': acc,'Specificity': spec, 'Precision': precision, 'Recall': recall, 'F1-Score': f1_score}

    repo.append(info)

  return np.asarray(repo)

In [26]:
cm = confusion_matrix(y_test, y_pred)

In [27]:
print(cm)

[[590  43   3  47   1]
 [ 22 640   1  21   0]
 [  0   4 506   3 171]
 [106  74   1 502   1]
 [  1  14 165   2 502]]


In [28]:
cm_classes = TFNP(cm)

In [29]:
print(cm_classes)

[[[ 590   94]
  [ 129 2607]]

 [[ 640   44]
  [ 135 2601]]

 [[ 506  178]
  [ 170 2566]]

 [[ 502  182]
  [  73 2663]]

 [[ 502  182]
  [ 173 2563]]]


In [30]:
report = Report(cm_classes)

In [31]:
disease = {0:"Normal", 1:"Covid-19", 2:"Viral Pneumonia", 3:"Lung Opacity", 4:"Bacterial Pneumonia"}
for i in range(len(report)):
  print(f"The report for {disease[i]} is : \n {report[i]} \n")

The report for Normal is : 
 {'IOU': 0.7257072570725708, 'DSC': 0.8410548823948681, 'ACC': 0.9347953216374268, 'Specificity': 0.9528508771929824, 'Precision': 0.8205841446453408, 'Recall': 0.8625730994152047, 'F1-Score': 0.841054882394868} 

The report for Covid-19 is : 
 {'IOU': 0.7814407814407814, 'DSC': 0.8773132282385195, 'ACC': 0.9476608187134503, 'Specificity': 0.9506578947368421, 'Precision': 0.8258064516129032, 'Recall': 0.935672514619883, 'F1-Score': 0.8773132282385195} 

The report for Viral Pneumonia is : 
 {'IOU': 0.5925058548009368, 'DSC': 0.7441176470588236, 'ACC': 0.8982456140350877, 'Specificity': 0.9378654970760234, 'Precision': 0.7485207100591716, 'Recall': 0.7397660818713451, 'F1-Score': 0.7441176470588236} 

The report for Lung Opacity is : 
 {'IOU': 0.6631439894319683, 'DSC': 0.7974583002382843, 'ACC': 0.9254385964912281, 'Specificity': 0.9733187134502924, 'Precision': 0.8730434782608696, 'Recall': 0.7339181286549707, 'F1-Score': 0.7974583002382843} 

The report fo

#Save model

In [34]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [35]:
model.save(os.path.join('/content/drive/My Drive/path_to_save/', 'all_diseases_v3.h5'))

#Make a predict

In [36]:
model_path = '/content/drive/My Drive/path_to_save/all_diseases_v3.h5'
model = tf.keras.models.load_model(model_path)
disease = {0: "Normal", 1:"Covid-19", 2:"Viral Pneumonia", 3:"Lung_Opacity", 4:"Bacterial Pneumonia"}

In [37]:
disease = {0:"Normal", 1:"Covid-19", 2:"Viral Pneumonia", 3:"Lung Opacity", 4:"Bacterial Pneumonia"}

In [38]:
def loadImages1(img_path):
  images = []
  img = cv2.imread(img_path)
  img = img / 255.0
  img = cv2.resize(img, (100, 100))
  images.append(img)
  return np.asarray(images)

In [39]:
path = '/content/Pneumonia_Bacterial_8.jpg'
img = loadImages1(path)
y = model.predict(img)
print(disease[np.argmax(y)])

Bacterial Pneumonia
