In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import os
import seaborn as sns
%matplotlib inline
import glob
import PIL
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (BatchNormalization, Conv2D, MaxPooling2D, Activation, Flatten, Dropout, Dense)
from tensorflow.keras import backend as K

In [None]:
# Mount Google Drive into the Colab filesystem; the image folders globbed later in
# this notebook live under MyDrive/cleandata on that drive.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
# Load the chest X-ray images of all three classes into `data` (pixels) and
# `labels` (class index: 0 = normal, 1 = pneumonia, 2 = tuberculosis).
#
# Pixel values range from 0 to 255; they are divided by 255 at the end of this cell
# so every value lies between 0 and 1, the scale neural networks train best on.
# The dataset's train/ and test/ folders are pooled here; a later cell re-splits the
# pooled data so the model is scored on images it has not been trained on.

DATA_ROOT = '../content/drive/MyDrive/cleandata'
IMAGE_SIZE = (224, 224)  # (height, width) passed to load_img; arrays come out 224x224x3
# Cap applied to the two disease classes only; NORMAL keeps every file.
# NOTE(review): this leaves NORMAL over-represented -- confirm the imbalance is intended.
MAX_PER_DISEASE_CLASS = 566


def _list_images(split, class_dir):
    """Return the sorted file paths under DATA_ROOT/<split>/<class_dir>.

    Sorting makes the file order -- and therefore the [:MAX_PER_DISEASE_CLASS]
    subsets taken below -- identical on every run; glob alone does not promise
    any particular order.
    """
    return sorted(glob.glob(f'{DATA_ROOT}/{split}/{class_dir}/*.*'))


def _load_rgb(path):
    """Load one image file, resize it to IMAGE_SIZE and return it as a NumPy array."""
    return np.array(tf.keras.preprocessing.image.load_img(path, target_size=IMAGE_SIZE))


# 'TURBERCULOSIS' is a runtime path component -- presumably it mirrors the folder
# name on Drive, so do not "correct" the spelling without renaming the folder.
normal = _list_images('train', 'NORMAL')
pneumonia = _list_images('train', 'PNEUMONIA')
tuber = _list_images('train', 'TURBERCULOSIS')

normal_test = _list_images('test', 'NORMAL')
pneumonia_test = _list_images('test', 'PNEUMONIA')
tuber_test = _list_images('test', 'TURBERCULOSIS')

# Pool the original train and test folders of each class.
normal.extend(normal_test)
pneumonia.extend(pneumonia_test)
tuber.extend(tuber_test)

# Fail early with a clear message instead of producing empty arrays that only
# blow up later (typically because Drive is not mounted or the path is wrong).
for class_dir, class_paths in (('NORMAL', normal),
                               ('PNEUMONIA', pneumonia),
                               ('TURBERCULOSIS', tuber)):
    if not class_paths:
        raise FileNotFoundError(
            f"No images found for class {class_dir} under {DATA_ROOT}"
        )

data = []
labels = []

# The position in this tuple is the integer class label.
selected_paths = (
    normal,
    pneumonia[:MAX_PER_DISEASE_CLASS],
    tuber[:MAX_PER_DISEASE_CLASS],
)
for class_index, class_paths in enumerate(selected_paths):
    data.extend(_load_rgb(path) for path in class_paths)
    labels.extend([class_index] * len(class_paths))

# float32 (rather than NumPy's default float64) halves the memory of the scaled array.
data = np.asarray(data, dtype=np.float32) / 255.0
labels = np.array(labels)

In [None]:
# Total number of images loaded across the three classes.
len(data)

2707

In [None]:
# Hold out 20% of the images for the final evaluation. Stratifying on the labels keeps
# the class proportions equal in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42,
)


In [None]:
# Strength of the L2 penalty on every convolution kernel, and the width of the
# softmax output layer (one unit per class).
weight_decay = 1e-4
num_classes = 3


def _conv_elu_bn(filters, **conv_kwargs):
    """Return the layers of one [4x4 'same' Conv2D -> ELU -> BatchNormalization] unit.

    Extra keyword arguments are forwarded to Conv2D (used for `input_shape` on the
    very first layer of the network).
    """
    return [
        Conv2D(filters, (4, 4), padding='same',
               kernel_regularizer=regularizers.l2(weight_decay), **conv_kwargs),
        Activation('elu'),
        BatchNormalization(),
    ]


model = Sequential(
    # Stage 1: two 64-filter units on the 224x224x3 input, then 2x2 pooling.
    _conv_elu_bn(64, input_shape=(224, 224, 3))
    + _conv_elu_bn(64)
    + [MaxPooling2D(pool_size=(2, 2)), Dropout(0.2)]
    # Stage 2: a single 128-filter unit, then 2x2 pooling.
    + _conv_elu_bn(128)
    + [MaxPooling2D(pool_size=(2, 2)), Dropout(0.3)]
    # Stage 3: two 128-filter units, then 2x2 pooling.
    + _conv_elu_bn(128)
    + _conv_elu_bn(128)
    + [MaxPooling2D(pool_size=(2, 2)), Dropout(0.4)]
    # Classifier head: flatten, a 128-unit dense layer with ELU, softmax over the classes.
    + [
        Flatten(),
        Dense(128, activation="linear"),
        Activation('elu'),
        Dense(num_classes, activation='softmax'),
    ]
)


In [None]:
# Categorical cross-entropy pairs with the softmax output and expects one-hot targets;
# accuracy is tracked so it can be reported and monitored during training.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 224, 224, 64)      3136      
                                                                 
 activation (Activation)     (None, 224, 224, 64)      0         
                                                                 
 batch_normalization (BatchN  (None, 224, 224, 64)     256       
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 224, 224, 64)      65600     
                                                                 
 activation_1 (Activation)   (None, 224, 224, 64)      0         
                                                                 
 batch_normalization_1 (Batc  (None, 224, 224, 64)     256       
 hNormalization)                                        

In [None]:
# One-hot encode the integer class labels (3 classes), the target format required
# by the categorical cross-entropy loss.
y_train_cnn, y_test_cnn = (
    tf.keras.utils.to_categorical(class_ids, num_classes=3)
    for class_ids in (y_train, y_test)
)

In [None]:
# Inspect the one-hot training targets: one row per sample, one column per class.
y_train_cnn

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.]], dtype=float32)

In [None]:
# The model is declared with input_shape=(224, 224, 3), i.e. it consumes batches shaped
# (N, 224, 224, 3). The images are loaded with their colour channels, so X_train
# already has that layout; unconditionally appending another axis would give
# (N, 224, 224, 3, 1) and would add yet another axis each time the cell is re-run.
# A channel axis is therefore added only when one is genuinely missing
# (a stack of single-channel (N, H, W) images), which also keeps the cell idempotent.
if X_train.ndim == 3:
    X_train = np.expand_dims(X_train, axis=-1)

In [None]:
# Sanity check: (number of training samples, number of classes).
y_train_cnn.shape

(2436, 3)

In [None]:
# The model is declared with input_shape=(224, 224, 3), i.e. it consumes batches shaped
# (N, 224, 224, 3). The images are loaded with their colour channels, so X_test
# already has that layout; unconditionally appending another axis would give
# (N, 224, 224, 3, 1) and would add yet another axis each time the cell is re-run.
# A channel axis is therefore added only when one is genuinely missing
# (a stack of single-channel (N, H, W) images), which also keeps the cell idempotent.
if X_test.ndim == 3:
    X_test = np.expand_dims(X_test, axis=-1)

In [None]:
# Shape of the held-out image array that is later passed to model.evaluate / model.predict.
X_test.shape

TensorShape([271, 224, 224, 3, 1])

In [None]:
# Guard against overfitting: stop training once validation accuracy has gone
# 8 consecutive epochs (`patience`) without improving, and ask Keras to restore the
# weights of the best epoch. Despite its name, `checkpointer` is an EarlyStopping
# callback -- it does not write checkpoints to disk.
checkpointer = EarlyStopping(
    monitor='val_accuracy',
    mode="max",
    patience=8,
    restore_best_weights=True,
    verbose=1,
)

In [None]:
# Train for at most 20 epochs. validation_split reserves 20% of X_train as the
# validation set whose accuracy the early-stopping callback monitors; the per-epoch
# metrics are kept in `history` for plotting.
history = model.fit(
    x=X_train,
    y=y_train_cnn,
    validation_split=0.2,
    epochs=20,
    callbacks=[checkpointer],
    verbose=1,
)

In [None]:
# model.evaluate returns [loss, accuracy] for the metrics set at compile time;
# index 1 is the accuracy, reported here as a percentage.
train_accuracy_pct = model.evaluate(X_train, y_train_cnn)[1] * 100
print(f"Final Train accuracy = {train_accuracy_pct}%")

# Accuracy on the held-out split (X_test), which the model never trained on.
heldout_accuracy_pct = model.evaluate(X_test, y_test_cnn)[1] * 100
print(f"Validation accuracy = {heldout_accuracy_pct}%")

In [None]:
# Per-epoch accuracy curves recorded by model.fit.
training_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# Number the epochs from 1 for the x-axis.
epoch_count = range(1, len(training_accuracy) + 1)

# Plot training vs. validation accuracy; a widening gap between the two curves
# is the usual sign of overfitting.
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(epoch_count, training_accuracy, 'r--', label='Training Accuracy')
acc_ax.plot(epoch_count, val_accuracy, 'b-', label='Val Accuracy')
acc_ax.legend()
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_ylim(top=1)
plt.show()

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import confusion_matrix

# Display names for class indices 0, 1, 2, in the order the integer labels were assigned
# when the images were loaded. Deliberately NOT called `labels`: that name holds the
# integer label array the train/test split cell stratifies on, and overwriting it with
# three strings would break that cell on any re-run.
class_names = ["Normal", "Pneumonia", "Tuberculosis"]

# Per-class probabilities for every held-out image.
prediction = model.predict(X_test)

# argmax turns both the one-hot targets and the predicted probabilities back into
# class indices. Passing `labels` explicitly pins the matrix to one row/column per
# entry of class_names, even if some class never appears among the predictions.
cm = confusion_matrix(
    y_test_cnn.argmax(axis=1),
    prediction.argmax(axis=1),
    labels=list(range(len(class_names))),
)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot(cmap=plt.cm.Blues)
plt.show()