# Lung Prediction Project

### Project Scope:
In this project we look at chest X-ray images of three different lung conditions (Normal, Covid, and Pneumonia).   
I will train a convolutional neural network (CNN) to see whether it can make accurate predictions on the test set of images. 

### Project Steps:
- Import libraries
- Separate the training data and test data
- Build the CNN model
- Set up the metrics and optimizations 
- Train the model 
- Test the model


In [5]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import layers

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd





# Train and Test Setup

In [6]:
# Training image generator: rescales pixel values from [0, 255] down to [0, 1]
# and applies light random augmentation (zoom, rotation, small shifts).
generator = ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.2,
    rotation_range=15,
    width_shift_range=0.05,
    height_shift_range=0.05,
)

batch_size = 16
# Stream the training images from disk as one-hot-labelled grayscale batches.
train_iterator = generator.flow_from_directory('Covid19-dataset/train', class_mode='categorical', color_mode='grayscale', batch_size=batch_size, target_size=(256, 256))
# Test image generator: rescale only, no augmentation. It gets its own name so
# the augmenting `generator` above is not silently rebound.
test_generator = ImageDataGenerator(rescale=1./255)
# Stream the test images from disk with the same image settings as training.
test_iterator = test_generator.flow_from_directory('Covid19-dataset/test', class_mode='categorical', color_mode='grayscale', batch_size=batch_size, target_size=(256, 256))
# Pull one batch of inputs and labels; the built-in next() works on any iterator.
sample_batch_input, sample_batch_labels = next(train_iterator)
# Print the shapes of the input and label batches as a sanity check.
print(sample_batch_input.shape, sample_batch_labels.shape)

Found 251 images belonging to 3 classes.
Found 78 images belonging to 3 classes.
(16, 256, 256, 1) (16, 3)


# Model Building

In [43]:
# Build the CNN as a single Sequential stack: two conv/pool stages that shrink
# the 256x256 grayscale input, followed by a small fully connected classifier.
model = Sequential([
    # Input: 256x256 images with a single (grayscale) channel
    layers.Input(shape=(256, 256, 1)),
    # Stage 1: 2 filters, 5x5 kernel, stride 2, no padding, ReLU
    layers.Conv2D(2, 5, activation='relu', padding='valid', strides=2),
    # 5x5 max pooling with stride 2
    layers.MaxPooling2D(pool_size=(5, 5), strides=(2, 2)),
    # Stage 2: 4 filters, 3x3 kernel, stride 2, no padding, ReLU
    layers.Conv2D(4, 3, activation='relu', padding='valid', strides=2),
    # 2x2 max pooling with stride 2
    layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    # Flatten the feature maps into a vector for the dense layers
    layers.Flatten(),
    # Hidden dense layer with 200 units and ReLU
    layers.Dense(200, activation='relu'),
    # Output layer: one softmax probability per class (3 classes)
    layers.Dense(3, activation='softmax'),
])


# Optimization and Model Metrics

In [8]:
# Adam optimizer with an explicit learning rate.
opt = tf.keras.optimizers.Adam(learning_rate=0.005)

# The labels are one-hot encoded over three classes, so the loss is categorical
# cross-entropy; categorical accuracy and AUC are tracked alongside it.
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=[
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.AUC(),
    ],
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 126, 2)       52        
                                                                 
 max_pooling2d (MaxPooling2  (None, 61, 61, 2)         0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 30, 30, 4)         76        
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 15, 15, 4)         0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 900)               0         
                                                                 
 dense (Dense)               (None, 200)               1

# Model Training

In [21]:
# Stop training once the validation loss has not improved for 5 consecutive
# epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=5)

# Save the model each time the validation loss reaches a new minimum.
# NOTE(review): the directory name says "best-accuracy" but the monitored
# quantity is val_loss; the path is left unchanged so existing checkpoints
# are still found.
checkpoint_path = 'Covid19-dataset/model/checkpoint-best-accuracy'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

# Fit the model.
# - len(iterator) is the whole number of batches per pass over the data, so the
#   step counts are integers rather than the fractional samples / batch_size.
# - batch_size is not passed to fit(): the iterators already yield batches.
# NOTE(review): the test set doubles as validation data here, so early stopping
# and checkpoint selection are tuned on the same images used for the final
# test metrics.
model.fit(
    train_iterator,
    steps_per_epoch=len(train_iterator),
    epochs=50,
    validation_data=test_iterator,
    validation_steps=len(test_iterator),
    callbacks=[early_stop, model_checkpoint_callback],
)



Epoch 1/50


INFO:tensorflow:Assets written to: Covid19-dataset/model\checkpoint-best-accuracy\assets


Epoch 2/50


INFO:tensorflow:Assets written to: Covid19-dataset/model\checkpoint-best-accuracy\assets


Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50


<keras.src.callbacks.History at 0x1e81ccdc2e0>

# Model Evaluation on Test Data

In [19]:
# Score the model on the test iterator; evaluate() returns the loss followed by
# the compiled metrics (categorical accuracy, then AUC).
test_scores = model.evaluate(test_iterator)
loss, acc, auc = test_scores
print('Test accuracy:', acc)
# To persist the trained model, uncomment the next line and adjust the directory:
# model.save('Covid19-dataset/model')

# Show the class-name -> index mapping so predicted indices can be read as labels.
classes = train_iterator.class_indices
print(classes)

Test accuracy: 0.9358974099159241
{'Covid': 0, 'Normal': 1, 'Pneumonia': 2}


# Load Model and Test New Unseen Data

In [41]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np

# Load the saved model.
# NOTE(review): this is the directory used by the commented-out
# `model.save(...)` call in the evaluation cell, not the ModelCheckpoint path
# ('Covid19-dataset/model/checkpoint-best-accuracy') - confirm which artifact
# is meant to be loaded here.
model = tf.keras.models.load_model('Covid19-dataset/model')

# Evaluate the loss, accuracy and AUC of the loaded model on the test iterator.
loss, acc, auc = model.evaluate(test_iterator)
print('Test accuracy:', acc)
print('Test AUC Score: ', auc)
print('LOSS Score: ', loss)


# Path of the single image to classify.
img_path = "Covid19-dataset/test/Normal/1.png"
# Load the image with the same size and color mode the model was trained on.
img = image.load_img(img_path, target_size=(256, 256), color_mode='grayscale')
# Convert the image to a NumPy array.
img_array = image.img_to_array(img)
# Add a leading batch dimension (a batch of one image).
img_array = np.expand_dims(img_array, axis=0)
# Rescale pixel values to [0, 1], matching the generators' rescale=1./255.
img_array = img_array / 255.0


# Predict class probabilities; the result has one row per image in the batch.
predictions = model.predict(img_array)

# Build the label list from the iterator's own class-name -> index mapping so
# it cannot drift from the directory layout on disk.
class_indices = test_iterator.class_indices
class_labels = sorted(class_indices, key=class_indices.get)
# Take the argmax over the class scores of the first (only) image, rather than
# over the flattened (batch, classes) array.
predicted_class_index = int(np.argmax(predictions[0]))
# Map the predicted index to its class label.
predicted_class_label = class_labels[predicted_class_index]


# Print the labelled prediction and the raw probabilities.
print("Predicted class:", predicted_class_label)
print("Raw predictions:", predictions)


Test accuracy: 1.0
AUC Score:  1.0
LOSS Score:  0.02254110760986805
Predicted class: Normal
Raw predictions: [[1.1007418e-06 9.9971598e-01 2.8286045e-04]]
