**Benchmark Model: CNN**

CNN model used to benchmark the performance of our model with.

Alex G

Dave T

James P

Mike T

In [1]:
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report

# Set the path to the dataset directory
dataset_dir = '/content/drive/MyDrive/ML Capstone Project/chest_xray'

# Define image size and batch size
img_size = (150, 150)
batch_size = 32

# Data augmentation and preprocessing for training and validation
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    os.path.join(dataset_dir, 'train'),
    target_size=img_size,
    batch_size=batch_size,
    class_mode='binary'
)

validation_generator = test_datagen.flow_from_directory(
    os.path.join(dataset_dir, 'val'),
    target_size=img_size,
    batch_size=batch_size,
    class_mode='binary'
)

# Build the CNN model
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_size[0], img_size[1], 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Using sigmoid for binary classification
])

model.summary()

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.n // batch_size,
    epochs=10,
    validation_data=validation_generator,
    validation_steps=validation_generator.n // batch_size
)




KeyboardInterrupt: ignored

In [None]:
# Evaluate the model
test_generator = test_datagen.flow_from_directory(
    os.path.join(dataset_dir, 'test'),
    target_size=img_size,
    batch_size=1,
    shuffle=False,
    class_mode='binary'
)

test_labels = test_generator.classes
test_predictions = model.predict(test_generator, steps=test_generator.n)

# Convert probabilities to binary predictions (0 or 1)
test_predictions = np.round(test_predictions).flatten()

# Calculate evaluation metrics
accuracy = np.mean(test_labels == test_predictions)
confusion = confusion_matrix(test_labels, test_predictions)
recall = confusion[1, 1] / (confusion[1, 0] + confusion[1, 1])

print("Accuracy:", accuracy)
print("Recall:", recall)
print("Confusion Matrix:")
print(confusion)


Found 624 images belonging to 2 classes.
Accuracy: 0.7115384615384616
Recall: 0.9974358974358974
Confusion Matrix:
[[ 55 179]
 [  1 389]]
