In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
import os
import seaborn as sns
import random
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow import keras
from tensorflow.keras import  layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Locations of the three dataset splits (replace with your actual paths)
train_dir, val_dir, test_dir = (
    'venv/chest_xray/train',
    'venv/chest_xray/val',
    'venv/chest_xray/test',
)


In [None]:
# Display labels, one per class. They are also joined onto the dataset path
# below, so every entry has to name an existing sub-folder.
# NOTE(review): on case-sensitive filesystems the spelling must match the
# folder names exactly — confirm against the dataset on disk.
class_names = ['Normal', 'Pneumonia']


def show_sample_images(train_dir, class_names, samples_per_category=3):
    """Plot a grid of randomly chosen images, one row per category.

    Args:
        train_dir: Directory that contains one sub-folder per category.
        class_names: Sub-folder names to sample from; also used as titles.
        samples_per_category: Maximum number of images drawn per category.
            A category holding fewer image files shows all of them and the
            remaining grid cells stay blank.

    Raises:
        FileNotFoundError: If a category sub-folder does not exist
            (raised by ``os.listdir``).
    """
    # Nothing to draw; plt.subplots() rejects an empty grid.
    if not class_names or samples_per_category < 1:
        return

    # Only these file types are sampled, so stray directory entries
    # (e.g. hidden OS metadata files) never reach Image.open().
    image_extensions = ('.jpeg', '.jpg', '.png', '.bmp', '.gif', '.tif', '.tiff')

    # squeeze=False keeps `axes` two-dimensional even for a single row/column.
    fig, axes = plt.subplots(
        len(class_names), samples_per_category,
        figsize=(12, 6), squeeze=False,
    )
    # Hide every frame up front so unused cells render as blank space.
    for ax in axes.ravel():
        ax.axis('off')

    for row, category in enumerate(class_names):
        category_path = os.path.join(train_dir, category)
        # Sorted so that a seeded `random` yields the same picks on every
        # run; os.listdir() returns entries in arbitrary order.
        images = sorted(
            name for name in os.listdir(category_path)
            if name.lower().endswith(image_extensions)
        )

        # Clamp the sample size: random.sample() raises ValueError when asked
        # for more items than the population holds.
        n_samples = min(samples_per_category, len(images))

        for col, img_name in enumerate(random.sample(images, n_samples)):
            img_path = os.path.join(category_path, img_name)
            # The context manager closes the underlying file once the pixel
            # data has been handed to imshow().
            with Image.open(img_path) as img:
                axes[row, col].imshow(img, cmap='gray')
            axes[row, col].set_title(category)

    fig.tight_layout()
    plt.show()


show_sample_images(train_dir, class_names)

In [None]:
# Number of directory entries found in each category folder of the training set
counts = {
    label: len(os.listdir(os.path.join(train_dir, label)))
    for label in class_names
}

print(counts)

In [None]:
# Bar chart of the number of training images per category.
# (matplotlib is already imported in the notebook's first cell, so it is not
# re-imported here.)
fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(list(counts.keys()), list(counts.values()), color=['green', 'red'])
ax.set_title('Number of Images per Category')
ax.set_xlabel('Category')
ax.set_ylabel('Count')
plt.show()


In [None]:
# Image size (in pixels) that every picture is resized to, and images per batch
img_height, img_width = 150, 150
batch_size = 32

# Training pipeline: multiply pixel values by 1/255 and apply random augmentation
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    # random geometric perturbations
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    # how pixels exposed by a transform are filled in
    fill_mode='nearest',
)

# Validation / test pipeline: rescaling only, no augmentation
val_test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Keyword arguments shared by the training, validation and test iterators
_flow_options = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',  # 'binary' for two classes (normal/pneumonia)
)

# Training iterator (uses the augmenting generator)
train_generator = train_datagen.flow_from_directory(train_dir, **_flow_options)

# Validation iterator (rescaling only)
validation_generator = val_test_datagen.flow_from_directory(val_dir, **_flow_options)

# Test iterator; shuffle=False is typically used for consistent evaluation
test_generator = val_test_datagen.flow_from_directory(
    test_dir,
    shuffle=False,
    **_flow_options,
)

In [None]:
# Show the class-name -> integer-label mapping reported by the training iterator
label_map = train_generator.class_indices
print(f'class indices:  {label_map}')

In [None]:
# CNN architecture: three convolution + max-pooling stages followed by a
# small fully connected classifier
model = models.Sequential()

# Stage 1 (also declares the input: img_height x img_width, 3 channels)
model.add(layers.Conv2D(32, (3, 3), activation='relu',
                        input_shape=(img_height, img_width, 3)))
model.add(layers.MaxPooling2D((2, 2)))

# Stage 2
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# Stage 3
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# Classifier head
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))  # Binary classification

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    loss=tf.keras.losses.binary_crossentropy,
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Fit for 10 epochs on the training iterator, evaluating on the validation
# iterator along the way; the returned History object is kept for plotting
history = model.fit(train_generator, epochs=10, validation_data=validation_generator)

In [None]:
# Evaluate the trained model on the test set
results = model.evaluate(test_generator)
# Index 1 is read as the accuracy ('accuracy' is the only metric passed to
# compile); it is reported as a percentage
test_accuracy = results[1]
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

In [None]:
# True labels as stored on the test iterator
y_true = test_generator.classes

# Rewind the iterator before predicting so the predictions start at the first
# file and can be compared element-wise with `y_true`.
test_generator.reset()

# Predicted probabilities
y_pred_probs = model.predict(test_generator)

# The network ends in a single sigmoid unit, so threshold at 0.5 to obtain
# hard 0/1 class labels
y_pred = (y_pred_probs > 0.5).astype('int32').flatten()

# Guard against silently misaligned arrays before computing any metrics
assert len(y_pred) == len(y_true), (
    f"got {len(y_pred)} predictions for {len(y_true)} labels"
)


In [None]:
# Per-class precision / recall / F1 summary on the test predictions
report = classification_report(y_true, y_pred, target_names=class_names)
print(report)

In [None]:
# Confusion matrix of true vs. predicted labels, drawn as an annotated heatmap
cm = confusion_matrix(y_true, y_pred)

fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
plt.show()


In [None]:
# plot_history function to visualize training and validation accuracy and loss
def plot_history(history):
    """Plot accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute is a dict of per-epoch
            metric lists (as returned by ``model.fit``). It must contain
            ``'accuracy'`` and ``'loss'``; the ``'val_accuracy'`` and
            ``'val_loss'`` curves are drawn only when present, so a run
            without validation data can still be plotted.

    Raises:
        KeyError: If ``'accuracy'`` or ``'loss'`` is missing.
    """
    metrics = history.history

    def _draw(ax, key, title):
        # One panel: the training curve plus the validation curve if recorded.
        series = [(f'Train {title}', metrics[key])]
        val_key = f'val_{key}'
        if val_key in metrics:
            series.append((f'Val {title}', metrics[val_key]))
        for label, values in series:
            # Number epochs from 1 so the x-axis matches Keras' progress log.
            ax.plot(range(1, len(values) + 1), values, label=label)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(title)
        ax.set_title(title)
        ax.legend()

    fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))
    _draw(acc_ax, 'accuracy', 'Accuracy')
    _draw(loss_ax, 'loss', 'Loss')

    fig.tight_layout()
    plt.show()


plot_history(history)
