<a href="https://colab.research.google.com/github/ElahehBeheshti/DataMining/blob/main/DMfinal_ChestXray.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Step 1: Mount Google Drive
# The dataset path used below lives under /content/drive, so Drive has to be
# mounted at that location before any of the image folders can be read.
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Import necessary libraries
import os

# Step 3: Dataset root on Drive and the split/class folder layout beneath it
dataset_path = '/content/drive/My Drive/Chest-Xray'  # Update this with your correct path

# Every split directory holds the same two class sub-folders; each split gets
# its own list object.
folders = {
    split: ['NORMAL', 'PNEUMONIA']
    for split in ('test', 'train', 'val')
}

# Step 4: Function to count JPEG images in a folder
def count_jpeg_images(folder_path):
    """Count the JPEG files located directly inside ``folder_path``.

    Only regular files whose name ends in ``.jpg`` or ``.jpeg``
    (case-insensitive) are counted; sub-directories are not descended into
    and are never counted themselves, whatever their name.

    Args:
        folder_path: Path of the directory to scan.

    Returns:
        int: Number of matching files. Returns 0, after printing a
        "Folder not found" message, when ``folder_path`` is not an existing
        directory.
    """
    # isdir (rather than exists) so that a path pointing at a plain file is
    # reported as missing instead of making the directory scan raise.
    if not os.path.isdir(folder_path):
        print(f"Folder not found: {folder_path}")
        return 0

    jpeg_suffixes = ('.jpeg', '.jpg')
    # scandir is used as a context manager so the directory handle is
    # released as soon as counting finishes.
    with os.scandir(folder_path) as entries:
        return sum(
            1
            for entry in entries
            if entry.is_file() and entry.name.lower().endswith(jpeg_suffixes)
        )

# Step 5: Report the JPEG count of every split/class folder
# Flatten the {split: [classes]} mapping into (split, class) pairs first, so a
# single loop can walk them in the same order as the nested version would.
split_class_pairs = [
    (split, label)
    for split, labels_in_split in folders.items()
    for label in labels_in_split
]
for main_folder, subfolder in split_class_pairs:
    folder_path = os.path.join(dataset_path, main_folder, subfolder)
    num_images = count_jpeg_images(folder_path)
    print(f"Number of JPEG images in {main_folder}/{subfolder}: {num_images}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Number of JPEG images in test/NORMAL: 234
Number of JPEG images in test/PNEUMONIA: 390
Number of JPEG images in train/NORMAL: 1341
Number of JPEG images in train/PNEUMONIA: 3875
Number of JPEG images in val/NORMAL: 8
Number of JPEG images in val/PNEUMONIA: 8


**Data Preprocessing**

Resize and Normalize Images: Use TensorFlow or OpenCV to resize images to 128x128 pixels and normalize pixel values.
Data Augmentation: Set up data augmentation using ImageDataGenerator in Keras to generate more diverse training data.

In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data augmentation setup: the random transforms are collected in one mapping
# so they can be read (and tuned) at a glance.
augmentation_options = {
    'rescale': 1./255,  # Normalize pixel values
    'rotation_range': 20,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
train_datagen = ImageDataGenerator(**augmentation_options)

# Stream the training images from their class sub-folders
train_dir = os.path.join(dataset_path, 'train')
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    batch_size=32,
    class_mode='binary',
)


Found 5216 images belonging to 2 classes.


**Model Development**

Load Pre-trained Models: Use DenseNet121 and EfficientNetB4 for transfer learning.

We then customize the pre-trained network for our binary classification task.

In [5]:
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras import layers, models

# Pre-trained DenseNet121 (ImageNet weights) without its original top
# classifier, used as the base of the model
base_model = DenseNet121(
    include_top=False,
    weights='imagenet',
    input_shape=(128, 128, 3),
)
base_model.trainable = False  # Freeze the base model

# New layers stacked on top of the frozen base
classifier_head = [
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),  # Binary classification
]
model = models.Sequential([base_model, *classifier_head])

# Compile the model
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


**Train the Model**

***Set Class Weights: If there is class imbalance, calculate and use class weights.***

Fit the Model: Use the model.fit() function to start training.

Calculate Class Weights:

Suppose you have a list of class labels for all images in your training dataset (0 for "Normal" and 1 for "Pneumonia"). You can generate this list by examining your dataset.

In [6]:
import numpy as np
from sklearn.utils import class_weight  # Import class_weight

# Take the per-image labels from the training generator instead of
# hard-coding the class counts, so the weights always match the images that
# were actually found on disk.
labels = np.asarray(train_generator.classes)
class_indices = train_generator.class_indices  # folder name -> integer label

# Per-class image counts, kept under their original names for reference
images_per_class = np.bincount(labels, minlength=len(class_indices))
num_normal_images = int(images_per_class[class_indices['NORMAL']])  # Normal images in the training set
num_pneumonia_images = int(images_per_class[class_indices['PNEUMONIA']])  # Pneumonia images in the training set

# Calculate class weights ('balanced' gives the rarer class the larger weight)
present_classes = np.unique(labels)
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=labels
)

# Convert to a dictionary of plain int -> float, the form passed to
# model.fit(class_weight=...)
class_weights = {
    int(label): float(weight)
    for label, weight in zip(present_classes, balanced_weights)
}

print(class_weights)  # Output the class weights


{0: 1.9448173005219984, 1: 0.6730322580645162}


Creating a Validation Data Generator


In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Validation images are only rescaled -- no augmentation is applied to them
val_datagen = ImageDataGenerator(rescale=1./255)

# Define the validation data generator
val_dir = os.path.join(dataset_path, 'val')  # Path to the validation data
val_generator = val_datagen.flow_from_directory(
    val_dir,
    class_mode='binary',     # Use 'binary' for binary classification
    batch_size=32,           # You can adjust the batch size as needed
    target_size=(128, 128),  # Resize images to 128x128
)


Found 16 images belonging to 2 classes.


Updated Model Training Code


In [8]:
# Training options: number of epochs, the class weights, and the validation
# generator
fit_options = {
    'epochs': 10,
    'class_weight': class_weights,     # Apply class weights
    'validation_data': val_generator,  # Use the validation data generator
}

# Train the model on the training generator
history = model.fit(train_generator, **fit_options)


Epoch 1/10


  self._warn_if_super_not_called()


[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m517s[0m 3s/step - accuracy: 0.7988 - loss: 0.4942 - val_accuracy: 0.9375 - val_loss: 0.2602
Epoch 2/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m390s[0m 2s/step - accuracy: 0.9016 - loss: 0.2433 - val_accuracy: 0.8750 - val_loss: 0.3104
Epoch 3/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m436s[0m 2s/step - accuracy: 0.9096 - loss: 0.2305 - val_accuracy: 0.7500 - val_loss: 0.4172
Epoch 4/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m384s[0m 2s/step - accuracy: 0.9022 - loss: 0.2320 - val_accuracy: 0.8125 - val_loss: 0.3303
Epoch 5/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m439s[0m 2s/step - accuracy: 0.9152 - loss: 0.2122 - val_accuracy: 0.8125 - val_loss: 0.3604
Epoch 6/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m374s[0m 2s/step - accuracy: 0.9183 - loss: 0.1974 - val_accuracy: 0.6250 - val_loss: 0.4790
Epoch 7/10
[1m163/163[0m [32m━