In [1]:
# Import Libraries:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.preprocessing import image
import numpy as np

In [2]:
# Dataset archive locations, image dimensions and batch size.

# Number of images per batch and the (height, width) every image is resized to
batch_size = 32
image_size = (224, 224)

# Zip archives holding the train / test / validation splits
train_dir, test_dir, val_dir = (
    '/content/train.zip',
    '/content/test.zip',
    '/content/val.zip',
)

In [11]:
import os

# List the contents of the unzipped training directory.
# A dedicated name is used here instead of reassigning train_dir: train_dir still
# holds the archive path at this point and the unzip cell reads it as such.
unzipped_train_dir = '/content/train'

if os.path.isdir(unzipped_train_dir):
    print(os.listdir(unzipped_train_dir))

    # If there are subdirectories, list their contents as well
    for item in os.listdir(unzipped_train_dir):
        item_path = os.path.join(unzipped_train_dir, item)
        if os.path.isdir(item_path):
            print(f"\nContents of {item}:")
            print(os.listdir(item_path))
else:
    # On a fresh kernel the archive has not been extracted yet; report instead of crashing
    print(f"{unzipped_train_dir} does not exist yet. Run the unzip cell first.")

['train']

Contents of train:
['NORMAL', 'PNEUMONIA']


In [12]:
import zipfile
import os

# Define a function to unzip files
def unzip_data(zip_path, extract_path):
    """Extract a zip archive into a directory unless that directory already exists.

    Args:
        zip_path: Path of the zip archive to read.
        extract_path: Destination path handed to ``ZipFile.extractall``.

    Returns:
        None. A one-line status message is printed in either case.
    """
    # Anything already present at the destination is treated as "done"
    if os.path.exists(extract_path):
        print(f"{extract_path} already exists. Skipping unzipping.")
        return

    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_path)
    print(f"Unzipped {zip_path} to {extract_path}")

# Unzip the datasets.
# The archive paths are written out explicitly instead of being read from
# train_dir / test_dir / val_dir: those names are re-pointed at extracted folders
# (below, and in the directory-listing cell), so reusing them here would pass a
# directory to ZipFile whenever this cell runs after such a reassignment.
unzip_data('/content/train.zip', '/content/train')
unzip_data('/content/test.zip', '/content/test')
unzip_data('/content/val.zip', '/content/val')

# Update the directory paths to the unzipped directories
# (each archive extracts into a same-named sub-folder, e.g. /content/train/train)
train_dir = '/content/train/train'
test_dir = '/content/test/test'
val_dir = '/content/val/val'

/content/train already exists. Skipping unzipping.
/content/test already exists. Skipping unzipping.
/content/val already exists. Skipping unzipping.


In [3]:
# Augmentation settings for the training images: every pixel value is multiplied
# by 1/255, then random rotations, shifts, shear, zoom and horizontal flips are applied
augmentation_options = dict(
    rescale=1.0/255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)

In [13]:
# Build the augmenting generator for the training images
train_datagen = ImageDataGenerator(
    # multiply every pixel value by 1/255
    rescale=1.0/255,
    # random geometric perturbations
    rotation_range=20, width_shift_range=0.2, height_shift_range=0.2,
    shear_range=0.2, zoom_range=0.2, horizontal_flip=True,
    # how pixels outside the transformed image are filled in
    fill_mode='nearest',
)

# Stream resized, binary-labelled batches from the class sub-folders of train_dir
train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    class_mode='binary',
    batch_size=batch_size,
    target_size=image_size,
)

Found 5216 images belonging to 2 classes.


In [14]:
# Validation and test images are only rescaled by 1/255 -- no augmentation.
# Both splits are read with the same resize / batching / label settings.
eval_flow_kwargs = dict(
    target_size=image_size,
    batch_size=batch_size,
    class_mode='binary',
)

# Validation split
val_datagen = ImageDataGenerator(rescale=1.0/255)
val_generator = val_datagen.flow_from_directory(val_dir, **eval_flow_kwargs)

# Test split
test_datagen = ImageDataGenerator(rescale=1.0/255)
test_generator = test_datagen.flow_from_directory(test_dir, **eval_flow_kwargs)

Found 16 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [8]:
# Creating & compiling the CNN model:
# VGG16 convolutional base with ImageNet weights; its top classification layers are left out
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=image_size[:2] + (3,),
)

# Keep the pre-trained weights fixed during training
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# Classification head stacked on the base model's output:
# flatten -> 256-unit ReLU layer -> 50% dropout -> single sigmoid unit
flattened = layers.Flatten()(base_model.output)
hidden = layers.Dense(256, activation='relu')(flattened)
x = layers.Dropout(0.5)(hidden)
predictions = layers.Dense(1, activation='sigmoid')(x)

# Tie the base model's input to the new head's output
model = Model(inputs=base_model.input, outputs=predictions)

# Binary classification setup: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [17]:
# Train the model
epochs = 10 # You can adjust the number of epochs

# Step counts are expressed in batches. len(generator) is the number of batches in
# one pass over the data, including a final partial batch. The previous fallback
# used val_generator.samples (a sample count) as the step count whenever the
# validation set was smaller than one batch, which asks for far more batches than
# the generator holds.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=epochs,
    validation_data=val_generator,
    validation_steps=len(val_generator)
)

Epoch 1/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 630ms/step - accuracy: 0.8996 - loss: 0.2388 - val_accuracy: 0.6875 - val_loss: 0.5570
Epoch 2/10




[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 638ms/step - accuracy: 0.8983 - loss: 0.2387 - val_accuracy: 0.7500 - val_loss: 0.3526
Epoch 3/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 627ms/step - accuracy: 0.8985 - loss: 0.2412 - val_accuracy: 0.6875 - val_loss: 0.9246
Epoch 4/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 630ms/step - accuracy: 0.8968 - loss: 0.2523 - val_accuracy: 0.6875 - val_loss: 0.5482
Epoch 5/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 629ms/step - accuracy: 0.8991 - loss: 0.2298 - val_accuracy: 0.8125 - val_loss: 0.3076
Epoch 6/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 628ms/step - accuracy: 0.9073 - loss: 0.2247 - val_accuracy: 0.8125 - val_loss: 0.3575
Epoch 7/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 633ms/step - accuracy: 0.8975 - loss: 0.2446 - val_accuracy: 0.6875 - val_loss: 0.6009
Epoch 8/10
[1m

In [18]:
# Score the trained model on the held-out test generator;
# evaluate() yields the loss followed by the compiled accuracy metric
test_loss, test_acc = model.evaluate(test_generator)

# Report both numbers
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

  self._warn_if_super_not_called()


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 280ms/step - accuracy: 0.9204 - loss: 0.3672
Test Loss: 0.35740140080451965
Test Accuracy: 0.9118589758872986


In [20]:
# Write the trained model to a .h5 file so it can be reloaded later
model.save(filepath='/kaggle/working/cnn_model.h5')





In [22]:
# Test prediction:
# Load the trained model
model = tf.keras.models.load_model('/kaggle/working/cnn_model.h5')

# Load an example image for prediction
image_path = '/content/val/val/PNEUMONIA/person1949_bacteria_4880.jpeg'
img = image.load_img(image_path, target_size=(224, 224))
# Apply the same 1/255 rescaling that the train/validation/test generators use;
# the model never saw raw 0-255 pixel values, so feeding them unscaled skews the output
img = image.img_to_array(img) / 255.0
# Add the leading batch axis expected by predict()
img = np.expand_dims(img, axis=0)

# Make prediction
predictions = model.predict(img)

# The model has a single sigmoid output; take it as a plain float.
# With class_mode='binary' the generators label folders in alphanumeric order,
# so NORMAL maps to 0 and PNEUMONIA to 1.
pneumonia_probability = float(predictions[0][0])

# Interpret the prediction
if pneumonia_probability < 0.5:
    print("The image is NORMAL.")
else:
    print("The image indicates PNEUMONIA.")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
The image indicates PNEUMONIA.
