In [57]:
from tensorflow.keras import models ,layers ,optimizers
import seaborn as sns

In [33]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

In [34]:
data_dir = path+'/Lung X-Ray Image'+'/Lung X-Ray Image'
data_dir

'/kaggle/input/lung-disease/Lung X-Ray Image/Lung X-Ray Image'

In [35]:
os.listdir(data_dir)

['Normal', 'Lung_Opacity', 'Viral Pneumonia']

In [48]:
def load_dataset_from_directory(data_dir, target_size=(224, 224), batch_size=32):

    datagen = ImageDataGenerator(rescale=1./255)
    dataset = datagen.flow_from_directory(
        data_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical'
    )
    return dataset

dataset = load_dataset_from_directory(data_dir)

Found 3475 images belonging to 3 classes.


In [37]:
dataset.class_indices

{'Lung_Opacity': 0, 'Normal': 1, 'Viral Pneumonia': 2}

In [52]:
dataset.class_mode

'categorical'

In [49]:
dataset.image_shape

(224, 224, 3)

In [51]:
dataset.num_batches

109

In [41]:
#datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
target_size=(224, 224)
batch_size=32
EPOCHS = 20
train_datagen= ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True
)
val_datagen= ImageDataGenerator(preprocessing_function=preprocess_input)
train_gen= train_datagen.flow_from_directory(
        data_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical' ,
    )
val_gen= val_datagen.flow_from_directory(
        data_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical' ,
    )
NUM_CLASSES = train_gen.num_classes
CLASS_NAMES = list(train_gen.class_indices.keys())


Found 3475 images belonging to 3 classes.
Found 3475 images belonging to 3 classes.


In [42]:
def build_cnn():
    m = models.Sequential([
        layers.Input((*target_size, 3)),
        layers.Conv2D(32, 3, activation='relu'), layers.MaxPool2D(),
        layers.Conv2D(64, 3, activation='relu'), layers.MaxPool2D(),
        layers.Conv2D(128, 3, activation='relu'), layers.MaxPool2D(),
        layers.Flatten(), layers.Dense(256, activation='relu'), layers.Dropout(0.5),
        layers.Dense(NUM_CLASSES, activation='softmax')
    ])
    m.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return m

In [43]:
def build_vgg():
    base = VGG16(weights='imagenet', include_top=False, input_shape=(*target_size,3))
    base.trainable = False
    x = layers.GlobalAveragePooling2D()(base.output)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    out = layers.Dense(NUM_CLASSES, activation='softmax')(x)
    m = models.Model(inputs=base.input, outputs=out)
    m.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return m, base

In [46]:
cnn = build_cnn()
vgg, vgg_base = build_vgg()

callbacks = [
    tf.keras.callbacks.ReduceLROnPlateau('val_loss', factor=0.2, patience=3),
    tf.keras.callbacks.EarlyStopping('val_loss', patience=5, restore_best_weights=True)
]

print("Training CNN from scratch")
history_cnn = cnn.fit(train_gen, epochs=EPOCHS, validation_data=val_gen, callbacks=callbacks)

print("Training VGG16 transfer learning")
history_vgg = vgg.fit(train_gen, epochs=EPOCHS, validation_data=val_gen, callbacks=callbacks)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training CNN from scratch


  self._warn_if_super_not_called()


Epoch 1/20
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 608ms/step - accuracy: 0.5835 - loss: 41.7336 - val_accuracy: 0.7652 - val_loss: 0.5349 - learning_rate: 0.0010
Epoch 2/20
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 449ms/step - accuracy: 0.7384 - loss: 0.6182 - val_accuracy: 0.7528 - val_loss: 0.5484 - learning_rate: 0.0010
Epoch 3/20
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 452ms/step - accuracy: 0.7393 - loss: 0.5896 - val_accuracy: 0.8176 - val_loss: 0.4306 - learning_rate: 0.0010
Epoch 4/20
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 453ms/step - accuracy: 0.7838 - loss: 0.5165 - val_accuracy: 0.8204 - val_loss: 0.4108 - learning_rate: 0.0010
Epoch 5/20
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 463ms/step - accuracy: 0.7960 - loss: 0.5062 - val_accuracy: 0.8403 - val_loss: 0.3950 - learning_rate: 0.0010
Epoch 6/20
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [58]:
vgg_base.trainable = True
for layer in vgg_base.layers[:-4]:
    layer.trainable = False

vgg.compile(optimizer=optimizers.Adam(1e-5),
            loss='categorical_crossentropy', metrics=['accuracy'])
fine_hist = vgg.fit(train_gen, epochs=10, validation_data=val_gen, callbacks=callbacks)

Epoch 1/10
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 637ms/step - accuracy: 0.9137 - loss: 0.2242 - val_accuracy: 0.9306 - val_loss: 0.1778 - learning_rate: 1.0000e-05
Epoch 2/10
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 600ms/step - accuracy: 0.9356 - loss: 0.1753 - val_accuracy: 0.9430 - val_loss: 0.1447 - learning_rate: 1.0000e-05
Epoch 3/10
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 598ms/step - accuracy: 0.9387 - loss: 0.1571 - val_accuracy: 0.9419 - val_loss: 0.1385 - learning_rate: 1.0000e-05
Epoch 4/10
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 599ms/step - accuracy: 0.9468 - loss: 0.1456 - val_accuracy: 0.9540 - val_loss: 0.1119 - learning_rate: 1.0000e-05
Epoch 5/10
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 594ms/step - accuracy: 0.9442 - loss: 0.1349 - val_accuracy: 0.9623 - val_loss: 0.1029 - learning_rate: 1.0000e-05
Epoch 6/10
[1m109/109[0m [32m━━━

In [None]:
def eval_model(m, name):
    preds = m.predict(val_gen)
    y_true = val_gen.classes
    y_pred = np.argmax(preds, axis=1)
    print(f"=== {name} Classification Report ===")
    print(classification_report(y_true, y_pred, target_names=CLASS_NAMES))
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(6,5))
    sns.heatmap(cm, annot=True, fmt="d", xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES)
    plt.title(name); plt.xlabel('Predicted'); plt.ylabel('Actual'); plt.show()

eval_model(cnn, "CNN Scratch")
eval_model(vgg, "VGG16 Transfer (Fine-tuned)")