# Lung cancer detection using VGG16 and VGG19

### Import Libs

In [1]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16, VGG19
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

### Create VGG16 deep learning model 

In [None]:
# 1. Create VGG16 transfer learning model for binary classification
def create_vgg16_binary_model():
    """Build and compile a VGG16-based transfer-learning binary classifier.

    The ImageNet-pretrained VGG16 convolutional base (without its top
    classifier) is frozen, and a small trainable head is stacked on top:
    Flatten -> Dense(256, relu) -> Dense(1, sigmoid).

    Returns:
        A compiled Keras ``Model`` taking 224x224x3 inputs and producing a
        single sigmoid output, trained with Adam and binary cross-entropy.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Freeze every backbone layer so only the new head is trained.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # Classification head; a single sigmoid unit because the task is binary
    # (a multi-class task would use a softmax layer instead).
    flattened = Flatten()(backbone.output)
    hidden = Dense(256, activation='relu')(flattened)
    probability = Dense(1, activation='sigmoid')(hidden)

    classifier = Model(inputs=backbone.input, outputs=probability)
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

In [None]:
# 2. Create training data generator with augmentation
def create_train_generator(data_dir, batch_size=32):
    """Create an augmented image iterator for training.

    Args:
        data_dir: Directory passed to ``flow_from_directory``; its
            sub-folders define the classes.
        batch_size: Number of images per batch (default 32).

    Returns:
        The directory iterator yielding 224x224 image batches with binary
        labels, after random augmentation and VGG16 input preprocessing.
    """
    augmentation_settings = dict(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
        # Apply the preprocessing function that ships with VGG16.
        preprocessing_function=tf.keras.applications.vgg16.preprocess_input,
    )
    augmenter = ImageDataGenerator(**augmentation_settings)

    return augmenter.flow_from_directory(
        data_dir,
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='binary',
    )

In [None]:
# 3. Create test data generator (no augmentation, same data for evaluation)
def create_test_generator(data_dir, batch_size=32):
    """Create a non-augmented, unshuffled image iterator for evaluation.

    Args:
        data_dir: Directory passed to ``flow_from_directory``; its
            sub-folders define the classes.
        batch_size: Number of images per batch (default 32).

    Returns:
        The directory iterator yielding 224x224 image batches with binary
        labels; only VGG16 input preprocessing is applied.
    """
    # No augmentation here: evaluation images are only preprocessed.
    evaluation_datagen = ImageDataGenerator(
        preprocessing_function=tf.keras.applications.vgg16.preprocess_input,
    )

    flow_options = {
        'target_size': (224, 224),
        'batch_size': batch_size,
        'class_mode': 'binary',
        # Shuffling is explicitly disabled for the evaluation iterator.
        'shuffle': False,
    }
    return evaluation_datagen.flow_from_directory(data_dir, **flow_options)

In [None]:
# 4. Main function to run the pipeline
def main(data_dir='Augmented IQ-OTHNCCD lung cancer dataset', test_dir=None, epochs=5, batch_size=32):
    """Train the VGG16 binary classifier and report its loss and accuracy.

    Args:
        data_dir: Directory holding the training images. It must contain the
            class sub-folders ``Malignant cases`` and ``Normal cases``.
        test_dir: Optional directory with the same two class sub-folders
            holding held-out images. When omitted, ``data_dir`` is reused for
            validation and testing, so the reported metrics are measured on
            images the model was trained on; a warning is emitted.
        epochs: Number of training epochs.
        batch_size: Batch size used by both generators.

    Returns:
        The fitted Keras model.

    Raises:
        FileNotFoundError: If a required class sub-folder is missing from
            ``data_dir`` (or from ``test_dir`` when it is given).
    """
    import warnings

    required_folders = ('Malignant cases', 'Normal cases')

    def _check_class_folders(root):
        # Name the folders that are actually missing so the error is actionable.
        missing = [
            folder for folder in required_folders
            if not os.path.isdir(os.path.join(root, folder))
        ]
        if missing:
            raise FileNotFoundError(
                f"Class folder(s) {missing} not found in {root!r}"
            )

    # Verify data exists
    _check_class_folders(data_dir)
    if test_dir is None:
        # Evaluating on the training directory leaks training data into the
        # validation/test metrics; keep the old behaviour but make it visible.
        warnings.warn(
            "No test_dir given: validation and test metrics are computed on "
            "the training images and will overestimate generalisation. "
            "Pass test_dir pointing at held-out data.",
            stacklevel=2,
        )
        test_dir = data_dir
    else:
        _check_class_folders(test_dir)

    # Create generators
    train_generator = create_train_generator(data_dir, batch_size=batch_size)
    test_generator = create_test_generator(test_dir, batch_size=batch_size)

    # Create and train model
    model = create_vgg16_binary_model()
    model.fit(
        train_generator,
        epochs=epochs,
        validation_data=test_generator
    )

    # Evaluate model
    loss, accuracy = model.evaluate(test_generator)
    print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

    return model

# Run the full pipeline when this cell/script is executed directly
if __name__ == "__main__":
    model = main()

Found 2409 images belonging to 2 classes.


Found 2409 images belonging to 2 classes.
Epoch 1/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m214s[0m 3s/step - accuracy: 0.7959 - loss: 5.5042 - val_accuracy: 0.9697 - val_loss: 0.1264
Epoch 2/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m224s[0m 3s/step - accuracy: 0.9390 - loss: 0.3279 - val_accuracy: 0.9680 - val_loss: 0.1254
Epoch 3/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 3s/step - accuracy: 0.9566 - loss: 0.1767 - val_accuracy: 0.9788 - val_loss: 0.0678
Epoch 4/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m220s[0m 3s/step - accuracy: 0.9628 - loss: 0.1466 - val_accuracy: 0.9792 - val_loss: 0.0671
Epoch 5/5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m243s[0m 3s/step - accuracy: 0.9646 - loss: 0.1325 - val_accuracy: 0.9759 - val_loss: 0.0838
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 2s/step - accuracy: 0.9921 - loss: 0.0293
Test Loss: 0.0838, Test Accuracy: 0.9759


## VGG 19

In [3]:
# 1. Create VGG19 transfer learning model for binary classification
def create_vgg19_binary_model():
    """Build and compile a VGG19-based transfer-learning binary classifier.

    The ImageNet-pretrained VGG19 convolutional base (without its top
    classifier) is frozen, and a small trainable head is stacked on top:
    Flatten -> Dense(256, relu) -> Dense(1, sigmoid).

    Returns:
        A compiled Keras ``Model`` taking 224x224x3 inputs and producing a
        single sigmoid output, trained with Adam and binary cross-entropy.
    """
    backbone = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Freeze every backbone layer so only the new head is trained.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # Classification head; a single sigmoid unit because the task is binary
    # (a multi-class task would use a softmax layer instead).
    flattened = Flatten()(backbone.output)
    hidden = Dense(256, activation='relu')(flattened)
    probability = Dense(1, activation='sigmoid')(hidden)

    classifier = Model(inputs=backbone.input, outputs=probability)
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

In [4]:
# 2. Create training data generator with augmentation
def create_train_generator(data_dir, batch_size=32):
    """Create an augmented image iterator for training the VGG19 model.

    Args:
        data_dir: Directory passed to ``flow_from_directory``; its
            sub-folders define the classes.
        batch_size: Number of images per batch (default 32).

    Returns:
        The directory iterator yielding 224x224 image batches with binary
        labels, after random augmentation and VGG19 input preprocessing.
    """
    train_datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
        # Use the preprocessing function that belongs to the VGG19 backbone
        # used in this section, rather than the VGG16 one.
        preprocessing_function=tf.keras.applications.vgg19.preprocess_input
    )

    train_generator = train_datagen.flow_from_directory(
        data_dir,
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='binary'
    )
    return train_generator

In [5]:
# 3. Create test data generator (no augmentation, same data for evaluation)
def create_test_generator(data_dir, batch_size=32):
    """Create a non-augmented, unshuffled image iterator for evaluating VGG19.

    Args:
        data_dir: Directory passed to ``flow_from_directory``; its
            sub-folders define the classes.
        batch_size: Number of images per batch (default 32).

    Returns:
        The directory iterator yielding 224x224 image batches with binary
        labels; only VGG19 input preprocessing is applied.
    """
    test_datagen = ImageDataGenerator(
        # Use the preprocessing function that belongs to the VGG19 backbone
        # used in this section, rather than the VGG16 one.
        preprocessing_function=tf.keras.applications.vgg19.preprocess_input
    )

    test_generator = test_datagen.flow_from_directory(
        data_dir,
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='binary',
        # Shuffling is explicitly disabled for the evaluation iterator.
        shuffle=False
    )
    return test_generator

In [None]:
# 4. Main function to run the pipeline
def main(data_dir='The IQ-OTHNCCD lung cancer dataset', test_dir=None, epochs=5, batch_size=32):
    """Train the VGG19 binary classifier and report its loss and accuracy.

    Args:
        data_dir: Directory holding the training images. It must contain the
            class sub-folders ``Malignant cases`` and ``Normal cases``.
        test_dir: Optional directory with the same two class sub-folders
            holding held-out images. When omitted, ``data_dir`` is reused for
            validation and testing, so the reported metrics are measured on
            images the model was trained on; a warning is emitted.
        epochs: Number of training epochs.
        batch_size: Batch size used by both generators.

    Returns:
        The fitted Keras model.

    Raises:
        FileNotFoundError: If a required class sub-folder is missing from
            ``data_dir`` (or from ``test_dir`` when it is given).
    """
    import warnings

    required_folders = ('Malignant cases', 'Normal cases')

    def _check_class_folders(root):
        # Name the folders that are actually missing so the error is actionable.
        missing = [
            folder for folder in required_folders
            if not os.path.isdir(os.path.join(root, folder))
        ]
        if missing:
            raise FileNotFoundError(
                f"Class folder(s) {missing} not found in {root!r}"
            )

    # Verify data exists
    _check_class_folders(data_dir)
    if test_dir is None:
        # Evaluating on the training directory leaks training data into the
        # validation/test metrics; keep the old behaviour but make it visible.
        warnings.warn(
            "No test_dir given: validation and test metrics are computed on "
            "the training images and will overestimate generalisation. "
            "Pass test_dir pointing at held-out data.",
            stacklevel=2,
        )
        test_dir = data_dir
    else:
        _check_class_folders(test_dir)

    # Create generators
    train_generator = create_train_generator(data_dir, batch_size=batch_size)
    test_generator = create_test_generator(test_dir, batch_size=batch_size)

    # Create and train model
    model = create_vgg19_binary_model()
    model.fit(
        train_generator,
        epochs=epochs,
        validation_data=test_generator
    )

    # Evaluate model
    loss, accuracy = model.evaluate(test_generator)
    print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

    return model

# Run the full pipeline when this cell/script is executed directly
if __name__ == "__main__":
    model = main()

Found 977 images belonging to 2 classes.


Found 977 images belonging to 2 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m80134624/80134624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 1us/step
Epoch 1/5


  self._warn_if_super_not_called()


[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 4s/step - accuracy: 0.8038 - loss: 6.0691 - val_accuracy: 0.9806 - val_loss: 0.4536
Epoch 2/5
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 4s/step - accuracy: 0.9518 - loss: 1.3042 - val_accuracy: 0.9754 - val_loss: 0.4528
Epoch 3/5
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 4s/step - accuracy: 0.9663 - loss: 0.5984 - val_accuracy: 0.9826 - val_loss: 0.3180
Epoch 4/5
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m118s[0m 4s/step - accuracy: 0.9723 - loss: 0.5527 - val_accuracy: 0.9846 - val_loss: 0.3118
Epoch 5/5
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m118s[0m 4s/step - accuracy: 0.9783 - loss: 0.3464 - val_accuracy: 0.9898 - val_loss: 0.1193
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 2s/step - accuracy: 0.9888 - loss: 0.1292
Test Loss: 0.1193, Test Accuracy: 0.9898


: 