In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import shutil
import numpy as np
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import numpy as np

In [4]:
# Define data directory
data_dir = "drive/MyDrive/NewLungData"

# Define mean and standard deviation for normalization
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

# Define image size
image_size = (256, 256)

# Define batch size
batch_size = 32

# Define data generators for train, validation, and test sets
train_datagen = ImageDataGenerator(
    rescale=1./255
)

val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    os.path.join(data_dir, 'train'),
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    os.path.join(data_dir, 'val'),
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    os.path.join(data_dir, 'test'),
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical'
)

Found 5641 images belonging to 2 classes.
Found 1297 images belonging to 2 classes.
Found 1622 images belonging to 2 classes.


In [5]:
import tensorflow as tf

# Load a saved model
def load_model(model_load_path):
    loaded_model = tf.keras.models.load_model(model_load_path)
    return loaded_model

In [6]:
# Function for validation
def validate_model(model, val_dataset):
    # Define optimizer and learning rate
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    # Compile the model (if not compiled previously)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # Evaluate the model on the validation dataset
    val_loss, val_accuracy = model.evaluate(val_dataset)

    # Make predictions on the validation dataset
    val_predictions = model.predict(val_dataset)
    val_pred_labels = np.argmax(val_predictions, axis=1)

    # Get true labels from the validation dataset
    y_true = val_dataset.classes

    # Calculate precision, recall, and F1-score
    report = classification_report(y_true, val_pred_labels, target_names=val_dataset.class_indices)

    print("Validation Loss:", val_loss)
    print("Validation Accuracy:", val_accuracy)
    print("Validation Classification Report:")
    print(report)

In [7]:
# Define a function for testing
def test_model(model, test_dataset):
    # Define optimizer and learning rate
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    # Compile the model (if not compiled previously)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

     # Evaluate the model on the test dataset
    test_loss, test_accuracy = model.evaluate(test_dataset)

    # Make predictions on the test dataset
    test_predictions = model.predict(test_dataset)
    test_pred_labels = np.argmax(test_predictions, axis=1)

    # Get true labels from the validation dataset
    y_true = test_dataset.classes

    # Calculate precision, recall, and F1-score
    report = classification_report(y_true, test_pred_labels, target_names=test_dataset.class_indices)

    print("Test Loss:", test_loss)
    print("Test Accuracy:", test_accuracy)
    print("Test Classification Report:")
    print(report)

In [8]:
# Load DenseNet169 model which is pretrained from 450 layer
Dense169_model=load_model("drive/MyDrive/NewLungData/tenserflowModels/DenseNet169_fn_450")

In [9]:
# Load ResNet101 model which is pretrained from 200 layer
resnet_101_model=load_model("drive/MyDrive/NewLungData/tenserflowModels/ResNet101_fn_200")

In [None]:
# Load VGG16 model which is pretrained from 200 layer
vgg16_model=load_model("drive/MyDrive/NewLungData/tenserflowModels/vgg16_fn_5")

In [None]:
# Rename the model using the name attribute
Dense169_model._name = 'Dense169_model'
Dense169_model.summary()

In [None]:
resnet_101_model._name="resnet101_model"
resnet_101_model.summary()

In [None]:
vgg16_model._name="vgg16_model"
vgg16_model.summary()

#Technique 01: Ranked base with weights

In [41]:
import numpy as np

# Define rankings of accuracy and overfitting for each model
accuracy = {'VGG16': 0.942046880722045, 'DenseNet': 0.92601728439331, 'ResNet': 0.900123298168182}

# Define rankings of accuracy for each model
accuracy_rankings = {'VGG16': 3, 'DenseNet': 2, 'ResNet': 1}

# Calculate total accuracy
total_accuracy = sum(accuracy_rankings.values())

# Normalize each model accuracy
normalized_accuracy_rankings = {model: accuracy / total_accuracy for model, accuracy in accuracy_rankings.items()}

print("Normalized Accuracy Rankings:", normalized_accuracy_rankings)

# Define overfitting rankings
overfit_rankings = {'DenseNet': 3, 'ResNet': 2, 'VGG16': 1}

# Convert overfitting rankings to array
ranks = np.array(list(overfit_rankings.values()))

# Normalize overfitting ranks
normalized_ranks = ranks / np.sum(ranks)

# Update overfitting rankings with normalized ranks
normalized_overfit_rankings = {model: rank for model, rank in zip(overfit_rankings.keys(), normalized_ranks)}

print("Original Overfitting Rankings:", overfit_rankings)
print("Normalized Overfitting Rankings:", normalized_overfit_rankings)

# Define weights for accuracy and overfitting
accuracy_weight = 0.5
overfitting_weight = 0.5

# Multiply accuracy rankings by accuracy weight
weighted_accuracy_rankings = {model: accuracy * accuracy_weight for model, accuracy in normalized_accuracy_rankings.items()}

# Multiply normalized overfitting rankings by overfitting weight
weighted_overfitting_rankings = {model: overfit * overfitting_weight for model, overfit in normalized_overfit_rankings.items()}

# Combine the weighted rankings
combined_rankings = {}
for model in accuracy_rankings.keys():
    combined_rankings[model] = weighted_accuracy_rankings[model] + weighted_overfitting_rankings[model]

# Sort models based on combined rankings
sorted_models = sorted(combined_rankings.items(), key=lambda x: x[1], reverse=True)

print("Combined Rankings:", sorted_models)


Original Overfitting Rankings: {'DenseNet': 3, 'ResNet': 2, 'VGG16': 1}
Normalized Overfitting Rankings: {'DenseNet': 0.5, 'ResNet': 0.3333333333333333, 'VGG16': 0.16666666666666666}
Combined Rankings: [('DenseNet', 0.713008642196655), ('ResNet', 0.6167283157507577), ('VGG16', 0.5543567736943559)]


In [None]:
# Define the validation data generator for ensemble model
def validation_ensemble_data_generator(val_generator):
    for x_batch, y_batch in val_generator:
        # Resize images for Dense169 and ResNet-101 (256x256)
        resized_x_batch_256 = tf.image.resize(x_batch, (256, 256))
        # Resize images for VGG16 (224x224)
        resized_x_batch_224 = tf.image.resize(x_batch, (224, 224))
        yield ([resized_x_batch_256, resized_x_batch_224], y_batch)

In [None]:
# Define the test data generator for ensemble model
def test_ensemble_data_generator(test_generator):
    for x_batch, y_batch in test_generator:
        # Resize images for Dense169 and ResNet-101 (256x256)
        resized_x_batch_256 = tf.image.resize(x_batch, (256, 256))
        # Resize images for VGG16 (224x224)
        resized_x_batch_224 = tf.image.resize(x_batch, (224, 224))
        yield ([resized_x_batch_256, resized_x_batch_224], y_batch)


In [None]:
# Define a function for ensemble model validation
def validate_ensemble_model(model, val_dataset, steps):
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    val_loss, val_accuracy = model.evaluate(val_dataset, steps=steps)

    print("Validation Loss:", val_loss)
    print("Validation Accuracy:", val_accuracy)

In [None]:
# Define a function for ensemble model validation
def test_rank_based_model(model, val_dataset, steps):
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    test_loss, test_accuracy = model.evaluate(val_dataset, steps=steps)

    print("Test Loss:", test_loss)
    print("Test Accuracy:", test_accuracy)

In [42]:
import tensorflow as tf
import numpy as np

# Input layer
image_size = (256, 256, 3)
inputs = tf.keras.Input(shape=image_size)

# Set the loaded models to non-trainable
Dense169_model.trainable = False
resnet_101_model.trainable = False
vgg16_model.trainable = False

# Get the outputs of the loaded models
dense_output = Dense169_model(inputs)
resnet_output = resnet_101_model(inputs)

# VGG16 model requires different image size as input, hence input size needs to be changed to get the output
vgg_image_size = (224, 224, 3)
vgg_inputs = tf.keras.Input(shape=vgg_image_size)
vgg16_output = vgg16_model(vgg_inputs)

# Assign calculated combined ranking score for each model
dense169_cr_score = 0.713008642196655
resnet101_cr_score = 0.6167283157507577
vgg16_cr_score = 0.5543567736943559

# Calculate weights based on combined ranking and tanh function
cr_models = np.array([dense169_cr_score, resnet101_cr_score,vgg16_cr_score])
ranked_indices = np.argsort(cr_models)[::-1]
weights = tf.math.tanh(1 / (ranked_indices + 1))

# Normalize the weights
normalized_weights = weights / tf.reduce_sum(weights)

dense169_weight = normalized_weights[0].numpy()
resnet101_weight = normalized_weights[1].numpy()
vgg16_weight= normalized_weights[2].numpy()

# Combine the outputs with weighted voting
weighted_output = tf.keras.layers.Lambda(lambda x: (x[0] * dense169_weight
                                                    + x[1] * resnet101_weight
                                                    + x[2] * vgg16_weight)
                      / (dense169_weight + resnet101_weight + vgg16_weight))
                      ([dense_output, resnet_output, vgg16_output])

# Round the weighted output to obtain binary predictions
voted_output = tf.keras.layers.Lambda(lambda x: tf.math.round(x))(weighted_output)

# Define the ensemble model
ensemble_model_tan = tf.keras.Model(inputs=[inputs, vgg_inputs], outputs=voted_output, name='ensemble_model_rank')

# Calculate the total number of samples in the validation set
total_val_samples = len(val_generator)
# Calculate the total number of steps based on the batch size
val_steps = total_val_samples // batch_size * 10

# Validate the ensemble model
validate_ensemble_model(ensemble_model_tan, validation_ensemble_data_generator(val_generator), val_steps)

Validation Loss: 0.42276981472969055
Validation Accuracy: 0.9737704992294312


In [43]:
# Calculate the total number of samples in the validation set
total_test_samples = len(test_generator)
# Calculate the total number of steps based on the batch size
test_steps = total_test_samples // batch_size * 10

 # Test the ensemble model
test_rank_based_model(ensemble_model_tan, test_ensemble_data_generator(test_generator), test_steps)

Test Loss: 0.9570119976997375
Test Accuracy: 0.940625011920929


#Technique 02: Average

In [44]:
import tensorflow as tf
import numpy as np

# Input layer
image_size = (256, 256, 3)
inputs = tf.keras.Input(shape=image_size)

# Set the loaded models to non-trainable
Dense169_model.trainable = False
resnet_101_model.trainable = False
vgg16_model.trainable = False

# Get the outputs of the loaded models
dense_output = Dense169_model(inputs)
resnet_output = resnet_101_model(inputs)

# VGG16 model requires different image size as input, hence input size needs to be changed to get the output
vgg_image_size = (224, 224, 3)
vgg_inputs = tf.keras.Input(shape=vgg_image_size)
vgg16_output = vgg16_model(vgg_inputs)

# Average the predictions
averaged_output = tf.keras.layers.Average()([dense_output, resnet_output, vgg16_output])

# Define the ensemble model
ensemble_model_average = tf.keras.Model(inputs=[inputs, vgg_inputs], outputs=averaged_output, name='ensemble_model_average')

# Validate the ensemble model
validate_ensemble_model(ensemble_model_average, validation_ensemble_data_generator(val_generator), val_steps)

Validation Loss: 0.20499460399150848
Validation Accuracy: 0.9624999761581421


In [45]:
# Test the ensemble model
test_rank_based_model(ensemble_model_average, test_ensemble_data_generator(test_generator), test_steps)

Test Loss: 0.28086769580841064
Test Accuracy: 0.9290322661399841


#Technique 03: Hard Voting

In [46]:
import tensorflow as tf
import numpy as np

# Input layer
image_size = (256, 256, 3)
inputs = tf.keras.Input(shape=image_size)

# Set the loaded models to non-trainable
Dense169_model.trainable = False
resnet_101_model.trainable = False
vgg16_model.trainable = False

# Get the outputs of the loaded models
dense_output = Dense169_model(inputs)
resnet_output = resnet_101_model(inputs)

# VGG16 model requires different image size as input, hence input size needs to be changed to get the output
vgg_image_size = (224, 224, 3)
vgg_inputs = tf.keras.Input(shape=vgg_image_size)
vgg16_output = vgg16_model(vgg_inputs)

voted_output = tf.keras.layers.Lambda(lambda x: tf.math.round(tf.reduce_mean(x, axis=0)))([dense_output, resnet_output])

# Define the ensemble model
ensemble_model_hard_voting = tf.keras.Model(inputs=[inputs, vgg_inputs], outputs=voted_output, name='ensemble_model_voting')

# Validate the ensemble model
validate_ensemble_model(ensemble_model_hard_voting, validation_ensemble_data_generator(val_generator), val_steps)

Validation Loss: 1.158488154411316
Validation Accuracy: 0.9281250238418579


In [47]:
# Test the ensemble model
test_rank_based_model(ensemble_model_hard_voting, test_ensemble_data_generator(test_generator), test_steps)

Test Loss: 0.21819451451301575
Test Accuracy: 0.953125


#Technique 04: Stacking-Random Forest Classifier

In [48]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Train the base models
Dense169_model.trainable = False
resnet_101_model.trainable = False
vgg16_model.trainable = False

# Get output of train, validation and test data set
dense_output_train = Dense169_model.predict(train_generator)
dense_output_val = Dense169_model.predict(val_generator)
dense_output_test = Dense169_model.predict(test_generator)

resnet_output_train = resnet_101_model.predict(train_generator)
resnet_output_val = resnet_101_model.predict(val_generator)
resnet_output_test = resnet_101_model.predict(test_generator)


vgg_output_train = vgg16_model.predict(train_generator)
vgg_output_val = vgg16_model.predict(val_generator)
vgg_output_test = vgg16_model.predict(test_generator)

# Concatenate the outputs to get as features
X_train_combined = np.concatenate((dense_output_train, resnet_output_train,vgg_output_train), axis=1)
X_val_combined = np.concatenate((dense_output_val, resnet_output_val,vgg_output_val), axis=1)
X_test_combined = np.concatenate((dense_output_test, resnet_output_test,vgg_output_test), axis=1)

# Define the labels
y_train = train_generator.classes
y_val = val_generator.classes
y_test = test_generator.classes

# Train Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_combined, y_train)

# Validation
val_predictions = rf_classifier.predict(X_val_combined)
val_accuracy = accuracy_score(y_val, val_predictions)
print("Validation Accuracy:", val_accuracy)
print("Validation Classification Report:")
print(classification_report(y_val, val_predictions))

# Testing
test_predictions = rf_classifier.predict(X_test_combined)
test_accuracy = accuracy_score(y_test, test_predictions)
print("Test Accuracy:", test_accuracy)
print("Test Classification Report:")
print(classification_report(y_test, test_predictions))


Validation Accuracy: 0.8033924441017734
Validation Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.95      0.89      1073
           1       0.28      0.08      0.13       224

    accuracy                           0.80      1297
   macro avg       0.55      0.52      0.51      1297
weighted avg       0.74      0.80      0.76      1297

Test Accuracy: 0.7916152897657214
Test Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.95      0.88      1340
           1       0.18      0.06      0.09       282

    accuracy                           0.79      1622
   macro avg       0.50      0.50      0.48      1622
weighted avg       0.71      0.79      0.74      1622



#Technique 05: Stacking-Support Vector Classification

In [50]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Train the base models
Dense169_model.trainable = False
resnet_101_model.trainable = False
vgg16_model.trainable = False

dense_output_train = Dense169_model.predict(train_generator)
dense_output_val = Dense169_model.predict(val_generator)
dense_output_test = Dense169_model.predict(test_generator)

resnet_output_train = resnet_101_model.predict(train_generator)
resnet_output_val = resnet_101_model.predict(val_generator)
resnet_output_test = resnet_101_model.predict(test_generator)


vgg_output_train = vgg16_model.predict(train_generator)
vgg_output_val = vgg16_model.predict(val_generator)
vgg_output_test = vgg16_model.predict(test_generator)

# Concatenate the outputs to use as features
X_train_combined = np.concatenate((dense_output_train, resnet_output_train,vgg_output_train), axis=1)
X_val_combined = np.concatenate((dense_output_val, resnet_output_val,vgg_output_val), axis=1)
X_test_combined = np.concatenate((dense_output_test, resnet_output_test,vgg_output_test), axis=1)

# Define the labels
y_train = train_generator.classes
y_val = val_generator.classes
y_test = test_generator.classes

# Train SVM Classifier
svm_classifier = SVC(kernel='rbf', random_state=42)
svm_classifier.fit(X_train_combined, y_train)

# Validation
val_predictions = svm_classifier.predict(X_val_combined)
val_accuracy = accuracy_score(y_val, val_predictions)
print("Validation Accuracy:", val_accuracy)
print("Validation Classification Report:")
print(classification_report(y_val, val_predictions))

# Testing
test_predictions = rf_classifier.predict(X_test_combined)
test_accuracy = accuracy_score(y_test, test_predictions)
print("Test Accuracy:", test_accuracy)
print("Test Classification Report:")
print(classification_report(y_test, test_predictions))

Validation Accuracy: 0.8272937548188126
Validation Classification Report:
              precision    recall  f1-score   support

           0       0.83      1.00      0.91      1073
           1       0.00      0.00      0.00       224

    accuracy                           0.83      1297
   macro avg       0.41      0.50      0.45      1297
weighted avg       0.68      0.83      0.75      1297

Test Accuracy: 0.7940813810110974
Test Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.95      0.88      1340
           1       0.18      0.05      0.08       282

    accuracy                           0.79      1622
   macro avg       0.50      0.50      0.48      1622
weighted avg       0.71      0.79      0.74      1622



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
