In [5]:
import tensorflow as tf

from tensorflow.keras.applications import DenseNet169
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D

from sklearn.svm import SVC
import numpy as np

import os

import random

def load_and_preprocess_image(path):
    image = tf.io.read_file(path)
    image = tf.image.decode_png(image, channels=3)
    image = tf.image.resize(image, [224, 224])
    image = tf.keras.applications.densenet.preprocess_input(image) # specific for DenseNet
    return image

def create_densenet_feature_extractor():
    base_model = DenseNet169(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x) 
    feature_extractor = Model(inputs=base_model.input, outputs=x)
    return feature_extractor
    
feature_extractor = create_densenet_feature_extractor()

def extract_features(image_paths, y_lst):
    features = []
    for idx, path in enumerate(image_paths):
        if not [file_type for file_type in ["jpeg", "png", "gif", "bmp"] if file_type in path]:
            del y_lst[idx]
        else:
            image = load_and_preprocess_image(path)
            image = np.expand_dims(image, axis=0)  
            feature = feature_extractor.predict(image, verbose=0)
            features.append(feature.flatten())
    return np.array(features), np.array(y_lst)

labels = ["NORMAL", "PNEUMONIA"]
train_dir = "chest_xray/chest_xray/train/"
train_dir_pneumonia = train_dir + "PNEUMONIA/"
train_dir_normal = train_dir + "NORMAL/"

test_dir = "chest_xray/chest_xray/test/"
val_dir = "chest_xray/chest_xray/val/"

normal_img_lst = [train_dir_normal + path_name for path_name in os.listdir(train_dir_normal)]
pneumonia_img_lst = [train_dir_pneumonia + path_name for path_name in os.listdir(train_dir_pneumonia)]

train_image_paths = normal_img_lst + pneumonia_img_lst
y_lst = [0 for i in range(len(normal_img_lst))] + [1 for i in range(len(pneumonia_img_lst))]

In [6]:
print("Extracting Features...")
X_train, y_train = extract_features(train_image_paths, y_lst)
print(X_train.shape, y_train.shape)
print("Extracted training data features successfully")

Extracting Features...
(5216, 1664) (5216,)


In [7]:
svm_classifier = SVC(kernel='linear', probability=True)
svm_classifier.fit(X_train, y_train)
print("Model trained successfully")

In [8]:
test_normal_dir = test_dir + "NORMAL/"
test_pneumonia_dir = test_dir + "PNEUMONIA/"

normal_test_imgs = [test_normal_dir + path_name for path_name in os.listdir(test_normal_dir)]
pneumonia_test_imgs = [test_pneumonia_dir + path_name for path_name in os.listdir(test_pneumonia_dir)]

test_image_paths = normal_test_imgs + pneumonia_test_imgs
test_labels = [0] * len(normal_test_imgs) + [1] * len(pneumonia_test_imgs)

print("Extracting Features...")
X_test, y_test = extract_features(test_image_paths, test_labels)
print("Extracted testing data features successfully")

Extracting Features...


In [7]:
predicted_labels = svm_classifier.predict(X_test)

In [8]:
from sklearn.metrics import accuracy_score, classification_report

accuracy = accuracy_score(y_test, predicted_labels)
print(f"Accuracy: {accuracy:.2f}")

report = classification_report(y_test, predicted_labels, target_names=['Normal', 'Pneumonia'])
print(report)

Accuracy: 0.80
              precision    recall  f1-score   support

      Normal       0.97      0.49      0.65       234
   Pneumonia       0.76      0.99      0.86       390

    accuracy                           0.80       624
   macro avg       0.87      0.74      0.76       624
weighted avg       0.84      0.80      0.78       624



In [9]:
from sklearn.model_selection import GridSearchCV

param_grid = [
    {"C":[1, 10, 100, 1000], "kernel":["linear"]},
    {"C":[1, 10, 100, 1000], "kernel":["rbf"], "gamma":[0.001, 0.0001]},
    {"C":[1, 10, 100, 1000], "kernel":["rbf"], "gamma":[0.001, 0.0001], "degree":[3, 4, 5]}
]

print("Grid Search Started...")
svm_classifier = SVC()
grid_search = GridSearchCV(svm_classifier, param_grid, cv=5, scoring="accuracy", verbose=1)
grid_search.fit(X_train, y_train)

Grid Search Started...
Fitting 5 folds for each of 36 candidates, totalling 180 fits


In [10]:
print("Best Parameter:", grid_search.best_params_)
print("Best cross-validation score: {:.2f}".format(grid_search.best_score_))

Best Parameter: {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
Best cross-validation score: 0.98


In [11]:
predicted_labels = grid_search.predict(X_test)

In [12]:
from sklearn.metrics import accuracy_score, classification_report

accuracy = accuracy_score(y_test, predicted_labels)
print(f"Accuracy: {accuracy:.2f}")

report = classification_report(y_test, predicted_labels, target_names=labels)
print(report)

Accuracy: 0.82
              precision    recall  f1-score   support

      NORMAL       0.97      0.53      0.69       234
   PNEUMONIA       0.78      0.99      0.87       390

    accuracy                           0.82       624
   macro avg       0.87      0.76      0.78       624
weighted avg       0.85      0.82      0.80       624



In [20]:
import tensorflow as tf
from tensorflow.keras import layers

deep_model = tf.keras.models.Sequential([
    layers.Input(shape=(1664,)),  # Input layer 
    layers.Dense(1024, activation='relu'),  # Dense layer with 1024 neurons
    layers.BatchNormalization(),  # Batch normalization
    layers.Dropout(0.3),  # Dropout for regularization

    layers.Dense(512, activation='relu'),  # Dense layer
    layers.BatchNormalization(),  # Batch normalization
    layers.Dropout(0.3),  # Dropout for regularization

    layers.Dense(256, activation='relu'),  # Dense layer
    layers.BatchNormalization(),  # Batch normalization
    layers.Dropout(0.3),  # Dropout for regularization

    layers.Dense(128, activation='relu'),  # Dense layer
    layers.BatchNormalization(),  # Batch normalization
    layers.Dropout(0.3),  # Dropout for regularization

    layers.Dense(1, activation='sigmoid')  # Output layer
])

print("Model created successfully.")

Model created successfully for 1D input data.


In [21]:
print("Training...")
deep_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

print(X_train.shape, y_train.shape)
deep_model.fit(X_train, y_train)
print("Deep NN trained successfully")

Training...
(5216, 1664) (5216,)
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8823 - loss: 0.3110


<keras.src.callbacks.history.History at 0x2f5847c40>

In [22]:
predicted_labels = deep_model.predict(X_test)

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [17]:
predicted_labels = np.rint(predicted_labels).astype(int)

from sklearn.metrics import accuracy_score, classification_report

accuracy = accuracy_score(y_test, predicted_labels)
print(f"Accuracy: {accuracy:.2f}")

report = classification_report(y_test, predicted_labels, target_names=['Normal', 'Pneumonia'])
print(report)

Accuracy: 0.87
              precision    recall  f1-score   support

      Normal       0.92      0.72      0.81       234
   Pneumonia       0.85      0.96      0.90       390

    accuracy                           0.87       624
   macro avg       0.89      0.84      0.86       624
weighted avg       0.88      0.87      0.87       624



In [23]:
# combining models by averaging probabilities

prob_svm = svm_classifier.predict_proba(X_test)[:, 1]  
prob_nn = deep_model.predict(X_test).flatten()

average_prob = (prob_svm + prob_nn) / 2
predictions = (average_prob > 0.5).astype(int)

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [25]:
from sklearn.metrics import accuracy_score, classification_report

accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy:.2f}")

report = classification_report(y_test, predictions, target_names=['Normal', 'Pneumonia'])
print(report)

Accuracy: 0.82
              precision    recall  f1-score   support

      Normal       0.96      0.53      0.69       234
   Pneumonia       0.78      0.99      0.87       390

    accuracy                           0.82       624
   macro avg       0.87      0.76      0.78       624
weighted avg       0.85      0.82      0.80       624



In [29]:
from sklearn.linear_model import LogisticRegression

stacked_features = np.column_stack((prob_svm, prob_nn))

meta_model = LogisticRegression()
meta_model.fit(stacked_features, y_test)

test_prob_svm = svm_classifier.predict_proba(X_test)[:, 1]
test_prob_nn = deep_model.predict(X_test).flatten()

test_stacked_features = np.column_stack((test_prob_svm, test_prob_nn))

final_predictions = meta_model.predict(test_stacked_features)

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [30]:
from sklearn.metrics import accuracy_score, classification_report

accuracy = accuracy_score(y_test, final_predictions)
print(f"Accuracy: {accuracy:.2f}")

report = classification_report(y_test, final_predictions, target_names=['Normal', 'Pneumonia'])
print(report)

Accuracy: 0.87
              precision    recall  f1-score   support

      Normal       0.95      0.68      0.80       234
   Pneumonia       0.84      0.98      0.90       390

    accuracy                           0.87       624
   macro avg       0.90      0.83      0.85       624
weighted avg       0.88      0.87      0.86       624

