<a href="https://colab.research.google.com/github/Evans-Junior/ChestDiseasePrediction_Group17/blob/main/PneumoniaModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **MOUNTING THE DATA FROM DRIVE**

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset folders referenced by later cells are reachable as ordinary paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **DIRECTORY TO IMAGES**

In [None]:
# Dataset locations on the mounted Drive.
# NOTE(review): neither variable is referenced by any later cell visible in this
# notebook (those cells use their own hard-coded paths), and the two paths spell
# the Drive root differently ('MyDrive' vs 'My Drive') — confirm whether this
# cell is still needed for the pneumonia workflow.
tuberculosis_dir = '/content/drive/MyDrive/AI/Dataset/main/'
normal_dir = '/content/drive/My Drive/AI/Dataset/TuberculosisData/Normal'


***Pinning Keras to Version 2.12.0***

In [None]:
!pip install  keras==2.12.0

Collecting keras==2.12.0
  Downloading keras-2.12.0-py2.py3-none-any.whl (1.7 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.7 MB[0m [31m1.5 MB/s[0m eta [36m0:00:02[0m[2K     [91m━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/1.7 MB[0m [31m5.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: keras
  Attempting uninstall: keras
    Found existing installation: keras 2.14.0
    Uninstalling keras-2.14.0:
      Successfully uninstalled keras-2.14.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.14.0 requires keras<2.15,>=2.14.0, b

# **IMAGE PRE-PROCESSING**

In [None]:
import os
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
normal_data ='/content/drive/MyDrive/chestData/normal'
pneumonia_data ='/content/drive/MyDrive/chestData/PNEUMONIA'

# Collect (filename, label) pairs for every .jpeg/.png file in each class folder
pneumonia_path = [(path,'PNEUMONIA') for path in os.listdir(pneumonia_data) if path.endswith(('.jpeg','.png'))]
normal_path = [(path,'normal') for path in os.listdir(normal_data) if path.endswith(('.jpeg','.png'))]


# Combine and shuffle data
all_paths = pneumonia_path + normal_path
random.shuffle(all_paths)

# Map each label back to the folder its filenames were listed from
class_dirs = {'PNEUMONIA': pneumonia_data, 'normal': normal_data}

# Display up to 16 shuffled images with labels in a 4x4 grid.
# Slicing (instead of indexing range(16)) avoids an IndexError when the
# folders hold fewer than 16 matching files.
plt.figure(figsize=(12,12))
for i, (img_path, label) in enumerate(all_paths[:16]):
  plt.subplot(4, 4, i+1)
  img = mpimg.imread(os.path.join(class_dirs[label], img_path))
  plt.imshow(img, cmap='gray')
  plt.title(f'Label: {label}')
  plt.axis('off')

plt.show()

In [None]:
import keras,os
import tensorflow as tf
import numpy as np
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


# Function to load and preprocess images from a directory
def load_images_from_directory(directory, target_size=(224, 224), extensions=(".jpeg", ".png")):
    """Load, resize and normalize every matching image found in ``directory``.

    The class label given to every image is the final path component of
    ``directory`` (e.g. ``'.../phe/normal'`` -> ``'normal'``).

    Args:
        directory: Folder containing the image files of a single class.
        target_size: ``(height, width)`` each image is resized to on load.
        extensions: File-name suffixes to accept; matched case-insensitively.

    Returns:
        A tuple ``(images, labels)``. ``images`` holds one array per loaded
        file with pixel values divided by 255; ``labels`` holds the directory
        label repeated once per image. When no file matches, ``images`` has
        shape ``(0, height, width, 3)`` so it can still be concatenated with
        the result of another call.
    """
    # normpath drops a trailing separator; without it basename('.../normal/')
    # would be the empty string and every image would get '' as its label.
    label = os.path.basename(os.path.normpath(directory))
    wanted_suffixes = tuple(ext.lower() for ext in extensions)

    images = []
    # sorted() makes the sample order independent of the filesystem's listing
    # order, so a later seeded shuffle/split is reproducible across runs.
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(wanted_suffixes):
            continue
        filepath = os.path.join(directory, filename)
        img = tf.keras.preprocessing.image.load_img(filepath, target_size=target_size)
        img_array = tf.keras.preprocessing.image.img_to_array(img)
        img_array /= 255.0  # Normalize pixel values to [0, 1]
        images.append(img_array)

    if not images:
        # Assumes load_img's default RGB colour mode (3 channels) — the same
        # channel count the model's Input layer expects.
        empty_images = np.empty((0, *target_size, 3), dtype=np.float32)
        return empty_images, np.array([], dtype=str)

    return np.array(images), np.array([label] * len(images))

# Read both classes from Drive; each call returns an (images, labels) pair
pheumonia_images, pheumonia_labels = load_images_from_directory(
    '/content/drive/MyDrive/AI/phe/new'
)
normal_images, normal_labels = load_images_from_directory(
    '/content/drive/MyDrive/AI/phe/normal'
)

# Stack the pneumonia samples first and the normal samples after them,
# in the same order for images and labels so the two arrays stay aligned
all_images = np.concatenate((pheumonia_images, normal_images), axis=0)
all_labels = np.concatenate((pheumonia_labels, normal_labels), axis=0)

# **SPLITTING INTO TEST, TRAIN, AND VALIDATION IMAGES**

In [None]:
# Encode the string labels as integers. LabelEncoder numbers the classes in
# sorted order of their names, and the loader labels each image with its
# directory's basename, so with the 'new' (pneumonia) and 'normal' folders
# loaded above this yields 0 = pneumonia, 1 = normal.
# Check label_encoder.classes_ to confirm the mapping.
label_encoder = LabelEncoder()
all_labels_encoded = label_encoder.fit_transform(all_labels)

# Hold out 20 % of the data, then cut that hold-out in half to get the
# validation and test sets (80 / 10 / 10 overall). stratify keeps the class
# ratio the same in every subset, which matters for hold-out sets this small.
x_train, x_temp, y_train, y_temp = train_test_split(
    all_images, all_labels_encoded,
    test_size=0.2, random_state=42, stratify=all_labels_encoded)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp,
    test_size=0.5, random_state=42, stratify=y_temp)


**Module Imports for the Convolutional Neural Network**

In [None]:
from keras.layers import Dense, Dropout, Input, Conv2D, MaxPooling2D, Flatten
from keras.models import Model
from keras.optimizers import Adam

# Shared hyperparameters for the CNN
image_size = 224  # side length of the square model input; the loader resizes images to (224, 224)
batch_size = 32
filters, kernel_size = 32, (3, 3)  # convolution width and window
dropout = 0.5

**Building Convolutional Neural Network (CNN) using GridSearchCV**

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
# Function to create the model using functional API
def create_model(learning_rate=0.001):
    """Build and compile the binary-classification CNN.

    The architecture is three 'relu' convolution layers (max pooling after
    the first two), flattened, followed by dropout and a single sigmoid unit.
    Layer sizes come from the notebook-level constants ``image_size``,
    ``filters``, ``kernel_size`` and ``dropout`` so that editing the
    configuration cell actually changes the model.

    Args:
        learning_rate: Step size passed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` using binary cross-entropy loss and
        reporting accuracy.
    """
    inputs = Input(shape=(image_size, image_size, 3))  # Assuming RGB images
    y = Conv2D(filters=filters, kernel_size=kernel_size, activation='relu')(inputs)
    y = MaxPooling2D()(y)
    y = Conv2D(filters=filters, kernel_size=kernel_size, activation='relu')(y)
    y = MaxPooling2D()(y)
    y = Conv2D(filters=filters, kernel_size=kernel_size, activation='relu')(y)
    y = Flatten()(y)
    y = Dropout(dropout)(y)
    # One sigmoid unit: the output is the probability of the class encoded as 1
    outputs = Dense(1, activation='sigmoid')(y)

    model = Model(inputs=inputs, outputs=outputs)
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Wrap the model factory so scikit-learn can build a fresh network per fit
keras_classifier = KerasClassifier(build_fn=create_model)

# Candidate training settings; every epochs/batch_size combination is tried
param_grid = dict(
    epochs=[20, 30],
    batch_size=[32, 64],
)

# 3-fold cross-validated search over the grid. validation_data is handed to
# fit() as an extra keyword argument alongside the training arrays.
grid_search = GridSearchCV(
    estimator=keras_classifier,
    param_grid=param_grid,
    cv=3,
)
grid_result = grid_search.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
)

# Report the winning combination
print("Best parameters: ", grid_result.best_params_)

  keras_classifier = KerasClassifier(build_fn=create_model)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoc

In [None]:
from sklearn.metrics import roc_auc_score, accuracy_score

def evaluate_split(estimator, x, y_true):
    """Predict on one data split and score the predictions.

    Args:
        estimator: Fitted object exposing ``predict`` and ``predict_proba``.
        x: Input samples of the split.
        y_true: Ground-truth labels for ``x``.

    Returns:
        Tuple ``(y_pred, y_pred_proba, auc, accuracy)``, where
        ``y_pred_proba`` is column 1 of ``predict_proba`` and the two scores
        come from ``roc_auc_score`` and ``accuracy_score``.
    """
    y_pred = estimator.predict(x)
    y_pred_proba = estimator.predict_proba(x)[:, 1]
    auc = roc_auc_score(y_true, y_pred_proba)
    accuracy = accuracy_score(y_true, y_pred)
    return y_pred, y_pred_proba, auc, accuracy


# Predictions and scores for the validation set
y_val_pred, y_val_pred_proba, auc_val, accuracy_val = evaluate_split(grid_result, x_val, y_val)

# Predictions and scores for the test set
y_test_pred, y_test_pred_proba, auc_test, accuracy_test = evaluate_split(grid_result, x_test, y_test)

# Print the results
print("Validation AUC:", auc_val)
print("Validation Accuracy:", accuracy_val)
print("\nTest AUC:", auc_test)
print("Test Accuracy:", accuracy_test)

Validation AUC: 0.9975490196078431
Validation Accuracy: 0.9875

Test AUC: 0.998868778280543
Test Accuracy: 0.9629629629629629


**Validation and Testing of results**

In [None]:
# Take the best estimator found by the finished grid search
best_model = grid_result.best_estimator_

# --- Validation split: class predictions, column-1 probabilities, metrics ---
y_val_pred = best_model.predict(x_val)
y_val_pred_proba = best_model.predict_proba(x_val)[:, 1]
auc_val = roc_auc_score(y_val, y_val_pred_proba)
accuracy_val = accuracy_score(y_val, y_val_pred)

# --- Test split: same steps on the held-out data ---
y_test_pred = best_model.predict(x_test)
y_test_pred_proba = best_model.predict_proba(x_test)[:, 1]
auc_test = roc_auc_score(y_test, y_test_pred_proba)
accuracy_test = accuracy_score(y_test, y_test_pred)

# Summary of both splits
print("Validation AUC:", auc_val)
print("Validation Accuracy:", accuracy_val)
print("\nTest AUC:", auc_test)
print("Test Accuracy:", accuracy_test)


Validation AUC: 0.9975490196078431
Validation Accuracy: 0.9875

Test AUC: 0.998868778280543
Test Accuracy: 0.9629629629629629


In [None]:
# Save the Keras model held by the best estimator's .model attribute.
# NOTE(review): the path is relative, so the file presumably lands in the
# runtime's working directory rather than on the mounted Drive — confirm it is
# copied somewhere persistent before the Colab session ends.
best_model.model.save("PneumoniaModel.h5")