In [None]:
!pip install kaggle

In [None]:
 # Select your kaggle.json file
from google.colab import files

# files.upload() returns a dict keyed by file name whose values are the file
# contents. Binding it to a name keeps the notebook from echoing the raw
# credentials (the Kaggle API key) into the saved cell output.
kaggle_upload = files.upload()
print("Uploaded:", list(kaggle_upload.keys()))

In [None]:
!ls -l # List the contents of the current working directory in long (detailed) format.

In [None]:
# NOTE(review): mv reports an error when no file named "Kaggle.json" exists
# (e.g. the upload was already lowercase) — confirm this step is still needed.
!mv "Kaggle.json" kaggle.json # Rename the uploaded file from Kaggle.json to the lowercase kaggle.json.

In [None]:
!mkdir -p ~/.kaggle # Create ~/.kaggle in the user home directory; with -p an already existing directory is not an error.
# -p also creates any missing parent directories.

!mv kaggle.json ~/.kaggle/ # Move kaggle.json from the working directory into ~/.kaggle/.


In [None]:
!chmod 600 ~/.kaggle/kaggle.json # Set the mode of ~/.kaggle/kaggle.json to 600 (read and write for the owner only).

In [None]:
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia # Download the paultimothymooney/chest-xray-pneumonia dataset from Kaggle as a ZIP archive into the current working directory.


In [None]:
!unzip chest-xray-pneumonia.zip -d /content/chest_xray #Extracts the downloaded dataset into a specific directory. Extracts all files and folders from chest-xray-pneumonia.zip into /content/chest_xray.

In [None]:
import os  # file-system helpers used here for directory walking and path joining

# Print the full path of every file found beneath the extraction directory.
for folder, _subfolders, file_names in os.walk('/content/chest_xray'):
    for file_name in file_names:
        full_path = os.path.join(folder, file_name)
        print(full_path)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt # Used for creating graphs and visualizations (e.g., line plots, histograms, etc.).
import seaborn as sns
import cv2 #Used to read, preprocess, and manipulate images in deep learning tasks.
import tensorflow as tf #Provides tools to build and train machine learning models.
from tensorflow.keras.models import Sequential #Sequential API allows for a linear stack of layers to define a model.
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator #Used for real-time data augmentation, which generates batches of image data with transformations like rotation, zoom, or flipping
from tensorflow.keras.callbacks import ReduceLROnPlateau #Callback function that reduces the learning rate when a metric (like validation loss) has stopped improving.
from sklearn.model_selection import train_test_split #Splits datasets into training and test subsets.
from sklearn.metrics import classification_report, confusion_matrix # Provides precision, recall, F1-score, and support for classification models.
# confusion_matrix evaluates a classification model by tabulating true vs. predicted classes.

In [None]:
# Load and preprocess image data for the classification task: every image is read
# as grayscale, resized to img_size x img_size, given a trailing channel axis and
# paired with the numeric index of its class folder.
labels = ['PNEUMONIA', 'NORMAL']  # class folder names; the list index is the numeric label (0 = PNEUMONIA, 1 = NORMAL)
img_size = 150  # target height and width, in pixels, of every resized image


def get_training_data(data_dir):
    """Load every readable image below ``data_dir`` together with its class index.

    Parameters
    ----------
    data_dir : str
        Directory that contains one sub-directory per entry of ``labels``.

    Returns
    -------
    numpy.ndarray
        Object array built from ``[image, class_num]`` pairs, where ``image``
        has shape ``(img_size, img_size, 1)`` and ``class_num`` is the index of
        the class folder in ``labels``.

    Notes
    -----
    Entries that OpenCV cannot decode (``cv2.imread`` returns ``None``) or
    cannot resize are skipped with a message naming the file, so a stray
    non-image file does not abort the load.
    """
    data = []
    for class_num, label in enumerate(labels):
        path = os.path.join(data_dir, label)
        # Sorted so the sample order is reproducible regardless of file-system order.
        for img in sorted(os.listdir(path)):
            img_path = os.path.join(path, img)
            try:
                img_arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img_arr is None:
                    # imread signals an unreadable file by returning None instead of raising.
                    print(f"Skipping unreadable image: {img_path}")
                    continue
                resized_arr = cv2.resize(img_arr, (img_size, img_size))
            except cv2.error as e:
                print(f"Skipping {img_path}: {e}")
                continue
            # Add the channel axis expected by the model input.
            data.append([resized_arr.reshape((img_size, img_size, 1)), class_num])
    # dtype=object keeps the (image, label) pairs intact instead of forcing a numeric array.
    return np.array(data, dtype=object)

# Load the train, test and validation splits (in that order) from the extracted archive.
split_dirs = (
    '/content/chest_xray/chest_xray/chest_xray/train',
    '/content/chest_xray/chest_xray/chest_xray/test',
    '/content/chest_xray/chest_xray/chest_xray/val',
)
train, test, val = map(get_training_data, split_dirs)

In [None]:
# List the top-level contents of /content.
!ls /content

In [None]:
# List the extraction directory, then a train/ folder and its PNEUMONIA/ sub-folder.
# NOTE(review): these paths sit directly under /content/chest_xray, whereas the loader
# reads from /content/chest_xray/chest_xray/chest_xray/... — confirm which layout exists.
!ls /content/chest_xray
!ls /content/chest_xray/train
!ls /content/chest_xray/train/PNEUMONIA

In [None]:
# Recursively list everything under /content/chest_xray.
!ls -R /content/chest_xray

In [None]:
# Separate each loaded split (train, test, val) into an image array and a label
# array, divide the pixel values by 255 and force the
# (N, img_size, img_size, 1) layout expected by the model.

def _split_features_labels(dataset):
    """Return ``(images, labels)`` arrays built from an iterable of (image, label) pairs."""
    images = [image for image, _ in dataset]
    targets = [target for _, target in dataset]
    scaled = np.array(images) / 255.0
    return scaled.reshape(-1, img_size, img_size, 1), np.array(targets)


x_train, y_train = _split_features_labels(train)
x_test, y_test = _split_features_labels(test)
x_val, y_val = _split_features_labels(val)


In [None]:
# Real-time data augmentation for the training images via Keras' ImageDataGenerator:
# the generator yields randomly transformed variants of the existing images,
# which helps the model generalize.
augmentation_options = {
    "rotation_range": 30,
    "zoom_range": 0.2,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "horizontal_flip": True,
}
datagen = ImageDataGenerator(**augmentation_options)
datagen.fit(x_train)


In [None]:
# CNN for binary classification of single-channel img_size x img_size images
# (e.g. grayscale chest X-rays): five convolution stages followed by a small
# dense head ending in one sigmoid unit.

def _conv_stage(filters, dropout_rate=None, **conv_kwargs):
    """Return the layers of one stage: Conv2D -> [Dropout] -> BatchNormalization -> MaxPool2D."""
    stage = [Conv2D(filters, (3, 3), activation='relu', **conv_kwargs)]
    if dropout_rate is not None:
        stage.append(Dropout(dropout_rate))
    stage.append(BatchNormalization())
    stage.append(MaxPool2D(2, 2))
    return stage


conv_stages = (
    _conv_stage(32, input_shape=(img_size, img_size, 1)),
    _conv_stage(64, dropout_rate=0.1),
    _conv_stage(64),
    _conv_stage(128, dropout_rate=0.2),
    _conv_stage(256, dropout_rate=0.2),
)
# Flatten the feature maps and classify with a dense head.
classifier_head = [
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
]
model = Sequential([layer for stage in conv_stages for layer in stage] + classifier_head)
model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
model.summary()


In [None]:
# Callback that lowers the learning rate once the monitored validation accuracy
# has stopped improving for `patience` epochs, never going below `min_lr`.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.3,
    patience=2,
    min_lr=1e-6,
)


In [None]:
# Train on augmented batches drawn from the generator, validate on the
# validation split, and let the callback adjust the learning rate.
train_batches = datagen.flow(x_train, y_train, batch_size=32)
history = model.fit(
    train_batches,
    validation_data=(x_val, y_val),
    epochs=12,
    callbacks=[learning_rate_reduction],
)


In [None]:
# Score the trained model on the held-out test split and report both metrics.
test_metrics = model.evaluate(x_test, y_test)
loss, accuracy = test_metrics
print(f"Model Test Loss: {loss}")
print(f"Model Test Accuracy: {accuracy * 100:.2f}%")


In [None]:
# Plot the per-epoch training history: accuracy on the left, loss on the right.
train_acc = history.history['accuracy']
train_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']

# Derive the x-axis from the recorded history so it always matches the number
# of epochs that were actually run, instead of repeating the epoch count by hand.
epochs = list(range(len(train_acc)))

fig, ax = plt.subplots(1, 2)  # two panels side by side in one figure
fig.set_size_inches(20, 10)

# First plot: training & validation accuracy
ax[0].plot(epochs, train_acc, 'go-', label='Training Accuracy')
ax[0].plot(epochs, val_acc, 'ro-', label='Validation Accuracy')
ax[0].set_title('Training & Validation Accuracy')
ax[0].legend()
ax[0].set_xlabel("Epochs")
ax[0].set_ylabel("Accuracy")

# Second plot: training & validation loss
ax[1].plot(epochs, train_loss, 'g-o', label='Training Loss')
ax[1].plot(epochs, val_loss, 'r-o', label='Validation Loss')
# The panel shows training/validation loss, so title and axis label say exactly that.
ax[1].set_title('Training & Validation Loss')
ax[1].legend()
ax[1].set_xlabel("Epochs")
ax[1].set_ylabel("Loss")
plt.show()

In [None]:
# Predict on the test set and turn the model outputs into hard class labels.
probabilities = model.predict(x_test)

# Outputs above the 0.5 threshold become class 1, everything else class 0;
# the result is flattened to one dimension for readability.
above_threshold = probabilities > 0.5
predictions = above_threshold.astype("int32").reshape(-1)

# Show the first 15 predicted labels.
predictions[:15]


In [None]:
# Per-class report for the test set; the names follow the numeric label order.
class_names = ['Pneumonia (Class 0)', 'Normal (Class 1)']
report = classification_report(y_test, predictions, target_names=class_names)
print(report)

In [None]:
# Confusion matrix of the true test labels against the predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=predictions)
cm

In [None]:
# Wrap the confusion matrix in a DataFrame whose rows and columns are labeled '0' and '1'.
class_ids = ['0', '1']
cm = pd.DataFrame(data=cm, index=class_ids, columns=class_ids)

In [None]:
# Heatmap of the confusion matrix. cm was built as confusion_matrix(y_test, predictions),
# so rows are the actual classes and columns the predicted ones; the axes are
# labeled accordingly so the figure can be read on its own.
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(
    cm,
    cmap="Blues",
    linecolor='black',
    linewidth=1,
    annot=True,
    fmt='',
    xticklabels=labels,
    yticklabels=labels,
    ax=ax,
)
ax.set_xlabel("Predicted class")
ax.set_ylabel("Actual class")
ax.set_title("Confusion matrix (test set)")
plt.show()

In [None]:
# Indices of the test samples the model classified correctly / incorrectly.
is_match = predictions == y_test
correct = np.nonzero(is_match)[0]
incorrect = np.nonzero(~is_match)[0]

In [None]:
# Show the first six correctly classified test images in a 3x2 grid, each
# titled with its predicted and actual numeric class.
for position, c in enumerate(correct[:6], start=1):
    plt.subplot(3, 2, position)
    plt.xticks([])
    plt.yticks([])
    # Use img_size rather than a literal so the grid keeps working if the input size changes.
    plt.imshow(x_test[c].reshape(img_size, img_size), cmap="gray", interpolation='none')
    plt.title("Predicted Class {},Actual Class {}".format(predictions[c], y_test[c]))
# Lay the grid out once, after all panels exist.
plt.tight_layout()
plt.show()

In [None]:
# === USER UPLOADED IMAGE PREDICTION ===
# Upload one or more chest X-ray images, run each through the same preprocessing
# as the training data (grayscale, img_size x img_size, divided by 255) and show
# the predicted class.
from google.colab import files

print("\n\n=== Upload a chest X-ray image to predict ===")
uploaded = files.upload()

for img_name in uploaded.keys():
    img_path = img_name
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # imread returns None for files it cannot decode; skip them instead of crashing in resize.
        print(f"\nCould not read '{img_name}' as an image; skipping it.")
        continue
    img = cv2.resize(img, (img_size, img_size))
    img = img.reshape(1, img_size, img_size, 1) / 255.0
    prediction = model.predict(img)
    # Training labels are indices into `labels` (0 = PNEUMONIA, 1 = NORMAL), so an
    # output above 0.5 means class 1. Looking the name up in `labels` keeps this
    # mapping consistent with the training data and with the 0.5 threshold used
    # for the test-set predictions.
    predicted_class = int(prediction[0][0] > 0.5)
    result = labels[predicted_class]
    plt.imshow(cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB))
    plt.title(f"Prediction: {result}")
    plt.axis('off')
    plt.show()
    print(f"\nThe uploaded X-ray image is predicted to be: **{result}**")