# **CSCE 5214-Deep Learning**

**Deep Learning Framework for Automated Pneumonia
Detection in Chest X-rays using CNN and Apache Spark**

# **Part-1: Setup**
**Install Java**

In [None]:
!apt-get install openjdk-8-jdk -y

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  fonts-dejavu-core fonts-dejavu-extra libatk-wrapper-java
  libatk-wrapper-java-jni libgail-common libgail18 libgtk2.0-0 libgtk2.0-bin
  libgtk2.0-common librsvg2-common libxt-dev libxtst6 libxxf86dga1
  openjdk-8-jdk-headless openjdk-8-jre openjdk-8-jre-headless x11-utils
Suggested packages:
  gvfs libxt-doc openjdk-8-demo openjdk-8-source visualvm libnss-mdns
  fonts-nanum fonts-ipafont-gothic fonts-ipafont-mincho fonts-wqy-microhei
  fonts-wqy-zenhei fonts-indic mesa-utils
The following NEW packages will be installed:
  fonts-dejavu-core fonts-dejavu-extra libatk-wrapper-java
  libatk-wrapper-java-jni libgail-common libgail18 libgtk2.0-0 libgtk2.0-bin
  libgtk2.0-common librsvg2-common libxt-dev libxtst6 libxxf86dga1
  openjdk-8-jdk openjdk-8-jdk-headless openjdk-8-jre openjdk-8-jre-headless
  x11-utils
0 upgraded, 18 newly installed,

**Install findspark**

In [None]:
!pip install -q findspark

In [None]:
# Install Java
!apt-get install openjdk-8-jdk-headless -qq > /dev/null

# Download Spark
!curl -O https://downloads.apache.org/spark/spark-3.5.5/spark-3.5.5-bin-hadoop3.tgz
!tar xf spark-3.5.5-bin-hadoop3.tgz

# Install findspark to locate Spark in the environment
!pip install -q findspark



  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  382M  100  382M    0     0  51.0M      0  0:00:07  0:00:07 --:--:-- 56.1M


In [None]:
import os
import findspark

# Set JAVA_HOME environment variable
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"

# Set SPARK_HOME environment variable
os.environ["SPARK_HOME"] = "/content/spark-3.5.5-bin-hadoop3"

# Initialize findspark
findspark.init()


In [None]:
!pip install pyspark




In [None]:
!pip install pillow




In [None]:
!pip install --upgrade tensorflow gradio


Collecting tensorflow
  Downloading tensorflow-2.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting gradio
  Downloading gradio-5.27.0-py3-none-any.whl.metadata (16 kB)
Collecting tensorboard~=2.19.0 (from tensorflow)
  Downloading tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting ml-dtypes<1.0.0,>=0.5.1 (from tensorflow)
  Downloading ml_dtypes-0.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (21 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.9.0 (from gradio)
  Downloading gradio_client-1.9.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.met

In [None]:
!pip install gradio



In [None]:
!pip install tensorflow



In [None]:
!pip install pyspark pillow tensorflow gradio numpy



In [None]:
!pip install numpy



In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from google.colab import drive
from pyspark.sql import SparkSession
from pyspark.sql.functions import lit, udf
from pyspark.ml.image import ImageSchema
from PIL import Image
from pyspark.ml import image
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
import gradio as gr
from pyspark.sql.types import ArrayType, FloatType
import traceback
import numpy as np
from pyspark import StorageLevel
from tensorflow.keras.utils import to_categorical

In [None]:
train_dir = "/content/drive/MyDrive/chest_xrays/train/train"
test_dir = "/content/drive/MyDrive/chest_xrays/test/test"

In [None]:
import os

normal_count = len(os.listdir(os.path.join(train_dir, "NORMAL")))
pneumonia_count = len(os.listdir(os.path.join(train_dir, "PNEUMONIA")))

print(f"Normal Images: {normal_count}")
print(f"Pneumonia Images: {pneumonia_count}")


Normal Images: 1341
Pneumonia Images: 3875


In [None]:
# Configure Spark session
spark = SparkSession.builder \
    .appName("XRayImageClassification") \
    .master("local[4]") \
    .config("spark.executor.memory", "8g") \
    .config("spark.driver.memory", "8g") \
    .config("spark.driver.maxResultSize", "8g") \
    .config("spark.executor.cores", "4") \
    .getOrCreate()

IMG_WIDTH, IMG_HEIGHT = 150, 150
BATCH_SIZE = 32

In [None]:
# Function to load images from a directory and add labels
def load_images_spark(train_dir, label):
    """Read a directory of images with Spark and tag every row with one label.

    Args:
        train_dir: Directory handed to Spark's "image" data source.
        label: Constant value stored in the added "label" column.

    Returns:
        A Spark DataFrame holding the "image" column produced by the reader
        (invalid images are dropped via the "dropInvalid" option) plus "label".
    """
    image_reader = spark.read.format("image").option("dropInvalid", "true")
    return image_reader.load(train_dir).withColumn("label", lit(label))

# Load NORMAL and PNEUMONIA images from the training set
normal_train_df = load_images_spark(os.path.join(train_dir, "NORMAL"), label=0).sample(withReplacement=False, fraction=1.0, seed=42).limit(500)
pneumonia_train_df = load_images_spark(os.path.join(train_dir, "PNEUMONIA"), label=1).sample(withReplacement=False, fraction=1.0, seed=42).limit(500)

# Combine the dataframes for the training set
train_df = normal_train_df.union(pneumonia_train_df)

# Show the DataFrame
train_df.show(5)

+--------------------+-----+
|               image|label|
+--------------------+-----+
|{file:///content/...|    0|
|{file:///content/...|    0|
|{file:///content/...|    0|
|{file:///content/...|    0|
|{file:///content/...|    0|
+--------------------+-----+
only showing top 5 rows



In [None]:
def preprocess_image(image_path):
    """Load one image from disk and return it as a normalized nested list.

    Args:
        image_path: Either a plain filesystem path or a ``file:`` URI (the form
            found in the Spark image column's ``origin`` field).

    Returns:
        The image converted to RGB, resized to (IMG_WIDTH, IMG_HEIGHT) and scaled
        to the [0, 1] range, as nested Python lists; ``None`` when the path is
        empty/None or the image cannot be read.
    """
    # Local import keeps this block self-contained (stdlib only).
    from urllib.parse import unquote, urlparse

    # A missing origin used to crash on ``.replace`` before the try block was entered.
    if not image_path:
        return None

    # Turn a file URI into a real path, decoding percent-escapes such as %20;
    # stripping the "file:" prefix alone leaves encoded characters in the path.
    parsed = urlparse(image_path)
    if parsed.scheme == "file":
        image_path = unquote(parsed.path)

    try:
        # The context manager closes the underlying file handle; convert() returns
        # a fully loaded copy, so the pixels remain usable after the file is closed.
        with Image.open(image_path) as raw_img:
            img = raw_img.convert("RGB").resize((IMG_WIDTH, IMG_HEIGHT))
        img_array = np.array(img) / 255.0  # Normalize pixel values
        return img_array.tolist()
    except Exception as e:
        print(f"Error processing image {image_path}: {e}")
        traceback.print_exc()
        return None

# Register UDF for image preprocessing
preprocess_udf = udf(preprocess_image, ArrayType(ArrayType(ArrayType(FloatType()))))

# Processing and persisting DataFrame
try:
    processed_df = train_df.withColumn("processed_image", preprocess_udf(train_df["image"]["origin"]))
    processed_df = processed_df.repartition(4)  # Reduce partitions to prevent memory overload
    processed_df.persist(StorageLevel.MEMORY_AND_DISK)
    processed_df.show(5)  # Display processed data to verify transformation
except Exception as e:
    print("Error applying preprocessing UDF to DataFrame:")
    traceback.print_exc()

# Image extraction function for RDD processing
def extract_image_data(row):
    """Pull the preprocessed pixels and the label out of one row.

    Args:
        row: Mapping-like object indexable by 'processed_image' and 'label'.

    Returns:
        ``(pixels_as_numpy_array, label)`` on success, or ``(None, None)`` when
        the row has no processed image or the fields cannot be read.
    """
    try:
        pixels = row['processed_image']
        if pixels is None:
            # np.array(None) is a 0-d object array, not None, so without this
            # guard failed images would pass an ``x[0] is not None`` filter.
            return None, None
        return np.array(pixels), row['label']
    except Exception as e:
        print(f"Error extracting image data for row: {row}")
        traceback.print_exc()
        return None, None

# Collect preprocessed data
preprocessed_data = []

try:
    processed_rdd = processed_df.rdd.map(extract_image_data)
    preprocessed_data = processed_rdd.filter(lambda x: x[0] is not None).collect()
except Exception as e:
    print("Error collecting RDD into a list:")
    traceback.print_exc()


print(f"Collected {len(preprocessed_data)} items after initial None filter.")

# Shape every usable image must have once converted to a NumPy array
expected_shape = (IMG_WIDTH, IMG_HEIGHT, 3)

# Keep only the items whose pixel data converts cleanly to an array of the
# expected shape; everything else is reported and skipped.
valid_data = []
for img_array, label in preprocessed_data:
    try:
        current_img_array = np.array(img_array)
    except Exception as shape_check_e:
        # e.g. ragged nested lists that NumPy refuses to turn into an array
        print(f"Skipping item due to error during shape check: {shape_check_e}")
        continue

    if current_img_array.shape == expected_shape:
        valid_data.append((current_img_array, label))  # Store the array, not the list
    else:
        print(f"Skipping item with unexpected shape: {current_img_array.shape}")


print(f"Retained {len(valid_data)} items after shape validation.")

# Converting to NumPy arrays using the filtered data
if valid_data:
    try:
        # Now create the final arrays from the validated data
        train_images = np.array([data[0] for data in valid_data]) # data[0] is already a numpy array here
        train_labels = np.array([data[1] for data in valid_data])

        print(f"train_images shape: {train_images.shape}, train_labels shape: {train_labels.shape}")

        # Save preprocessed data for future use
        np.save("train_images.npy", train_images)
        np.save("train_labels.npy", train_labels)
        print("Preprocessed data saved as NumPy files.")

    except Exception as e:
        print("Error converting VALIDATED data to NumPy arrays or saving files:")
        traceback.print_exc()
else:
    print("No valid preprocessed data available after shape validation for NumPy conversion.")

+--------------------+-----+--------------------+
|               image|label|     processed_image|
+--------------------+-----+--------------------+
|{file:///content/...|    0|[[[0.0, 0.0, 0.0]...|
|{file:///content/...|    0|[[[0.6901961, 0.6...|
|{file:///content/...|    0|[[[0.0, 0.0, 0.0]...|
|{file:///content/...|    0|[[[0.1882353, 0.1...|
|{file:///content/...|    0|[[[0.011764706, 0...|
+--------------------+-----+--------------------+
only showing top 5 rows

Collected 1000 items after initial None filter.
Retained 1000 items after shape validation.
train_images shape: (1000, 150, 150, 3), train_labels shape: (1000,)
Preprocessed data saved as NumPy files.


In [None]:
import os
import numpy as np

# Check if the preprocessed data exists
if os.path.exists("train_images.npy") and os.path.exists("train_labels.npy"):
    # Load preprocessed data
    train_images = np.load("train_images.npy")
    train_labels = np.load("train_labels.npy")
    print("Loaded preprocessed data from saved files.")
else:
    print("Preprocessed data not found. Please run the preprocessing again.")

Loaded preprocessed data from saved files.


In [None]:
# Define Baseline Model
baseline_model = Sequential([
    Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3)),
    Conv2D(16, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Conv2D(32, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

baseline_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
baseline_model.summary()


In [None]:
# Train Baseline Model
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

baseline_history = baseline_model.fit(
    train_images, train_labels,
    batch_size=BATCH_SIZE,
    epochs=10,
    validation_split=0.2,
    callbacks=[early_stopping]
)


Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 67ms/step - accuracy: 0.5078 - loss: 1.0475 - val_accuracy: 0.7900 - val_loss: 0.5731
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.8516 - loss: 0.4441 - val_accuracy: 0.8900 - val_loss: 0.2793
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.9152 - loss: 0.2176 - val_accuracy: 0.9000 - val_loss: 0.2218
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.9332 - loss: 0.1585 - val_accuracy: 0.9000 - val_loss: 0.2469
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.9495 - loss: 0.1224 - val_accuracy: 0.9000 - val_loss: 0.2539
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.9478 - loss: 0.1114 - val_accuracy: 0.9100 - val_loss: 0.2549
Epoch 7/10
[1m25/25[0m [32m━━━━

In [None]:
# Evaluate Baseline Model on Training Data
train_loss_baseline, train_accuracy_baseline = baseline_model.evaluate(train_images, train_labels)
print(f"✅ Baseline Model Train Accuracy: {train_accuracy_baseline*100:.2f}%")



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9902 - loss: 0.0388
✅ Baseline Model Train Accuracy: 97.80%


In [None]:
# Deeper CNN than the baseline: three Conv2D/MaxPooling2D stages (32, 64, 128 filters)
# followed by a 128-unit dense layer and a single sigmoid output unit.
model = Sequential([
    Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3)),  # Input layer: images of IMG_WIDTH x IMG_HEIGHT with 3 channels
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),  # Drop 50% of the units during training to reduce overfitting
    Dense(1, activation="sigmoid")  # Single sigmoid unit: one score in [0, 1] for the binary label
])

# Compile the model before fitting
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])  # Binary cross-entropy pairs with the sigmoid output

In [None]:
import os
import numpy as np

# Check if the preprocessed data exists
if os.path.exists("train_images.npy") and os.path.exists("train_labels.npy"):
    # Load preprocessed data
    train_images = np.load("train_images.npy")
    train_labels = np.load("train_labels.npy")
    print("Loaded preprocessed data from saved files.")
else:
    print("Preprocessed data not found. Please run the preprocessing again.")

from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model.fit(
    train_images, train_labels,
    batch_size=BATCH_SIZE,
    epochs=10,
    validation_split=0.2,
    callbacks=[early_stopping]
    )

Loaded preprocessed data from saved files.
Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 99ms/step - accuracy: 0.5351 - loss: 0.8867 - val_accuracy: 0.4500 - val_loss: 0.6672
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - accuracy: 0.7222 - loss: 0.5186 - val_accuracy: 0.9000 - val_loss: 0.2824
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.8977 - loss: 0.2370 - val_accuracy: 0.9200 - val_loss: 0.2286
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.9252 - loss: 0.1980 - val_accuracy: 0.9200 - val_loss: 0.2083
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.9567 - loss: 0.1500 - val_accuracy: 0.8950 - val_loss: 0.2630
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - accuracy: 0.9492 - loss: 0.1597 - val_accuracy: 0.9200 - val_loss

<keras.src.callbacks.history.History at 0x7dad9b7b1110>

In [None]:
model.save("/content/drive/MyDrive/chest_xrays/model.keras")

In [None]:
# Reload the model saved to Google Drive and (re)compile it for evaluation.
from tensorflow.keras.models import load_model
model=load_model("/content/drive/MyDrive/chest_xrays/model.keras")
# NOTE(review): the model was compiled with optimizer='adam' before it was saved;
# this call swaps in RMSprop. Confirm the change is intentional (or drop the recompile).
model.compile(optimizer='RMSprop', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# -------------------
# Step 1: Load Test Data
# -------------------
import os
import numpy as np
from PIL import Image

# Load test images properly
def load_images_from_folder(folder_path):
    """Load every image below ``folder_path`` into NumPy arrays.

    ``folder_path`` is expected to contain one sub-directory per class. Images in
    a sub-directory named "NORMAL" get label 0; images in any other sub-directory
    get label 1.

    Args:
        folder_path: Directory whose sub-directories hold the images.

    Returns:
        ``(images, labels)``: ``images`` stacks the RGB images resized to
        (IMG_WIDTH, IMG_HEIGHT) and scaled to [0, 1]; ``labels`` holds the
        matching 0/1 labels. Both are empty arrays when nothing could be loaded.
    """
    images = []
    labels = []
    # Sorted listings make the order of the returned arrays reproducible.
    for label_folder in sorted(os.listdir(folder_path)):
        label_folder_path = os.path.join(folder_path, label_folder)
        # Hidden entries (.DS_Store, .ipynb_checkpoints) and stray files are not
        # class folders; calling os.listdir on a file raises NotADirectoryError.
        if label_folder.startswith(".") or not os.path.isdir(label_folder_path):
            continue
        label = 0 if label_folder == "NORMAL" else 1  # 0 for Normal, 1 for Pneumonia
        for filename in sorted(os.listdir(label_folder_path)):
            # Skip hidden files within the class folder
            if filename.startswith("."):
                continue
            img_path = os.path.join(label_folder_path, filename)
            try:
                # Close the file handle promptly; convert() returns a loaded copy.
                with Image.open(img_path) as raw_img:
                    img = raw_img.convert("RGB").resize((IMG_WIDTH, IMG_HEIGHT))
                images.append(np.array(img) / 255.0)
                labels.append(label)
            except Exception as e:
                # Best effort: report unreadable files and keep going.
                print(f"Error loading image {img_path}: {e}")
    return np.array(images), np.array(labels)

# Load Test Data
test_images, test_labels = load_images_from_folder(test_dir)
print(f"Test images shape: {test_images.shape}, Test labels shape: {test_labels.shape}")


Test images shape: (624, 150, 150, 3), Test labels shape: (624,)


In [None]:
import os
import numpy as np
from PIL import Image

# Load test images properly
def load_images_from_folder(folder_path):
    """Load every image below ``folder_path`` into NumPy arrays.

    ``folder_path`` is expected to contain one sub-directory per class. Images in
    a sub-directory named "NORMAL" get label 0; images in any other sub-directory
    get label 1.

    Args:
        folder_path: Directory whose sub-directories hold the images.

    Returns:
        ``(images, labels)``: ``images`` stacks the RGB images resized to
        (IMG_WIDTH, IMG_HEIGHT) and scaled to [0, 1]; ``labels`` holds the
        matching 0/1 labels. Both are empty arrays when nothing could be loaded.
    """
    images = []
    labels = []
    # Sorted listings make the order of the returned arrays reproducible.
    for label_folder in sorted(os.listdir(folder_path)):
        label_folder_path = os.path.join(folder_path, label_folder)
        # Skip hidden entries like .DS_Store and any stray non-directory entry;
        # calling os.listdir on a plain file raises NotADirectoryError.
        if label_folder.startswith(".") or not os.path.isdir(label_folder_path):
            continue
        label = 0 if label_folder == "NORMAL" else 1  # 0 for Normal, 1 for Pneumonia
        for filename in sorted(os.listdir(label_folder_path)):
            # Skip hidden files within subdirectories
            if filename.startswith("."):
                continue
            img_path = os.path.join(label_folder_path, filename)
            try:
                # Close the file handle promptly; convert() returns a loaded copy.
                with Image.open(img_path) as raw_img:
                    img = raw_img.convert("RGB").resize((IMG_WIDTH, IMG_HEIGHT))
                images.append(np.array(img) / 255.0)
                labels.append(label)
            except Exception as e:
                # Best effort: report unreadable files and keep going.
                print(f"Error loading image {img_path}: {e}")
    return np.array(images), np.array(labels)

# Load Test Data
test_images, test_labels = load_images_from_folder(test_dir)
print(f"Test images shape: {test_images.shape}, Test labels shape: {test_labels.shape}")

Test images shape: (624, 150, 150, 3), Test labels shape: (624,)


In [None]:
test_loss, test_accuracy = model.evaluate(test_images, test_labels)
print(f"✅ Model Test Accuracy on unseen images: {test_accuracy * 100:.2f}%")


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - accuracy: 0.9134 - loss: 0.4451
✅ Model Test Accuracy on unseen images: 72.44%


In [None]:
# Evaluate Baseline Model on Test Data
test_loss_baseline, test_accuracy_baseline = baseline_model.evaluate(test_images, test_labels)
print(f"✅ Baseline Model Test Accuracy: {test_accuracy_baseline*100:.2f}%")

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - accuracy: 0.9178 - loss: 0.3673
✅ Baseline Model Test Accuracy: 75.32%


In [None]:
def classify_image(img):
    """Classify one chest X-ray with the trained CNN (``model``).

    Args:
        img: PIL image to classify, or ``None`` when no image was supplied
            (e.g. the interface was submitted without an upload).

    Returns:
        A short text verdict: pneumonia detected, normal, or a prompt to upload
        an image when ``img`` is ``None``.
    """
    if img is None:
        # Without this guard a missing upload fails with AttributeError on .resize
        return "No image provided. Please upload a chest X-ray."

    # Force 3 channels: grayscale or RGBA inputs would not fit the model's
    # (IMG_WIDTH, IMG_HEIGHT, 3) input and used to break the reshape.
    img = img.convert("RGB").resize((IMG_WIDTH, IMG_HEIGHT))
    # Add the leading batch dimension expected by predict()
    img_array = np.expand_dims(np.array(img) / 255.0, axis=0)

    prediction = model.predict(img_array)[0][0]
    print("Prediction Score:", prediction)

    threshold = 0.5  # You can tweak this to 0.4 or 0.6 based on performance
    if prediction > threshold:
        return " Pneumonia Detected"
    else:
        return " Normal Chest X-ray"


In [None]:
import random

# Randomly select and predict 5 images from test set
for i in range(5):
    idx = random.randint(0, len(test_images) - 1)
    img = test_images[idx]
    true_label = test_labels[idx]

    img_input = img.reshape(1, IMG_WIDTH, IMG_HEIGHT, 3)
    pred = model.predict(img_input)[0][0]
    predicted_label = "Pneumonia" if pred > 0.5 else "Normal"
    true_label_text = "Pneumonia" if true_label == 1 else "Normal"

    print(f"Image {i+1}: True Label = {true_label_text}, Predicted = {predicted_label}, Score = {pred}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 653ms/step
Image 1: True Label = Normal, Predicted = Pneumonia, Score = 0.9974537491798401
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Image 2: True Label = Normal, Predicted = Pneumonia, Score = 0.951369047164917
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Image 3: True Label = Pneumonia, Predicted = Pneumonia, Score = 1.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Image 4: True Label = Pneumonia, Predicted = Pneumonia, Score = 0.9998936653137207
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Image 5: True Label = Pneumonia, Predicted = Pneumonia, Score = 0.9999897480010986


In [None]:

import gradio as gr

interface = gr.Interface(
    fn=classify_image,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="Chest X-Ray Pneumonia Detection (Final Version With Your Model)",
    description="Upload a chest X-ray image. Model will predict whether it shows signs of pneumonia or is normal."
)

interface.launch(share=True)  # share=True generates a public link


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f9cfbdaa792bcbcb24.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
def classify_Baseline_image(img):
    """Classify one chest X-ray with the baseline CNN (``baseline_model``).

    Args:
        img: PIL image to classify, or ``None`` when no image was supplied
            (e.g. the interface was submitted without an upload).

    Returns:
        A short text verdict: pneumonia detected, normal, or a prompt to upload
        an image when ``img`` is ``None``.
    """
    if img is None:
        # Without this guard a missing upload fails with AttributeError on .resize
        return "No image provided. Please upload a chest X-ray."

    # Force 3 channels: grayscale or RGBA inputs would not fit the model's
    # (IMG_WIDTH, IMG_HEIGHT, 3) input and used to break the reshape.
    img = img.convert("RGB").resize((IMG_WIDTH, IMG_HEIGHT))
    # Add the leading batch dimension expected by predict()
    img_array = np.expand_dims(np.array(img) / 255.0, axis=0)

    prediction = baseline_model.predict(img_array)[0][0]
    print("Prediction Score:", prediction)

    threshold = 0.5  # You can tweak this to 0.4 or 0.6 based on performance
    if prediction > threshold:
        return " Pneumonia Detected"
    else:
        return " Normal Chest X-ray"

In [None]:
import random

# Randomly select and predict 5 images from test set
for i in range(5):
    idx = random.randint(0, len(test_images) - 1)
    img = test_images[idx]
    true_label = test_labels[idx]

    img_input = img.reshape(1, IMG_WIDTH, IMG_HEIGHT, 3)
    pred = baseline_model.predict(img_input)[0][0]
    predicted_label = "Pneumonia" if pred > 0.5 else "Normal"
    true_label_text = "Pneumonia" if true_label == 1 else "Normal"

    print(f"Image {i+1}: True Label = {true_label_text}, Predicted = {predicted_label}, Score = {pred}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 639ms/step
Image 1: True Label = Pneumonia, Predicted = Pneumonia, Score = 0.997994065284729
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Image 2: True Label = Normal, Predicted = Pneumonia, Score = 0.6789387464523315
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Image 3: True Label = Pneumonia, Predicted = Pneumonia, Score = 0.9999642372131348
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
Image 4: True Label = Pneumonia, Predicted = Pneumonia, Score = 0.9999734163284302
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Image 5: True Label = Pneumonia, Predicted = Pneumonia, Score = 0.9999916553497314


In [None]:
import gradio as gr

interface = gr.Interface(
    fn=classify_Baseline_image,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="Chest X-Ray Pneumonia Detection (Final Version With BaseLine Model )",
    description="Upload a chest X-ray image. Model will predict whether it shows signs of pneumonia or is normal."
)

interface.launch(share=True)  # share=True generates a public link

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://893d5820004b4402f9.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




# **Pushing the Code to the GitHub Repository**

In [None]:
!apt-get install git
#installing git

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git is already the newest version (1:2.34.1-1ubuntu1.12).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.


In [None]:
!git config --global user.email "lohithkumar51001@gmail.com"
!git config --global user.name "LohithKumar0510"

In [None]:
!git init

[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/.git/


In [4]:
!git remote add origin https://github.com/CSCE5218-Spring2025/module1-LohithKumar0510.git

In [5]:
!git add "DL code final_detection_of_xrays.ipynb"


fatal: pathspec 'DL code final_detection_of_xrays.ipynb' did not match any files


In [6]:
!ls

sample_data
