In [None]:
# Install/upgrade the Kaggle CLI quietly and make sure its config directory exists.
!pip install -U -q kaggle
!mkdir -p ~/.kaggle

In [None]:
# Download your Kaggle API token (kaggle.json) from Kaggle and upload it here.
from google.colab import files
# The trailing semicolon suppresses the cell's result display. Without it the
# value returned by files.upload() (the uploaded file contents, i.e. the API
# token) would be echoed into the saved notebook output.
files.upload();

: 

In [None]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [None]:
# Restrict the API token file to owner read/write, then download the dataset archive
# into the current working directory.
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download -d tawsifurrahman/tuberculosis-tb-chest-xray-dataset

Dataset URL: https://www.kaggle.com/datasets/tawsifurrahman/tuberculosis-tb-chest-xray-dataset
License(s): copyright-authors
Downloading tuberculosis-tb-chest-xray-dataset.zip to /content
100% 662M/663M [00:32<00:00, 22.8MB/s]
100% 663M/663M [00:32<00:00, 21.5MB/s]


In [None]:
# Unzip the downloaded dataset archive into the current working directory.
from zipfile import ZipFile
file_name = '/content/tuberculosis-tb-chest-xray-dataset.zip'

# Bind the archive to a name that does not shadow the built-in zip(): at notebook
# top level, `as zip` would leave `zip` rebound to the (closed) ZipFile object for
# every later cell.
with ZipFile(file_name, 'r') as archive:
  archive.extractall()
  print('Done')

Done


In [None]:
# NOTE(review): split-folders is not imported by any cell visible in this notebook
# (the train/test split uses sklearn's train_test_split) -- confirm this install is still needed.
! pip install split-folders


Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [None]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
import tensorflow as tf

# Define paths
data_dir = '/content/TB_Chest_Radiography_Database'
normal_dir = os.path.join(data_dir, 'Normal')
tb_dir = os.path.join(data_dir, 'Tuberculosis')

# Load image paths and labels.
# os.listdir() returns entries in arbitrary, filesystem-dependent order; sorting
# fixes the file order so that the seeded train/test split performed on these
# lists is reproducible across runs and machines.
normal_images = [os.path.join(normal_dir, img) for img in sorted(os.listdir(normal_dir))]
tb_images = [os.path.join(tb_dir, img) for img in sorted(os.listdir(tb_dir))]
images = normal_images + tb_images
# Labels are positional: all Normal paths first, then all Tuberculosis paths.
labels = [0] * len(normal_images) + [1] * len(tb_images)  # 0 for normal, 1 for tuberculosis

# Resize and normalize images
def preprocess_image(img_path, target_size=(224, 224)):
    """Load one image from disk, resize it, and divide its pixel values by 255.

    Args:
        img_path: Path of the image file, read with ``cv2.imread``.
        target_size: Size tuple forwarded to ``cv2.resize``; defaults to the
            224x224 size used for the model input in this notebook.

    Returns:
        The resized image as a ``float32`` array scaled by 1/255
        (values fall in [0, 1] assuming 8-bit input).

    Raises:
        ValueError: If ``cv2.imread`` could not read or decode ``img_path``.
    """
    img = cv2.imread(img_path)
    # cv2.imread reports failure by returning None rather than raising; fail here
    # with the offending path instead of a cryptic error inside cv2.resize.
    if img is None:
        raise ValueError(f"Could not read image file: {img_path}")
    img = cv2.resize(img, target_size)  # Resize to common size
    img = img.astype(np.float32) / 255.0  # Normalize pixel values
    return img

# Preprocess all images (every image is decoded and kept in memory).
processed_images = [preprocess_image(img) for img in images]

# Split dataset into train and test sets.
# stratify=labels keeps the Normal/Tuberculosis proportions the same in both
# splits, so the held-out set stays representative even if the classes are
# imbalanced.
train_images, test_images, train_labels, test_labels = train_test_split(
    processed_images, labels, test_size=0.2, random_state=42, stratify=labels)

# Build the tf.data pipelines: the training set is shuffled over its full length
# and batched; the test set is only batched, so its order stays fixed.
BATCH_SIZE = 32
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_images, train_labels))
    .shuffle(buffer_size=len(train_images))
    .batch(BATCH_SIZE)
)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_images, test_labels))
    .batch(BATCH_SIZE)
)

# Count the number of samples in each dataset
train_count = len(train_images)
test_count = len(test_images)

# Print the number of samples in each dataset
print("Number of samples in the Train Dataset:", train_count)
print("Number of samples in the Test Dataset:", test_count)


: 

In [None]:
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define DenseNet model architecture
def create_DenseNet(input_shape, num_classes, learning_rate=0.001, dropout_rate=0.5):
    """Build and compile the CNN classifier used by the tuning loop.

    Despite its name, the network is a plain ``Sequential`` stack (layers are
    chained one after another; there are no concatenating skip connections):
    a strided 7x7 convolution + max-pooling stem, four identical
    1x1-conv -> 3x3-conv -> batch-norm -> dropout stages, then a flatten and a
    softmax output layer.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D.
        num_classes: Number of units in the final softmax layer.
        learning_rate: Learning rate for the Adam optimizer.
        dropout_rate: Dropout rate applied after the stem and after every stage.

    Returns:
        The model, compiled with sparse categorical cross-entropy loss and an
        accuracy metric (labels are therefore expected as integer class ids).
    """
    # Stem: strided convolution followed by strided pooling.
    stack = [
        layers.Conv2D(64, (7, 7), strides=(2, 2), padding='same', activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'),
        layers.BatchNormalization(),
        layers.Dropout(dropout_rate),
    ]

    # Four identical stages: 1x1 conv (128 filters) then 3x3 conv (32 filters).
    for _ in range(4):
        stack.extend([
            layers.Conv2D(128, (1, 1), strides=(1, 1), padding='same', activation='relu'),
            layers.Conv2D(32, (3, 3), strides=(1, 1), padding='same', activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(dropout_rate),
        ])

    # Classification head.
    stack.append(layers.Flatten())
    stack.append(layers.Dense(num_classes, activation='softmax'))

    network = models.Sequential(stack)

    # Compile the model. `learning_rate` is the supported keyword; the old `lr`
    # alias is deprecated and rejected by newer Keras releases.
    network.compile(optimizer=optimizers.Adam(learning_rate=learning_rate),
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])

    return network

# Define input shape and number of classes
input_shape = (224, 224, 3)
num_classes = 2  # 2 classes: Normal and Tuberculosis

# Define hyperparameters for tuning
learning_rates = [0.001, 0.0001]
dropout_rates = [0.5, 0.7]
epochs = 20
# Informational only: train_dataset/test_dataset are already batched tf.data
# pipelines, and Keras' fit() documentation says batch_size must not be given
# for dataset inputs, so it is not forwarded to fit() below.
batch_size = 32

best_model = None
best_accuracy = 0.0

# Perform hyperparameter tuning: exhaustive grid over learning rate x dropout rate.
# NOTE(review): test_dataset drives early stopping, checkpointing AND the reported
# accuracy, so the "test accuracy" below is an optimistic, validation-style
# estimate -- consider holding out a separate validation split.
for learning_rate in learning_rates:
    for dropout_rate in dropout_rates:
        print(f"Training model with learning rate: {learning_rate}, dropout rate: {dropout_rate}")

        # Create a fresh model for this configuration
        model = create_DenseNet(input_shape, num_classes, learning_rate, dropout_rate)

        # Define callbacks.
        # NOTE(review): every configuration checkpoints to the same file, so
        # 'densenet_best_model.h5' is overwritten by each later configuration and
        # does not necessarily hold the overall winner (that model is saved
        # separately as 'best_densenet_model.h5' after the loop).
        early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
        model_checkpoint = ModelCheckpoint(filepath='densenet_best_model.h5', monitor='val_loss', save_best_only=True)

        # Train the model (the datasets are already batched, so no batch_size here)
        history = model.fit(train_dataset, epochs=epochs,
                            validation_data=test_dataset, callbacks=[early_stopping, model_checkpoint])

        # Evaluate the model; evaluate() returns [loss, accuracy] for the compiled metrics
        _, test_accuracy = model.evaluate(test_dataset)

        print(f"Test accuracy for current configuration: {test_accuracy}")

        # Check if current model is the best so far
        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_model = model

print("Hyperparameter tuning completed.")
print(f"Best test accuracy: {best_accuracy}")

# Save the best model. best_model stays None when no configuration scored above
# the initial 0.0 (e.g. NaN metrics); fail with a clear message in that case.
if best_model is None:
    raise RuntimeError("No configuration achieved a test accuracy above 0.0; nothing to save.")
best_model.save('best_densenet_model.h5')


: 

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import load_model

# Load the best model
best_model = load_model('best_densenet_model.h5')

# Predict on the test dataset (argmax over the softmax outputs -> class index)
y_pred = np.argmax(best_model.predict(test_dataset), axis=-1)

# Get true labels by iterating the batched dataset and joining the label batches
y_true = np.concatenate([y for x, y in test_dataset], axis=0)

# Calculate the confusion matrix. labels=[0, 1] pins it to 2x2 even when a class
# is missing from both y_true and y_pred, so the cm[1, 1] indexing below is safe.
class_ids = [0, 1]
class_names = ["Normal", "Tuberculosis"]
cm = confusion_matrix(y_true, y_pred, labels=class_ids)

# Calculate performance metrics for the Tuberculosis class (index 1).
# Rows of cm are true labels, columns are predicted labels.
accuracy = np.trace(cm) / np.sum(cm)
predicted_tb = np.sum(cm[:, 1])
actual_tb = np.sum(cm[1, :])
# Report 0.0 instead of NaN (plus a RuntimeWarning) when a denominator is zero.
precision = cm[1, 1] / predicted_tb if predicted_tb else 0.0
recall = cm[1, 1] / actual_tb if actual_tb else 0.0
sensitivity = recall  # sensitivity is the same quantity as recall

# Print the performance metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("Sensitivity:", sensitivity)

# Generate a heatmap for the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()

# Generate classification report; explicit labels keep target_names aligned even
# if one class never appears, and zero_division=0 matches the guards above.
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names, zero_division=0))


: 

In [None]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define paths
data_dir = '/content/TB_Chest_Radiography_Database'
normal_dir = os.path.join(data_dir, 'Normal')
tb_dir = os.path.join(data_dir, 'Tuberculosis')

# Load image paths and labels.
# os.listdir() returns entries in arbitrary, filesystem-dependent order; sorting
# fixes the file order so that the seeded train/test split performed on these
# lists is reproducible across runs and machines.
normal_images = [os.path.join(normal_dir, img) for img in sorted(os.listdir(normal_dir))]
tb_images = [os.path.join(tb_dir, img) for img in sorted(os.listdir(tb_dir))]
images = normal_images + tb_images
# Labels are positional: all Normal paths first, then all Tuberculosis paths.
labels = [0] * len(normal_images) + [1] * len(tb_images)  # 0 for normal, 1 for tuberculosis

# Resize and normalize images
def preprocess_image(img_path, target_size=(224, 224)):
    """Load one image from disk, resize it, and divide its pixel values by 255.

    Args:
        img_path: Path of the image file, read with ``cv2.imread``.
        target_size: Size tuple forwarded to ``cv2.resize``; defaults to the
            224x224 size used for the model input in this notebook.

    Returns:
        The resized image as a ``float32`` array scaled by 1/255
        (values fall in [0, 1] assuming 8-bit input).

    Raises:
        ValueError: If ``cv2.imread`` could not read or decode ``img_path``.
    """
    img = cv2.imread(img_path)
    # cv2.imread reports failure by returning None rather than raising; fail here
    # with the offending path instead of a cryptic error inside cv2.resize.
    if img is None:
        raise ValueError(f"Could not read image file: {img_path}")
    img = cv2.resize(img, target_size)  # Resize to common size
    img = img.astype(np.float32) / 255.0  # Normalize pixel values
    return img

# Preprocess all images (every image is decoded and kept in memory).
processed_images = [preprocess_image(img) for img in images]

# Split dataset into train and test sets.
# stratify=labels keeps the Normal/Tuberculosis proportions the same in both
# splits, so the held-out set stays representative even if the classes are
# imbalanced.
train_images, test_images, train_labels, test_labels = train_test_split(
    processed_images, labels, test_size=0.2, random_state=42, stratify=labels)

# Build the tf.data pipelines: the training set is shuffled over its full length
# and batched; the test set is only batched, so its order stays fixed.
BATCH_SIZE = 32
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_images, train_labels))
    .shuffle(buffer_size=len(train_images))
    .batch(BATCH_SIZE)
)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_images, test_labels))
    .batch(BATCH_SIZE)
)

# Define DenseNet model architecture
def create_DenseNet(input_shape, num_classes, learning_rate=0.001, dropout_rate=0.5):
    """Build and compile the CNN classifier used by the tuning loop.

    Despite its name, the network is a plain ``Sequential`` stack (layers are
    chained one after another; there are no concatenating skip connections):
    a strided 7x7 convolution + max-pooling stem, four identical
    1x1-conv -> 3x3-conv -> batch-norm -> dropout stages, then a flatten and a
    softmax output layer.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D.
        num_classes: Number of units in the final softmax layer.
        learning_rate: Learning rate for the Adam optimizer.
        dropout_rate: Dropout rate applied after the stem and after every stage.

    Returns:
        The model, compiled with sparse categorical cross-entropy loss and an
        accuracy metric (labels are therefore expected as integer class ids).
    """
    # Stem: strided convolution followed by strided pooling.
    stack = [
        layers.Conv2D(64, (7, 7), strides=(2, 2), padding='same', activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'),
        layers.BatchNormalization(),
        layers.Dropout(dropout_rate),
    ]

    # Four identical stages: 1x1 conv (128 filters) then 3x3 conv (32 filters).
    for _ in range(4):
        stack.extend([
            layers.Conv2D(128, (1, 1), strides=(1, 1), padding='same', activation='relu'),
            layers.Conv2D(32, (3, 3), strides=(1, 1), padding='same', activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(dropout_rate),
        ])

    # Classification head.
    stack.append(layers.Flatten())
    stack.append(layers.Dense(num_classes, activation='softmax'))

    network = models.Sequential(stack)

    # Compile with Adam at the requested learning rate.
    network.compile(optimizer=optimizers.Adam(learning_rate=learning_rate),
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])

    return network

# Define input shape and number of classes
input_shape = (224, 224, 3)
num_classes = 2  # 2 classes: Normal and Tuberculosis

# Define hyperparameters for tuning
learning_rates = [0.001, 0.0001]
dropout_rates = [0.5, 0.7]
epochs = 10
# Informational only: train_dataset/test_dataset are already batched tf.data
# pipelines, and Keras' fit() documentation says batch_size must not be given
# for dataset inputs, so it is not forwarded to fit() below.
batch_size = 32
early_stopping_patience = 5  # Adjusted early stopping patience

best_model = None
best_accuracy = 0.0

# Perform hyperparameter tuning: exhaustive grid over learning rate x dropout rate.
# NOTE(review): test_dataset drives early stopping, checkpointing AND the reported
# accuracy, so the "test accuracy" below is an optimistic, validation-style
# estimate -- consider holding out a separate validation split.
for learning_rate in learning_rates:
    for dropout_rate in dropout_rates:
        print(f"Training model with learning rate: {learning_rate}, dropout rate: {dropout_rate}")

        # Create a fresh model for this configuration
        model = create_DenseNet(input_shape, num_classes, learning_rate, dropout_rate)

        # Define callbacks.
        # NOTE(review): every configuration checkpoints to the same file, so
        # 'densenet_best_model.h5' is overwritten by each later configuration and
        # does not necessarily hold the overall winner (that model is saved
        # separately as 'best_densenet_model.h5' after the loop).
        early_stopping = EarlyStopping(monitor='val_loss', patience=early_stopping_patience, restore_best_weights=True)
        model_checkpoint = ModelCheckpoint(filepath='densenet_best_model.h5', monitor='val_loss', save_best_only=True)

        # Train the model (the datasets are already batched, so no batch_size here)
        history = model.fit(train_dataset, epochs=epochs,
                            validation_data=test_dataset, callbacks=[early_stopping, model_checkpoint])

        # Evaluate the model; evaluate() returns [loss, accuracy] for the compiled metrics
        _, test_accuracy = model.evaluate(test_dataset)

        print(f"Test accuracy for current configuration: {test_accuracy}")

        # Check if current model is the best so far
        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_model = model

print("Hyperparameter tuning completed.")
print(f"Best test accuracy: {best_accuracy}")

# Save the best model. best_model stays None when no configuration scored above
# the initial 0.0 (e.g. NaN metrics); fail with a clear message in that case.
if best_model is None:
    raise RuntimeError("No configuration achieved a test accuracy above 0.0; nothing to save.")
best_model.save('best_densenet_model.h5')


: 

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import load_model

# Load the best model
best_model = load_model('best_densenet_model.h5')

# Predict on the test dataset (argmax over the softmax outputs -> class index)
y_pred = np.argmax(best_model.predict(test_dataset), axis=-1)

# Get true labels by iterating the batched dataset and joining the label batches
y_true = np.concatenate([y for x, y in test_dataset], axis=0)

# Calculate the confusion matrix. labels=[0, 1] pins it to 2x2 even when a class
# is missing from both y_true and y_pred, so the cm[1, 1] indexing below is safe.
class_ids = [0, 1]
class_names = ["Normal", "Tuberculosis"]
cm = confusion_matrix(y_true, y_pred, labels=class_ids)

# Calculate performance metrics for the Tuberculosis class (index 1).
# Rows of cm are true labels, columns are predicted labels.
accuracy = np.trace(cm) / np.sum(cm)
predicted_tb = np.sum(cm[:, 1])
actual_tb = np.sum(cm[1, :])
# Report 0.0 instead of NaN (plus a RuntimeWarning) when a denominator is zero.
precision = cm[1, 1] / predicted_tb if predicted_tb else 0.0
recall = cm[1, 1] / actual_tb if actual_tb else 0.0
sensitivity = recall  # sensitivity is the same quantity as recall

# Print the performance metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("Sensitivity:", sensitivity)

# Generate a heatmap for the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()

# Generate classification report; explicit labels keep target_names aligned even
# if one class never appears, and zero_division=0 matches the guards above.
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names, zero_division=0))


: 

In [None]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define paths
data_dir = '/content/TB_Chest_Radiography_Database'
normal_dir = os.path.join(data_dir, 'Normal')
tb_dir = os.path.join(data_dir, 'Tuberculosis')

# Load image paths and labels.
# os.listdir() returns entries in arbitrary, filesystem-dependent order; sorting
# fixes the file order so that the seeded train/test split performed on these
# lists is reproducible across runs and machines.
normal_images = [os.path.join(normal_dir, img) for img in sorted(os.listdir(normal_dir))]
tb_images = [os.path.join(tb_dir, img) for img in sorted(os.listdir(tb_dir))]
images = normal_images + tb_images
# Labels are positional: all Normal paths first, then all Tuberculosis paths.
labels = [0] * len(normal_images) + [1] * len(tb_images)  # 0 for normal, 1 for tuberculosis

# Resize and normalize images
def preprocess_image(img_path, target_size=(224, 224)):
    """Load one image from disk, resize it, and divide its pixel values by 255.

    Args:
        img_path: Path of the image file, read with ``cv2.imread``.
        target_size: Size tuple forwarded to ``cv2.resize``; defaults to the
            224x224 size used for the model input in this notebook.

    Returns:
        The resized image as a ``float32`` array scaled by 1/255
        (values fall in [0, 1] assuming 8-bit input).

    Raises:
        ValueError: If ``cv2.imread`` could not read or decode ``img_path``.
    """
    img = cv2.imread(img_path)
    # cv2.imread reports failure by returning None rather than raising; fail here
    # with the offending path instead of a cryptic error inside cv2.resize.
    if img is None:
        raise ValueError(f"Could not read image file: {img_path}")
    img = cv2.resize(img, target_size)  # Resize to common size
    img = img.astype(np.float32) / 255.0  # Normalize pixel values
    return img

# Preprocess all images (every image is decoded and kept in memory).
processed_images = [preprocess_image(img) for img in images]

# Split dataset into train and test sets.
# stratify=labels keeps the Normal/Tuberculosis proportions the same in both
# splits, so the held-out set stays representative even if the classes are
# imbalanced.
train_images, test_images, train_labels, test_labels = train_test_split(
    processed_images, labels, test_size=0.2, random_state=42, stratify=labels)

# Build the tf.data pipelines: the training set is shuffled over its full length
# and batched; the test set is only batched, so its order stays fixed.
BATCH_SIZE = 32
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_images, train_labels))
    .shuffle(buffer_size=len(train_images))
    .batch(BATCH_SIZE)
)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_images, test_labels))
    .batch(BATCH_SIZE)
)

# Define DenseNet model architecture
def create_DenseNet(input_shape, num_classes, learning_rate=0.001, dropout_rate=0.5):
    """Build and compile the CNN classifier used by the tuning loop.

    Despite its name, the network is a plain ``Sequential`` stack (layers are
    chained one after another; there are no concatenating skip connections):
    a strided 7x7 convolution + max-pooling stem, four identical
    1x1-conv -> 3x3-conv -> batch-norm -> dropout stages, then a flatten and a
    softmax output layer.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D.
        num_classes: Number of units in the final softmax layer.
        learning_rate: Learning rate for the Adam optimizer.
        dropout_rate: Dropout rate applied after the stem and after every stage.

    Returns:
        The model, compiled with sparse categorical cross-entropy loss and an
        accuracy metric (labels are therefore expected as integer class ids).
    """
    # Stem: strided convolution followed by strided pooling.
    stack = [
        layers.Conv2D(64, (7, 7), strides=(2, 2), padding='same', activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same'),
        layers.BatchNormalization(),
        layers.Dropout(dropout_rate),
    ]

    # Four identical stages: 1x1 conv (128 filters) then 3x3 conv (32 filters).
    for _ in range(4):
        stack.extend([
            layers.Conv2D(128, (1, 1), strides=(1, 1), padding='same', activation='relu'),
            layers.Conv2D(32, (3, 3), strides=(1, 1), padding='same', activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(dropout_rate),
        ])

    # Classification head.
    stack.append(layers.Flatten())
    stack.append(layers.Dense(num_classes, activation='softmax'))

    network = models.Sequential(stack)

    # Compile with Adam at the requested learning rate.
    network.compile(optimizer=optimizers.Adam(learning_rate=learning_rate),
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])

    return network

# Define input shape and number of classes
input_shape = (224, 224, 3)
num_classes = 2  # 2 classes: Normal and Tuberculosis

# Define hyperparameters for tuning
learning_rates = [0.001, 0.0001]
dropout_rates = [0.5, 0.7]
epochs = 10
# Informational only: train_dataset/test_dataset are already batched tf.data
# pipelines, and Keras' fit() documentation says batch_size must not be given
# for dataset inputs, so it is not forwarded to fit() below.
batch_size = 32
early_stopping_patience = 5  # Adjusted early stopping patience

best_model = None
best_accuracy = 0.0

# Perform hyperparameter tuning: exhaustive grid over learning rate x dropout rate.
# NOTE(review): test_dataset drives early stopping, checkpointing AND the reported
# accuracy, so the "test accuracy" below is an optimistic, validation-style
# estimate -- consider holding out a separate validation split.
for learning_rate in learning_rates:
    for dropout_rate in dropout_rates:
        print(f"Training model with learning rate: {learning_rate}, dropout rate: {dropout_rate}")

        # Create a fresh model for this configuration
        model = create_DenseNet(input_shape, num_classes, learning_rate, dropout_rate)

        # Define callbacks.
        # NOTE(review): every configuration checkpoints to the same file, so
        # 'densenet_best_model.h5' is overwritten by each later configuration and
        # does not necessarily hold the overall winner (that model is saved
        # separately as 'best_densenet_model.h5' after the loop).
        early_stopping = EarlyStopping(monitor='val_loss', patience=early_stopping_patience, restore_best_weights=True)
        model_checkpoint = ModelCheckpoint(filepath='densenet_best_model.h5', monitor='val_loss', save_best_only=True)

        # Train the model (the datasets are already batched, so no batch_size here)
        history = model.fit(train_dataset, epochs=epochs,
                            validation_data=test_dataset, callbacks=[early_stopping, model_checkpoint])

        # Evaluate the model; evaluate() returns [loss, accuracy] for the compiled metrics
        _, test_accuracy = model.evaluate(test_dataset)

        print(f"Test accuracy for current configuration: {test_accuracy}")

        # Check if current model is the best so far
        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_model = model

print("Hyperparameter tuning completed.")
print(f"Best test accuracy: {best_accuracy}")

# Save the best model. best_model stays None when no configuration scored above
# the initial 0.0 (e.g. NaN metrics); fail with a clear message in that case.
if best_model is None:
    raise RuntimeError("No configuration achieved a test accuracy above 0.0; nothing to save.")
best_model.save('best_densenet_model.h5')


Training model with learning rate: 0.001, dropout rate: 0.5
Epoch 1/10
Epoch 2/10
  1/105 [..............................] - ETA: 8s - loss: 0.1065 - accuracy: 0.9688

  saving_api.save_model(


Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy for current configuration: 0.9630952477455139
Training model with learning rate: 0.001, dropout rate: 0.7
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Test accuracy for current configuration: 0.8107143044471741
Training model with learning rate: 0.0001, dropout rate: 0.5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy for current configuration: 0.9119047522544861
Training model with learning rate: 0.0001, dropout rate: 0.7
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy for current configuration: 0.836904764175415
Hyperparameter tuning completed.
Best test accuracy: 0.9630952477455139


In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import load_model

# Load the best model
best_model = load_model('best_densenet_model.h5')

# Predict on the test dataset (argmax over the softmax outputs -> class index)
y_pred = np.argmax(best_model.predict(test_dataset), axis=-1)

# Get true labels by iterating the batched dataset and joining the label batches
y_true = np.concatenate([y for x, y in test_dataset], axis=0)

# Calculate the confusion matrix. labels=[0, 1] pins it to 2x2 even when a class
# is missing from both y_true and y_pred, so the cm[1, 1] indexing below is safe.
class_ids = [0, 1]
class_names = ["Normal", "Tuberculosis"]
cm = confusion_matrix(y_true, y_pred, labels=class_ids)

# Calculate performance metrics for the Tuberculosis class (index 1).
# Rows of cm are true labels, columns are predicted labels.
accuracy = np.trace(cm) / np.sum(cm)
predicted_tb = np.sum(cm[:, 1])
actual_tb = np.sum(cm[1, :])
# Report 0.0 instead of NaN (plus a RuntimeWarning) when a denominator is zero.
precision = cm[1, 1] / predicted_tb if predicted_tb else 0.0
recall = cm[1, 1] / actual_tb if actual_tb else 0.0
sensitivity = recall  # sensitivity is the same quantity as recall

# Print the performance metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("Sensitivity:", sensitivity)

# Generate a heatmap for the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()

# Generate classification report; explicit labels keep target_names aligned even
# if one class never appears, and zero_division=0 matches the guards above.
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names, zero_division=0))


: 