# STEP-1 
Importing the required libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from keras.models import Sequential
from keras.layers import Dense,MaxPooling2D,Conv2D,Flatten,Dropout
from keras.utils import to_categorical
import cv2
import os

# STEP-2 
Load the dataset

In [None]:
import kagglehub

# Download the Kaggle TB chest X-ray dataset and remember where it landed.
path = kagglehub.dataset_download(
    "tawsifurrahman/tuberculosis-tb-chest-xray-dataset"
)

# The per-class image folders are looked up inside this sub-directory.
data_path = os.path.join(path, "TB_Chest_Radiography_Database")
print(data_path)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [None]:
import pathlib
import shutil

In [None]:
# Wrap the dataset directory in a Path object so it can be globbed.
data_dir = pathlib.Path(data_path)

In [None]:
# Count the PNG files sitting exactly one folder below the dataset root.
img_count = sum(1 for _ in data_dir.glob('*/*.png'))
img_count

In [None]:
# Names of the entries found directly inside the dataset directory.
data = os.listdir(data_path)

In [None]:
# Display the directory listing stored in `data`.
print(data)

In [None]:
# Map each class name to the PNG files found in the folder of the same name.
img_types = {
    class_name: list(data_dir.glob(f'{class_name}/*.png'))
    for class_name in ('Normal', 'Tuberculosis')
}

In [None]:
import matplotlib.image as mpimg

# Preview one example image per class, side by side.
SAMPLE_INDEX = 3  # position of the file to preview within each class
fig, axs = plt.subplots(1, 2)
for ax, (key, files) in zip(axs, img_types.items()):
    ax.set_title(key)
    # len() is used instead of truthiness because the values may be NumPy
    # arrays, whose truth value is ambiguous.
    if len(files) == 0:
        continue
    # Fall back to the last file when a class has fewer than
    # SAMPLE_INDEX + 1 images instead of raising IndexError.
    sample_path = files[min(SAMPLE_INDEX, len(files) - 1)]
    img = mpimg.imread(str(sample_path))
    ax.imshow(img)

In [None]:
# Pie chart of how many images each class contributes.
labels = ['Normal', 'Tuberculosis']
size = [len(img_types[class_name]) for class_name in labels]

colors = ['#4CAF50', '#E74C3C']  # green slice for Normal, red slice for Tuberculosis
explode = (0.05, 0.05)  # nudge both slices away from the centre

# Collect the styling options once so the plotting call stays short.
pie_style = dict(
    labels=labels,
    autopct='%1.1f%%',
    startangle=140,
    colors=colors,
    explode=explode,
    shadow=True,
    textprops={'fontsize': 12, 'color': 'black'},
)

plt.figure(figsize=(6, 6))
plt.pie(size, **pie_style)
plt.title("Dataset Distribution: Normal vs Tuberculosis", fontsize=14, fontweight='bold')
plt.show()

In [None]:
# Per-class image counts before any resampling.
num_normal, num_tb = (len(img_types[name]) for name in ('Normal', 'Tuberculosis'))

# The smaller class is the minority. On a tie Tuberculosis is treated as the
# minority, because the comparison is a strict `<`.
if num_normal < num_tb:
    minority_class, majority_class = 'Normal', 'Tuberculosis'
else:
    minority_class, majority_class = 'Tuberculosis', 'Normal'
minority_count, majority_count = sorted((num_normal, num_tb))

# Undersample: draw, without replacement, as many majority-class files as the
# minority class has.
majority_files = np.array(img_types[majority_class])
random_indices = np.random.choice(range(majority_count), size=minority_count, replace=False)
balanced_majority_samples = majority_files[random_indices]

# Store both classes back as arrays; only the img_types dictionary changes,
# nothing on disk is touched.
img_types[minority_class] = np.array(img_types[minority_class])
img_types[majority_class] = balanced_majority_samples

In [None]:
# Plot the class distribution after undersampling. The slice sizes are read
# back from img_types so the chart reflects the resampled data;
# minority_count / majority_count still hold the pre-sampling counts and
# would reproduce the original, unbalanced chart.
labels = [minority_class, majority_class]
size = [len(img_types[class_name]) for class_name in labels]
plt.pie(size, labels=labels, autopct='%1.1f%%')
plt.title('Updated Data Distribution')
plt.show()


In [None]:
from sklearn.utils import class_weight

# Target resolution and batch size used when streaming images from disk.
image_size = (224, 224)
batch_size = 32

# Generator whose only transformation is multiplying pixel values by 1/255.
data_generator = ImageDataGenerator(rescale=1./255)

# Stream images straight from the dataset directory. Every file under the
# class folders is read, so this iterator carries the original (unbalanced)
# class distribution regardless of any resampling applied to img_types.
unbalanced_image_data = data_generator.flow_from_directory(
    data_path,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True,
    # data_generator has no validation_split configured, so the 'training'
    # subset currently spans all images.
    subset='training',
    # Pass an explicit list (not a dict view): the position of each name
    # determines its integer label.
    classes=list(img_types),
)

# 'balanced' weighting: one weight per distinct label, derived from how often
# each label occurs in the iterator above.
class_weights = class_weight.compute_class_weight(
    'balanced',
    classes=np.unique(unbalanced_image_data.classes),
    y=unbalanced_image_data.classes,
)

In [None]:
# Keras expects class weights as {class index: weight}; the position of each
# weight in class_weights is used as its class index.
class_weights_dict = dict(enumerate(class_weights))

In [None]:
# Iterator used for training and evaluation.
# NOTE(review): despite its name this iterator reads every file under
# data_path, so it is NOT undersampled; the selection stored in img_types is
# only used here for the class names. Class imbalance is presumably meant to
# be handled by the class weights passed to model.fit - confirm before
# changing either side.
balanced_image_data = data_generator.flow_from_directory(
    data_path,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True,
    # data_generator has no validation_split configured, so the 'training'
    # subset currently spans all images.
    subset='training',
    # Pass an explicit list (not a dict view): the position of each name
    # determines its integer label.
    classes=list(img_types),
)

In [None]:
from keras.layers import BatchNormalization

In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Dense, BatchNormalization, Flatten, Dropout, GlobalAveragePooling2D,
    Input, Rescaling,
)
from tensorflow.keras.optimizers import Adam

# Pretrained convolutional base: ImageNet weights, no classification head.
base_model = EfficientNetB0(
    include_top=False,
    weights="imagenet",
    input_shape=(224, 224, 3)
)

# Freeze the base so that only the newly added head is trained.
base_model.trainable = False

# Build model
model = Sequential([
    Input(shape=(224, 224, 3)),
    # The image generator in this notebook scales pixels by 1/255, but Keras
    # EfficientNet normalises internally and expects values in [0, 255].
    # Undo the generator's scaling so the pretrained weights see the input
    # range they were trained on.
    Rescaling(255.0),
    base_model,
    GlobalAveragePooling2D(),       # pool each feature map to a single value
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.4),                   # regularise the new head
    Dense(1, activation='sigmoid')  # single probability for binary classification
])

# Low learning rate for training the new head on top of the frozen base.
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()


In [None]:
# Re-compile with the same optimizer settings used when the model was built.
# Passing the string 'adam' here would swap in a default-configured Adam and
# silently drop the learning_rate=1e-4 chosen earlier.
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 20 epochs on the image iterator, weighting each class's loss
# contribution with class_weights_dict.
fit_options = {'epochs': 20, 'class_weight': class_weights_dict}
model.fit(balanced_image_data, **fit_options)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Model.evaluate accepts the iterator directly; evaluate_generator is
# deprecated and no longer available in recent Keras releases.
# NOTE(review): balanced_image_data is the same iterator the model was fitted
# on, so these are training metrics rather than held-out performance.
loss, acc = model.evaluate(balanced_image_data)
print("Loss:", loss)
print("Accuracy:", acc)


In [None]:
# Report the accuracy returned by the preceding evaluation. The name
# accuracy_score is not defined or imported in the visible cells, so
# referencing it would raise NameError.
print("Accuracy Score:", acc)

**Custom CNN**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout, Input

# Layer stack for a small CNN: a 224x224x3 input, three
# conv -> batch-norm -> max-pool stages with 16, 32 and 64 filters, then a
# dense head that ends in a single sigmoid unit.
cnn_layers = [Input(shape=(224, 224, 3))]
for n_filters in (16, 32, 64):
    cnn_layers += [
        Conv2D(n_filters, kernel_size=(3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
    ]
cnn_layers += [
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),  # dropout ahead of the output layer to reduce overfitting
    Dense(1, activation='sigmoid'),
]

# Rebinds `model`, so the name no longer refers to any earlier model.
model = Sequential(cnn_layers)

model.summary()


In [None]:
# Training configuration: Adam with default settings, binary cross-entropy
# loss, and accuracy as the reported metric.
compile_options = dict(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.compile(**compile_options)

In [None]:
# Train for 20 epochs on the image iterator, weighting each class's loss
# contribution with class_weights_dict.
fit_options = {'epochs': 20, 'class_weight': class_weights_dict}
model.fit(balanced_image_data, **fit_options)

In [None]:
# Evaluate on balanced_image_data (the same iterator passed to model.fit) and
# print the returned loss and accuracy.
loss, acc = model.evaluate(balanced_image_data)
for metric_label, metric_value in (("Loss:", loss), ("Accuracy:", acc)):
    print(metric_label, metric_value)
