In [1]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Conv2D, DepthwiseConv2D, BatchNormalization, ReLU, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
import os

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
dataset_dir = '/content/drive/My Drive/Tuberculosis/TB_Chest_Radiography_Database'

In [12]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.layers import Input, Conv2D, DepthwiseConv2D, BatchNormalization, ReLU, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.utils import class_weight
from tensorflow.keras.callbacks import EarlyStopping

# Define the dataset directory
# dataset_dir = '/path/to/your/dataset'  # Replace this with the actual path to your dataset

# Set image size and batch size
image_size = (75, 100)  # Image size after resizing
batch_size = 64  # Increase batch size

# Create ImageDataGenerator for data augmentation and rescaling
train_datagen = ImageDataGenerator(
    rescale=1./255,  # Normalize pixel values to [0, 1]
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    rotation_range=30,  # Add rotation to augment the dataset
    width_shift_range=0.2,
    height_shift_range=0.2,
    validation_split=0.2  # 20% of data will be used for validation
)

validation_datagen = ImageDataGenerator(
    rescale=1./255,  # Only rescaling for validation
    validation_split=0.2
)

# Load the training data using flow_from_directory with validation split
train_generator = train_datagen.flow_from_directory(
    dataset_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',  # For multi-class classification
    subset='training'  # Training data
)

# Load the validation data using flow_from_directory
validation_generator = validation_datagen.flow_from_directory(
    dataset_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',  # For multi-class classification
    subset='validation'  # Validation data
)

import numpy as np
from sklearn.utils import class_weight

# Calculate class weights to handle class imbalance
class_weights = class_weight.compute_class_weight(
    'balanced',
    classes=np.array([0, 1]),  # Convert to numpy array
    y=train_generator.classes
)

# Convert class_weights to dictionary
class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}

# Input shape to match the shape of training data
input_cnn = Input(shape=(75, 100, 3), name='input_cnn')

# Custom MobileNet Block Definition
def mobilenet_block(x, filters, kernel_size=(3, 3), strides=(1, 1)):
    x = DepthwiseConv2D(kernel_size=kernel_size, strides=strides, padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU(6.)(x)
    x = Conv2D(filters, kernel_size=(1, 1), padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU(6.)(x)
    return x

# Initial Conv2D layer
cnn_x = Conv2D(32, kernel_size=(3, 3), strides=(2, 2), padding='same', use_bias=False)(input_cnn)
cnn_x = BatchNormalization()(cnn_x)
cnn_x = ReLU(6.)(cnn_x)

# MobileNet blocks
cnn_x = mobilenet_block(cnn_x, 64)
cnn_x = mobilenet_block(cnn_x, 128, strides=(2, 2))
cnn_x = mobilenet_block(cnn_x, 128)
cnn_x = mobilenet_block(cnn_x, 256, strides=(2, 2))
cnn_x = mobilenet_block(cnn_x, 256)
cnn_x = mobilenet_block(cnn_x, 512, strides=(2, 2))

# Flatten and Dense layers
cnn_x = GlobalAveragePooling2D()(cnn_x)
cnn_x = Dense(256, activation='relu')(cnn_x)
cnn_x = Dropout(0.5)(cnn_x)

# Number of classes should match the number of unique classes in the dataset
num_classes = len(train_generator.class_indices)

cnn_output = Dense(num_classes, activation='softmax')(cnn_x)

# Define the model
combined_model = Model(inputs=input_cnn, outputs=cnn_output, name='combined_model')

# Model summary
combined_model.summary()

# Compile the model
combined_model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Add early stopping to avoid overfitting
early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)

# Train the model
combined_model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    epochs=5,  # Try more epochs for better convergence
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // batch_size,
    class_weight=class_weight_dict,  # Add class weights to handle imbalance
    callbacks=[early_stopping]  # Add early stopping callback
)

# Save the model
combined_model.save('final_model.h5')

# Evaluate the model on validation data
validation_loss, validation_acc = combined_model.evaluate(validation_generator)
print(f'Validation Accuracy: {validation_acc * 100:.2f}%')


Found 3360 images belonging to 2 classes.
Found 840 images belonging to 2 classes.


Epoch 1/5


  self._warn_if_super_not_called()


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m160s[0m 3s/step - accuracy: 0.5912 - loss: 0.6662 - val_accuracy: 0.8341 - val_loss: 0.6084
Epoch 2/5
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7500 - loss: 0.4258 - val_accuracy: 0.7500 - val_loss: 0.6297
Epoch 3/5


  self.gen.throw(typ, value, traceback)


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 2s/step - accuracy: 0.8538 - loss: 0.3675 - val_accuracy: 0.8341 - val_loss: 0.5216
Epoch 4/5
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 58ms/step - accuracy: 0.9219 - loss: 0.2209 - val_accuracy: 0.7500 - val_loss: 0.5790
Epoch 5/5
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 3s/step - accuracy: 0.9026 - loss: 0.2473 - val_accuracy: 0.8317 - val_loss: 0.4734




[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1s/step - accuracy: 0.8376 - loss: 0.6074
Validation Accuracy: 83.33%
