In [6]:
import os
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras import models,layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.metrics import binary_accuracy
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization , MaxPool2D


import matplotlib.pyplot as plt

In [7]:
# Image geometry: images are loaded as square IMAGE_SIZE x IMAGE_SIZE pictures
# with CHANNELS colour channels; height and width are kept as separate names
# for the layers that take them individually.
IMAGE_SIZE = 256
IMAGE_HEIGHT = IMAGE_SIZE
IMAGE_WIDTH = IMAGE_SIZE
CHANNELS = 3

# Training hyper-parameters.
BATCH_SIZE = 32
EPOCHS = 20

# Fetching Data

In [8]:
# Load the images found under "Dataset" as a batched tf.data.Dataset, resized
# to IMAGE_SIZE x IMAGE_SIZE.
# The shuffle is seeded so the file order (and therefore the train / validation
# / test split derived from it) is reproducible across kernel restarts; 12 is
# the same seed value used inside get_dataset_partitions_tf.
dataset = tf.keras.preprocessing.image_dataset_from_directory(
    "Dataset",
    shuffle=True,
    seed=12,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
)

Found 5000 files belonging to 2 classes.


In [9]:
# Class names exposed by the loaded dataset object; the bare expression on the
# last line displays them as the cell output.
class_names = dataset.class_names
class_names

['Lung_Normal', 'Lung_Tuberculosis']

# Splitting Data

In [10]:
# Function to split a dataset into train / validation / test partitions:

def get_dataset_partitions_tf(ds, train_split = 0.8 , val_split = 0.1 , test_split=0.1, shuffle=True,shuffle_size = 10000):
    """Split ``ds`` into consecutive train, validation and test partitions.

    Sizes are counted in elements of ``ds`` (i.e. in batches when ``ds`` is
    already batched) and are rounded down with ``int()``.

    Args:
        ds: Dataset-like object supporting ``len()``, ``shuffle``, ``take``
            and ``skip``.
        train_split: Fraction of the elements assigned to the training set.
        val_split: Fraction of the elements assigned to the validation set.
        test_split: Accepted for interface symmetry only; the test set is
            always whatever remains after the train and validation elements.
        shuffle: When true, shuffle ``ds`` once (fixed seed 12) before
            splitting.
        shuffle_size: Buffer size passed to ``ds.shuffle``.

    Returns:
        Tuple ``(train_dataset, validation_dataset, test_dataset)``.
    """
    ds_size = len(ds)

    if shuffle:
        # Dataset transformations return a NEW dataset, so the result has to be
        # kept. reshuffle_each_iteration=False keeps the order identical on
        # every pass; otherwise take()/skip() would select different elements
        # each time and the three partitions would overlap.
        # NOTE: the shuffle buffer holds up to `shuffle_size` elements in
        # memory while a partition is being iterated.
        ds = ds.shuffle(shuffle_size, seed=12, reshuffle_each_iteration=False)

    train_size = int(ds_size * train_split)
    validation_size = int(ds_size * val_split)

    train_dataset = ds.take(train_size)

    # Everything after the training elements is shared between validation
    # (first `validation_size` elements) and test (the rest).
    remaining = ds.skip(train_size)
    validation_dataset = remaining.take(validation_size)
    test_dataset = remaining.skip(validation_size)

    return train_dataset , validation_dataset , test_dataset

In [11]:
# Partition the loaded dataset using the function's default fractions
# (0.8 train, 0.1 validation, remainder test).
train_ds , val_ds , test_ds = get_dataset_partitions_tf(dataset)

In [12]:
# Cache each partition after its first pass and prefetch upcoming batches so
# input loading overlaps with model execution.
# Only the training partition is shuffled between epochs: evaluation metrics do
# not depend on element order, so shuffling validation / test data would only
# cost buffer memory.
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size = tf.data.AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size = tf.data.AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size = tf.data.AUTOTUNE)

In [13]:
# Preprocessing stage: resize every image to IMAGE_HEIGHT x IMAGE_WIDTH, then
# multiply pixel values by 1/255 (maps 0-255 intensities into the 0-1 range).
resize_and_rescale = tf.keras.Sequential()
resize_and_rescale.add(
    layers.experimental.preprocessing.Resizing(height=IMAGE_HEIGHT, width=IMAGE_WIDTH)
)
resize_and_rescale.add(
    layers.experimental.preprocessing.Rescaling(scale=1.0/255)
)


In [14]:
# Data augmentation: random horizontal/vertical flips and random rotations
# (factor 0.2) give the model extra variations of the original images.
# RandomZoom(0.2), RandomContrast(0.2) and RandomTranslation(0.2, 0.2) are
# further options that are currently left disabled.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(
    layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical")
)
data_augmentation.add(
    layers.experimental.preprocessing.RandomRotation(0.2)
)

# Defining CNN Model


In [19]:
# Build the CNN: input -> resize/rescale -> augmentation -> five
# Conv / BatchNorm / MaxPool blocks -> dense head with one sigmoid output.
model = Sequential()

# Declare the input shape first so the model is built while layers are added;
# an `input_shape` argument on a later layer is ignored by Sequential and
# model.summary() would then fail on an unbuilt model.
model.add(tf.keras.Input(shape = (IMAGE_HEIGHT , IMAGE_WIDTH , CHANNELS)))
model.add(resize_and_rescale)
model.add(data_augmentation)

# Block 1: 32 filters.
model.add(Conv2D(32 , (3,3) , strides = 1 , padding = 'same' , activation = 'relu'))
model.add(BatchNormalization())
model.add(MaxPool2D((2,2) , strides = 2 , padding = 'same'))

# Block 2: 64 filters, light dropout.
model.add(Conv2D(64 , (3,3) , strides = 1 , padding = 'same' , activation = 'relu'))
model.add(Dropout(0.1))
model.add(BatchNormalization())
model.add(MaxPool2D((2,2) , strides = 2 , padding = 'same'))

# Block 3: 64 filters.
model.add(Conv2D(64 , (3,3) , strides = 1 , padding = 'same' , activation = 'relu'))
model.add(BatchNormalization())
model.add(MaxPool2D((2,2) , strides = 2 , padding = 'same'))

# Block 4: 128 filters with dropout.
model.add(Conv2D(128 , (3,3) , strides = 1 , padding = 'same' , activation = 'relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(MaxPool2D((2,2) , strides = 2 , padding = 'same'))

# Block 5: 256 filters with dropout.
model.add(Conv2D(256 , (3,3) , strides = 1 , padding = 'same' , activation = 'relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(MaxPool2D((2,2) , strides = 2 , padding = 'same'))

# Classifier head: a single sigmoid unit paired with binary cross-entropy.
model.add(Flatten())
model.add(Dense(units = 128 , activation = 'relu'))
model.add(Dropout(0.2))
model.add(Dense(units = 1 , activation = 'sigmoid'))

model.compile(optimizer = "rmsprop" , loss = 'binary_crossentropy' , metrics = ['accuracy'])
model.summary()

# Train the model
history = model.fit(train_ds, epochs=EPOCHS,verbose=1, validation_data=val_ds)

# Evaluate the model
test_loss, test_acc = model.evaluate(test_ds)
print('Test accuracy:', test_acc)


Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_8 (Conv2D)           (None, 256, 256, 32)      896       
                                                                 
 batch_normalization_7 (Batc  (None, 256, 256, 32)     128       
 hNormalization)                                                 
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 128, 128, 32)     0         
 2D)                                                             
                                                                 
 conv2d_9 (Conv2D)           (None, 128, 128, 64)      18496     
                                                                 
 dropout_4 (Dropout)         (None, 128, 128, 64)      0         
                                                                 
 batch_normalization_8 (Batc  (None, 128, 128, 64)    

In [20]:
# Save the current model to 'latest_model.h5' in the working directory.
model.save('latest_model.h5')

In [4]:
# Replace `model` with the model stored in 'tuberculosis_model.h5'.
# NOTE(review): this cell's execution count (In [4]) shows it was run out of
# order, and the file it loads is only written by the final cell of the
# notebook - on a clean Restart & Run All this load may fail or pick up a
# model from an earlier session; confirm this is intended.
model = models.load_model('tuberculosis_model.h5')

In [15]:
# Continue training the current `model` on the training partition, validating
# on val_ds after each epoch. The epoch count comes from the EPOCHS constant
# (same value as before) instead of a repeated literal.
history = model.fit(train_ds, epochs=EPOCHS,verbose=1, validation_data=val_ds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
# Evaluate the model on the test partition; the result is unpacked as
# (loss, accuracy) and the accuracy is printed.
test_loss, test_acc = model.evaluate(test_ds)
print('Test accuracy:', test_acc)

Test accuracy: 0.942307710647583


In [17]:
# Save the current model to 'tuberculosis_model.h5' (the same file name the
# load_model cell reads).
model.save('tuberculosis_model.h5')