In [None]:
import urllib.request
import os
import zipfile
import random

import tensorflow as tf
from tensorflow import keras as ks
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras import layers, Model
from tensorflow.keras.applications.inception_v3 import InceptionV3
from shutil import copyfile
import matplotlib.pyplot as plt
import cv2 as cv
import numpy as np
import warnings

# Silence library warnings for the rest of the notebook.
# (The call was misspelt as `filrwarnings`, which raises AttributeError.)
warnings.filterwarnings('ignore')

In [None]:
# Pre-signed Google Cloud Storage link to the dataset archive (archive.zip).
# NOTE(review): the query string carries X-Goog-Date=20220316T020013Z and
# X-Goog-Expires=259199 (seconds), so the link is time-limited; presumably a
# freshly generated URL has to be pasted here once it lapses — TODO confirm.
data_url ='https://storage.googleapis.com/kaggle-data-sets/891819/2332307/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20220316%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20220316T020013Z&X-Goog-Expires=259199&X-Goog-SignedHeaders=host&X-Goog-Signature=02349f5f34b70ab7858629b69b2ac2b23986f1dad499f559c09a805cf06956d41310b33473cc9ea41487246283490eac5c853151c52d318e6fa16a3940926631f330db1319959d4a2c158aef711107db1f1e6586b31e9c91e298140b9afddcacc8f64a78821428161a0cf73881ca9b79e8a5a2a4a5b585214f66b94f1f3a34e7d8b35bce31694773ddaa6ad5d0a2b14af026cb94f30efb5708c7f91e42392b3f3018e25b4b575de183519e7fb7323e042afbe07c85f2ec936351baef1618aebaab5eb964d2cf762b4dd29428a38aa7ba4294e82192b2db889a582487b0727ca8a388658133b78b7a9b9b0f2b76415fa49120bc09c94e6e7ae1e909ecf69543e6'


In [None]:
data_file_name = 'tbdataset.zip'
download_dir = './dataset/'

# Download only when the archive is not already on disk, so re-running the
# cell does not fetch it again. The download goes to a temporary name and is
# renamed once complete, so an interrupted transfer is never mistaken for a
# finished archive.
if not os.path.exists(data_file_name):
    partial_name = data_file_name + '.part'
    urllib.request.urlretrieve(data_url, partial_name)
    os.replace(partial_name, data_file_name)

# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile(data_file_name, 'r') as zip_ref:
    zip_ref.extractall(download_dir)

In [None]:
# Build the training/testing folder tree with one sub-folder per class.
# os.makedirs(..., exist_ok=True) creates missing parents and does nothing for
# folders that already exist, so the cell can be re-run safely. The previous
# chain of os.mkdir calls inside a bare `except` stopped at the first existing
# folder and only printed 'error', leaving the remaining folders uncreated.
for split_dir in ('training', 'testing'):
    for class_dir in ('normal', 'tb'):
        os.makedirs(os.path.join('./dataset', split_dir, class_dir), exist_ok=True)

error


In [None]:
def split_data(source, training, testing, split_size, seed=None):
    """Randomly copy the files in ``source`` into a training and a testing folder.

    Args:
        source: Directory holding the original files.
        training: Existing directory that receives the training share.
        testing: Existing directory that receives the remaining files.
        split_size: Fraction (0-1) of the usable files copied to ``training``;
            the count is truncated with ``int()``.
        seed: Optional seed for a private random generator. With the default
            ``None`` the module-level ``random`` state is used.

    Zero-byte files are reported and left out; entries that are not regular
    files (e.g. sub-directories) are skipped because ``copyfile`` cannot copy
    them. Directory arguments may be given with or without a trailing slash.
    """
    files = []
    # Sort first so the shuffle below depends only on the RNG state, not on the
    # arbitrary order in which os.listdir() returns entries.
    for filename in sorted(os.listdir(source)):
        path = os.path.join(source, filename)
        if not os.path.isfile(path):
            continue
        if os.path.getsize(path) > 0:
            files.append(filename)
        else:
            print(f'{filename} is corrupt or empty')

    training_length = int(len(files) * split_size)
    rng = random if seed is None else random.Random(seed)
    shuffled_set = rng.sample(files, len(files))
    training_set = shuffled_set[:training_length]
    testing_set = shuffled_set[training_length:]

    for filename in training_set:
        copyfile(os.path.join(source, filename), os.path.join(training, filename))

    for filename in testing_set:
        copyfile(os.path.join(source, filename), os.path.join(testing, filename))

# Source folders sit under the same relative ./dataset/ directory the archive
# is extracted into (the previous '/content/...' prefix only resolved when the
# working directory happened to be /content).
norm_src_dir = './dataset/TB_Chest_Radiography_Database/Normal/'
tb_src_dir = './dataset/TB_Chest_Radiography_Database/Tuberculosis/'
training_norm_dir = './dataset/training/normal/'
training_tb_dir = './dataset/training/tb/'
testing_norm_dir = './dataset/testing/normal/'
testing_tb_dir = './dataset/testing/tb/'
split_size = 0.9

# Fix the RNG state so that re-running this cell reproduces the same split
# (given an unchanged directory listing). Without it, every re-run copies a
# new random split on top of the existing folders, so files can end up in both
# the training and the testing directory.
random.seed(42)
split_data(norm_src_dir, training_norm_dir, testing_norm_dir, split_size)
split_data(tb_src_dir, training_tb_dir, testing_tb_dir, split_size)

In [None]:
def resize_img(data_path, size=(256, 256)):
    """Resize every readable image in ``data_path`` and overwrite it in place.

    Args:
        data_path: Directory containing the images, with or without a
            trailing slash.
        size: Target ``(width, height)`` handed to ``cv.resize``.

    Files that OpenCV cannot decode, resize or write back are reported and
    skipped.

    Notes:
        * Paths are built with ``os.path.join``; plain ``data_path+filename``
          wrote to a mangled sibling path whenever ``data_path`` had no
          trailing slash.
        * Images are read with ``cv.imread`` rather than ``plt.imread``:
          OpenCV returns 8-bit BGR data, which is what ``cv.imwrite`` expects,
          so no channel swap is needed. ``plt.imread`` returns PNG data as
          floats in the 0-1 range.
    """
    for filename in os.listdir(data_path):
        img_path = os.path.join(data_path, filename)
        # cv.imread signals an unreadable file by returning None, not by raising.
        img = cv.imread(img_path)
        if img is None:
            print(f'{filename} could not be read, skipping')
            continue
        try:
            cv.imwrite(img_path, cv.resize(img, size))
        except cv.error as err:
            print(f'{filename} could not be resized: {err}')
# Resize the images of every split/class folder. The folders are addressed
# relative to the working directory, like the rest of the notebook, instead of
# through the Colab-only '/content' prefix.
for image_dir in (
    './dataset/training/normal',
    './dataset/training/tb',
    './dataset/testing/normal',
    './dataset/testing/tb',
):
    resize_img(image_dir)

In [None]:
# Build a MirroredStrategy over all logical GPUs TensorFlow can see.
gpus = tf.config.experimental.list_logical_devices('GPU')
strategy = tf.distribute.MirroredStrategy([gpu.name for gpu in gpus])
# Report what was actually found; the old fixed message claimed a single GPU
# even when none was available.
print(f'running on {len(gpus)} GPU(s) with {strategy.num_replicas_in_sync} replica(s)')

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
running on single gpu


In [None]:
# Random augmentations applied to training images only.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Both generators rescale pixel values by 1/255; only the training one augments.
train_datagen = ImageDataGenerator(rescale=1/255.0, **augmentation_options)
validation_datagen = ImageDataGenerator(rescale=1/255.0)

# Settings shared by both directory iterators: binary labels, batches of 100,
# images resized to 256x256.
flow_options = dict(batch_size=100, class_mode='binary', target_size=(256, 256))

training_generator = train_datagen.flow_from_directory('./dataset/training/', **flow_options)
validation_generator = validation_datagen.flow_from_directory('./dataset/testing/', **flow_options)

Found 4199 images belonging to 2 classes.
Found 1451 images belonging to 2 classes.


In [None]:
weights_url = 'https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'
weights_file = 'inception_v3.h5'

# Fetch the weights only when they are not already on disk, so re-running the
# notebook does not download them again. The file is written under a temporary
# name and renamed once complete, so an interrupted download is never picked
# up as a valid weights file.
if not os.path.exists(weights_file):
    partial_file = weights_file + '.part'
    urllib.request.urlretrieve(weights_url, partial_file)
    os.replace(partial_file, weights_file)

('inception_v3.h5', <http.client.HTTPMessage at 0x7f3fff70fc10>)

In [None]:
weights_file = 'inception_v3.h5'

with strategy.scope():
    # Instantiate the architecture without weights and without the top
    # classifier; the parameters are loaded from the local file afterwards.
    pre_trained_model = InceptionV3(
        weights=None,
        include_top=False,
        input_shape=(256, 256, 3),
    )
    pre_trained_model.load_weights(weights_file)

    # Mark every layer of the base network as non-trainable.
    for layer in pre_trained_model.layers:
        layer.trainable = False

    # The output of the 'mixed7' layer is what the next cell builds on.
    last_layer = pre_trained_model.get_layer('mixed7')
    last_output = last_layer.output
    print(last_layer.output_shape)

(None, 14, 14, 768)


In [None]:
def build_model(inputs):
    """Attach a binary-classification head to the pre-trained base.

    Args:
        inputs: Output tensor of a ``pre_trained_model`` layer to build on.

    Returns:
        A Keras ``Model`` mapping ``pre_trained_model.input`` to a single
        sigmoid unit (Flatten -> Dense(1024, relu) -> Dense(1, sigmoid)).
    """
    x = layers.Flatten()(inputs)
    x = layers.Dense(1024, activation='relu')(x)
    x = layers.Dense(1, activation='sigmoid')(x)
    return Model(pre_trained_model.input, x)


# The builder has its own name so that `model` only ever refers to the Keras
# model; previously the function itself was called `model` and was overwritten
# by its own return value.
with strategy.scope():
    model = build_model(last_output)
    model.compile(
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias
        # that newer Keras releases reject.
        optimizer=ks.optimizers.RMSprop(learning_rate=0.0001),
        loss='binary_crossentropy',
        # The metric key 'acc' is read by the callback and plotting cells.
        metrics=['acc'],
    )

In [None]:
class myCallback(tf.keras.callbacks.Callback):
    """Stop training as soon as the training accuracy exceeds a threshold.

    Args:
        threshold: Value the 'acc' metric must exceed for training to stop.
            Defaults to 0.90, the value that was previously hard-coded.
    """

    def __init__(self, threshold=0.90):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Print the epoch's 'acc' and request a stop once it passes the threshold."""
        # `logs` defaults to None rather than a shared mutable dict.
        acc = (logs or {}).get('acc')
        print(acc)
        # Guard against a missing metric: comparing None with a float raises.
        if acc is not None and acc > self.threshold:
            # The message is derived from the threshold so the two cannot
            # disagree (it used to say 99% while the check used 0.90).
            print(f'\nReached {self.threshold:.0%} accuracy so cancelling training')
            self.model.stop_training = True

callbacks = myCallback()

NameError: ignored

In [None]:
# Train for at most 10 epochs; Keras progress output is disabled (verbose=0),
# so the per-epoch lines come from the callback.
history = model.fit(
    training_generator,
    validation_data=validation_generator,
    epochs=10,
    verbose=0,
    callbacks=[callbacks],
)

0.8721123933792114
0.9397475719451904
0.9557037353515625
0.9623720049858093
0.9759466648101807
0.9754703640937805
0.9788044691085815


KeyboardInterrupt: ignored

In [None]:
def plot_metrics(metric_name):
    """Plot the training and validation curves of one metric from ``history``.

    Args:
        metric_name: Key in ``history.history`` (e.g. 'loss' or 'acc'); the
            validation series is read from the same key prefixed with 'val_'.
    """
    train_values = history.history[metric_name]
    epochs = range(len(train_values))
    val_values = history.history['val_' + metric_name]

    plt.figure(figsize=(5, 5))
    # Training curve in red, validation curve in blue.
    for values, colour in ((train_values, 'r'), (val_values, 'b')):
        plt.plot(epochs, values, color=colour)

    plt.title(f'Model {metric_name}')
    plt.legend([f'training_{metric_name}', f'val_{metric_name}'])
    plt.xlabel('epochs')
    plt.ylabel(f'{metric_name}')
    plt.show()
    
# Matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and
# later releases dropped the old name; use whichever this installation has.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
plot_metrics('loss')
plot_metrics('acc')