In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Upgrade tensorflow_hub in the notebook environment before it is imported in the next cell.
# NOTE(review): the version is not pinned, so the installed release can differ between runs.
!pip install --upgrade tensorflow_hub

In [None]:
import re
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_hub as hub
import tensorflow_datasets as tfds
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from kaggle_datasets import KaggleDatasets
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Connect to a TPU when one can be resolved; otherwise fall back to the default
# (single-device) distribution strategy so the notebook still runs.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Device:', tpu.master())
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except Exception as tpu_error:
    # Catch Exception instead of using a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate, and report why the TPU path was skipped.
    print('TPU not available, using the default strategy:', tpu_error)
    strategy = tf.distribute.get_strategy()
print('Number of replicas:', strategy.num_replicas_in_sync)

print(tf.__version__)

In [None]:
# Notebook-wide configuration.
# AUTOTUNE is passed to map()/prefetch() so tf.data chooses the parallelism itself.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# GCS locations of the attached Kaggle datasets; the image paths are globbed from these below.
GCS_PATH_1 = KaggleDatasets().get_gcs_path('chest-xray-pneumonia')
GCS_PATH_2 = KaggleDatasets().get_gcs_path('covid19-radiography-database')
GCS_PATH_3 = KaggleDatasets().get_gcs_path('covid19-xray-dataset-train-test-sets')
GCS_PATH_4 = KaggleDatasets().get_gcs_path('covid19-pneumonia-normal-chest-xray-pa-dataset')
GCS_PATH_5 = KaggleDatasets().get_gcs_path('pneumonia-xray-images')
# Global batch size: 30 images for each replica of the distribution strategy.
BATCH_SIZE = 30 * strategy.num_replicas_in_sync
# Height and width every image is resized to (also the model's input size).
IMAGE_SIZE = [180, 180]
# Number of epochs requested from every fit() call.
EPOCHS = 35
print(BATCH_SIZE)

In [None]:
# GCS location of the RSNA pneumonia dataset; it is only used by the second training stage further down.
GCS_PATH_6 = KaggleDatasets().get_gcs_path('rsna-pneumonia-dataset-in-jpg-format')

In [None]:
# Gather every image path used by the first training stage from the attached datasets.
files = tf.io.gfile.glob(GCS_PATH_1 + '/chest_xray/train/*/*')
files.extend(tf.io.gfile.glob(GCS_PATH_2 + '/COVID-19_Radiography_Dataset/Viral Pneumonia/*'))
files.extend(tf.io.gfile.glob(GCS_PATH_3 + '/xray_dataset_covid19/train/PNEUMONIA/*'))
files.extend(tf.io.gfile.glob(GCS_PATH_4 + '/normal/*'))
files.extend(tf.io.gfile.glob(GCS_PATH_4 + '/pneumonia/*'))
files.extend(tf.io.gfile.glob(GCS_PATH_5 + '/val/*/*'))

# Only a random 44% of the "Normal" radiography images is added to the pool.
# A fixed random_state keeps the selected subset identical across kernel restarts.
normal_maxi = tf.io.gfile.glob(GCS_PATH_2 + '/COVID-19_Radiography_Dataset/Normal/*')
normal_min, extra = train_test_split(normal_maxi, test_size=0.44, random_state=42)
files = files + extra

In [None]:
# Hold out 10% of the files for testing, cut the rest into two halves (one per
# training round), and reserve 20% of each half for validation.
# random_state is fixed so every split is reproducible after a kernel restart.
train_base, test_base = train_test_split(files, test_size=0.1, random_state=42)
test_1, test_2 = train_test_split(test_base, test_size=0.5, random_state=42)
train_base_1, train_base_2 = train_test_split(train_base, test_size=0.5, random_state=42)
train_1, val_1 = train_test_split(train_base_1, test_size=0.2, random_state=42)
train_2, val_2 = train_test_split(train_base_2, test_size=0.2, random_state=42)

In [None]:
def count_normal_pneu(file):
    """Count normal and pneumonia images in a collection of file paths and print a summary.

    Each path is classified by the name of its parent directory, compared
    case-insensitively, which is the same path component get_label() inspects.
    Matching the whole directory name (instead of searching for a substring
    anywhere in the path) keeps "nopneumonia" files from also being counted as
    pneumonia, and "opacity" folders are counted as pneumonia.

    Args:
        file: iterable of path strings ("/" or "\\" separated).

    Returns:
        Tuple (COUNT_NORMAL, COUNT_PNEUMONIA).
    """
    normal_dirs = {"normal", "nopneumonia"}
    pneumonia_dirs = {"pneumonia", "viral pneumonia", "opacity"}

    def parent_dir(path):
        # Second-to-last path component, lower-cased; empty when the path has no parent.
        parts = path.replace("\\", "/").split("/")
        return parts[-2].lower() if len(parts) >= 2 else ""

    parents = [parent_dir(path) for path in file]
    total = len(parents)

    print("\n\tTotal images count in training set : " + str(total))
    COUNT_NORMAL = sum(1 for parent in parents if parent in normal_dirs)
    print("\n\tNormal images count in training set : " + str(COUNT_NORMAL))

    COUNT_PNEUMONIA = sum(1 for parent in parents if parent in pneumonia_dirs)
    print("\tPneumonia images count in training set : " + str(COUNT_PNEUMONIA))

    # Sanity check: every file should fall into exactly one of the two classes.
    if total == (COUNT_NORMAL + COUNT_PNEUMONIA):
        print("\n\tThe function classified the data correctly ", COUNT_NORMAL + COUNT_PNEUMONIA)
    else:
        print("\n\tThe function classification is wrong ", COUNT_NORMAL + COUNT_PNEUMONIA)
    return COUNT_NORMAL, COUNT_PNEUMONIA

In [None]:
# Report the class balance of every split; only the counts of train_1 are kept.
print("\nTRAIN_1_DETAILS :")
COUNT_NORMAL, COUNT_PNEUMONIA = count_normal_pneu(train_1)

for split_title, split_files in (
    ("VAL_1", val_1),
    ("TRAIN_2", train_2),
    ("VAL_2", val_2),
    ("TEST_1", test_1),
):
    print("\n" + split_title + "_DETAILS :")
    count_normal_pneu(split_files)

# Kept as the last bare expression so the cell still displays its return value.
print("\nTEST_2_DETAILS :")
count_normal_pneu(test_2)

In [None]:
def file_info(file):
    """Wrap a list of file paths in a tf.data.Dataset and report its size.

    Args:
        file: list of image path strings.

    Returns:
        Tuple (dataset of path strings, number of elements in that dataset).
    """
    path_ds = tf.data.Dataset.from_tensor_slices(file)
    n_images = tf.data.experimental.cardinality(path_ds).numpy()
    print("Images count: " + str(n_images), "\n")
    return path_ds, n_images
    

In [None]:
# Path datasets (and their sizes) for the first training round.
train_list_ds_1, TRAIN_IMG_COUNT_1 = file_info(train_1)
val_list_ds_1, VAL_IMG_COUNT_1 = file_info(val_1)

# Show the first five training paths as a quick sanity check.
for path_tensor in train_list_ds_1.take(5):
    print(path_tensor.numpy())

In [None]:
# Path datasets (and their sizes) for the second training round.
train_list_ds_2, TRAIN_IMG_COUNT_2 = file_info(train_2)
val_list_ds_2, VAL_IMG_COUNT_2 = file_info(val_2)

# Show the first five training paths as a quick sanity check.
for path_tensor in train_list_ds_2.take(5):
    print(path_tensor.numpy())

In [None]:
# Path datasets (and their sizes) for the two halves of the held-out test split.
test_list_ds_1,TEST_IMG_COUNT_1=file_info(test_1)
test_list_ds_2,TEST_IMG_COUNT_2=file_info(test_2)

test_1 and test_2 are superseded by test_base, the combined test split they were drawn from.

In [None]:
# Path dataset (and its size) for the whole held-out test split.
test_list_ds_base,TEST_IMG_COUNT_base=file_info(test_base)

In [None]:
def get_label(file_path):
    """Derive the binary label of an image from the name of its parent directory.

    Args:
        file_path: path of the image file, split on os.path.sep.

    Returns:
        True when the parent directory is one of the pneumonia folder names
        ("PNEUMONIA", "pneumonia", "opacity", "Viral Pneumonia"); False for
        every other directory name, including the normal folders.
    """
    folder = tf.strings.split(file_path, os.path.sep)[-2]
    is_pneumonia = (
        (folder == "PNEUMONIA")
        or (folder == "pneumonia")
        or (folder == "opacity")
        or (folder == "Viral Pneumonia")
    )
    if is_pneumonia:
        return True
    else:
        # Normal folders and any unrecognised folder name both map to False.
        return False

In [None]:
def decode_img(img):
    """Decode raw image bytes into a 3-channel float32 tensor resized to IMAGE_SIZE.

    Args:
        img: encoded image contents as read from the file.

    Returns:
        The decoded image, converted to float32 and resized to IMAGE_SIZE.
    """
    # expand_animations=False keeps animated inputs from adding a frame dimension.
    decoded = tf.image.decode_image(img, channels=3, expand_animations=False)
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    resized = tf.image.resize(as_float, IMAGE_SIZE)
    return resized

In [None]:
def process_path(file_path):
    """Turn an image path into an (image, label) pair.

    Args:
        file_path: path of the image file.

    Returns:
        Tuple (decoded image from decode_img, label from get_label).
    """
    raw_bytes = tf.io.read_file(file_path)
    return decode_img(raw_bytes), get_label(file_path)

In [None]:
# This cell used to run `test_max = test_max.map(process_path, num_parallel_calls=AUTOTUNE)`,
# but `test_max` is not defined anywhere in this notebook, so the statement raised
# NameError on a fresh kernel and its result was never used. The statement was removed;
# the test split is mapped through process_path where `test_ds_base` is built.

In [None]:
# This cell used to run `test = test.map(process_path, num_parallel_calls=AUTOTUNE)`,
# but `test` is not defined anywhere in this notebook, so the statement raised
# NameError on a fresh kernel and its result was never used. The statement was removed;
# the test split is mapped through process_path where `test_ds_base` is built.

In [None]:
# Round 1: turn the path datasets into (image, label) datasets, loading files in parallel.
train_ds_1 = train_list_ds_1.map(process_path, num_parallel_calls=AUTOTUNE)

val_ds_1 = val_list_ds_1.map(process_path, num_parallel_calls=AUTOTUNE)


In [None]:
# Round 2: turn the path datasets into (image, label) datasets, loading files in parallel.
train_ds_2 = train_list_ds_2.map(process_path, num_parallel_calls=AUTOTUNE)

val_ds_2 = val_list_ds_2.map(process_path, num_parallel_calls=AUTOTUNE)

In [None]:
# (image, label) datasets for the two halves of the test split.
test_ds_1 = test_list_ds_1.map(process_path, num_parallel_calls=AUTOTUNE)
test_ds_2 = test_list_ds_2.map(process_path, num_parallel_calls=AUTOTUNE)

In [None]:
# (image, label) dataset for the whole test split; this is the one passed to evaluate().
test_ds_base = test_list_ds_base.map(process_path, num_parallel_calls=AUTOTUNE)

In [None]:
def prepare_for_Data_Aug(ds, cache=False, repeat=False, shuffle=False, shuffle_buffer_size=1000):
    """Cache, shuffle, repeat, batch and prefetch a dataset of (image, label) pairs.

    No pixel rescaling happens here. decode_img() already converts the decoded
    integer image with tf.image.convert_image_dtype(..., tf.float32), which
    scales pixel values into [0, 1]; dividing by 255 a second time would squash
    the inputs into [0, 1/255].

    Args:
        ds: dataset yielding (image, label) pairs.
        cache: falsy for no caching, True to cache in memory, or a string
            filename to cache on disk.
        repeat: when True the dataset repeats indefinitely, so fit() must be
            given explicit step counts.
        shuffle: when True the elements are shuffled.
        shuffle_buffer_size: buffer size used when shuffle is True.

    Returns:
        The dataset batched by BATCH_SIZE with prefetching enabled.
    """
    if cache:
        # A string selects a file-backed cache; any other truthy value caches in memory.
        ds = ds.cache(cache) if isinstance(cache, str) else ds.cache()

    if shuffle:
        # Shuffling after the cache gives a fresh order on every pass over the data.
        ds = ds.shuffle(buffer_size=shuffle_buffer_size)

    if repeat:
        ds = ds.repeat()

    return ds.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

In [None]:
# Round 1 pipelines: cached, shuffled with a buffer of a quarter of the split, and
# repeated indefinitely (fit() is therefore given explicit step counts).
train_ds_1 = prepare_for_Data_Aug(train_ds_1,cache=True,repeat=True,shuffle=True,shuffle_buffer_size=(TRAIN_IMG_COUNT_1//4))

val_ds_1 = prepare_for_Data_Aug(val_ds_1,cache=True,repeat=True,shuffle=True,shuffle_buffer_size=(VAL_IMG_COUNT_1//4))

In [None]:
# Round 2 pipelines: cached, shuffled with a buffer of a quarter of the split, and
# repeated indefinitely (fit() is therefore given explicit step counts).
train_ds_2 = prepare_for_Data_Aug(train_ds_2,cache=True,repeat=True,shuffle=True,shuffle_buffer_size=(TRAIN_IMG_COUNT_2//4))

val_ds_2 = prepare_for_Data_Aug(val_ds_2,cache=True,repeat=True,shuffle=True,shuffle_buffer_size=(VAL_IMG_COUNT_2//4))

In [None]:
# Test pipelines use the defaults: no caching, shuffling or repeating.
test_ds_1= prepare_for_Data_Aug(test_ds_1)
test_ds_2= prepare_for_Data_Aug(test_ds_2)

In [None]:
# Full test pipeline with the defaults: no caching, shuffling or repeating.
test_ds_base= prepare_for_Data_Aug(test_ds_base)

In [None]:
def conv_block(filters):
    """Build a convolutional stage: two separable 3x3 convs, batch norm, max pooling.

    Args:
        filters: number of output filters of both separable convolutions.

    Returns:
        A tf.keras.Sequential containing the four layers.
    """
    conv_kwargs = dict(activation='relu', padding='same')
    return tf.keras.Sequential([
        tf.keras.layers.SeparableConv2D(filters, 3, **conv_kwargs),
        tf.keras.layers.SeparableConv2D(filters, 3, **conv_kwargs),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPool2D(),
    ])

In [None]:
def dense_block(units, dropout_rate):
    """Build a fully connected stage: ReLU dense layer, batch norm, dropout.

    Args:
        units: width of the dense layer.
        dropout_rate: rate passed to the Dropout layer.

    Returns:
        A tf.keras.Sequential containing the three layers.
    """
    stage_layers = [
        tf.keras.layers.Dense(units, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(dropout_rate),
    ]
    return tf.keras.Sequential(stage_layers)

In [None]:
def data_aug():
    """Build the augmentation stage: a random horizontal flip followed by a random vertical flip.

    Returns:
        A tf.keras.Sequential containing the two RandomFlip layers.
    """
    random_flip = tf.keras.layers.experimental.preprocessing.RandomFlip
    flips = [random_flip(mode) for mode in ("horizontal", "vertical")]
    return tf.keras.Sequential(flips)

In [None]:
def build_model():
    """Assemble the CNN classifier.

    The stack is: input of IMAGE_SIZE x 3 channels, flip augmentation, two plain
    3x3 convolutions with pooling, four separable-convolution stages (the last
    two followed by dropout), three dense stages, and a single sigmoid unit.

    Returns:
        An uncompiled tf.keras.Sequential model.
    """
    # Layers are created in stacking order and the outer Sequential is built last,
    # which keeps Keras' auto-generated layer names stable.
    stack = [
        tf.keras.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)),
        data_aug(),
    ]

    stack += [
        tf.keras.layers.Conv2D(16, 3, activation='relu', padding='same'),
        tf.keras.layers.Conv2D(16, 3, activation='relu', padding='same'),
        tf.keras.layers.MaxPool2D(),
    ]

    stack += [conv_block(32), conv_block(64)]

    # The two widest convolutional stages are each followed by light dropout.
    for filters in (128, 256):
        stack += [conv_block(filters), tf.keras.layers.Dropout(0.2)]

    stack.append(tf.keras.layers.Flatten())
    for units, rate in ((512, 0.7), (128, 0.5), (64, 0.3)):
        stack.append(dense_block(units, rate))

    stack.append(tf.keras.layers.Dense(1, activation='sigmoid'))

    return tf.keras.Sequential(stack)

In [None]:
# Build and compile the model inside the strategy scope so its variables are
# created under the active distribution strategy.
with strategy.scope():

    model_1 = build_model()

    METRICS = [
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall')
    ]

    model_1.compile(
        optimizer=keras.optimizers.Adam(),
        # build_model() ends in a sigmoid unit, so the loss receives probabilities,
        # not logits; from_logits must therefore be False.
        loss=keras.losses.BinaryCrossentropy(from_logits=False),
        metrics=METRICS
    )

In [None]:
# Save the model to pneu_model.h5 only when the monitored quantity improves.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint("pneu_model.h5",
                                                    save_best_only=True)

# Stop after 10 epochs without improvement and roll back to the best weights seen.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=10,
                                                     restore_best_weights=True)

In [None]:
def exponential_decay(lr0, s):
    """Create a learning-rate schedule that decays by a factor of 10 every `s` epochs.

    Args:
        lr0: learning rate at epoch 0.
        s: number of epochs over which the rate shrinks tenfold.

    Returns:
        A function mapping an epoch index to lr0 * 0.1 ** (epoch / s).
    """
    def exponential_decay_fn(epoch):
        exponent = epoch / s
        return lr0 * 0.1 ** exponent
    return exponential_decay_fn

# Schedule starting at 0.01 and shrinking tenfold every 20 epochs.
exponential_decay_fn = exponential_decay(0.01, 20)

# Callback that asks the schedule for the learning rate of each epoch.
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(exponential_decay_fn)

In [None]:
# Round 1 training. The datasets repeat indefinitely, so the number of steps per
# epoch is given explicitly: the ceiling of the true division, so the final
# partial batch is counted and the step count is never 0 for a non-empty split.
hist_1 = model_1.fit(
    train_ds_1,
    steps_per_epoch=int(np.ceil(TRAIN_IMG_COUNT_1 / BATCH_SIZE)),
    epochs=EPOCHS,
    validation_data=val_ds_1,
    validation_steps=int(np.ceil(VAL_IMG_COUNT_1 / BATCH_SIZE)),
    callbacks=[checkpoint_cb, early_stopping_cb, lr_scheduler]
)

In [None]:
# Print the layer-by-layer summary of the trained round-1 model.
model_1.summary()

In [None]:
# Evaluate the round-1 model on the whole held-out test split.
model_1.evaluate(test_ds_base)

In [None]:
# Persist the round-1 model in SavedModel format; the next round reloads it from this path.
model_1.save('./Pneumonia_1_tf',save_format='tf')

In [None]:
# Reload the round-1 model as the starting point for round 2.
# NOTE(review): this load happens outside strategy.scope(), unlike the original
# build/compile; confirm the reloaded model is meant to train without the strategy.
model_2 = tf.keras.models.load_model('./Pneumonia_1_tf')

In [None]:
# Print the layer-by-layer summary of the reloaded model.
model_2.summary()

In [None]:
# Round 2 training on the second half of the data. The datasets repeat
# indefinitely, so step counts are the ceiling of the true division: the final
# partial batch is counted and the step count is never 0 for a non-empty split.
hist_2 = model_2.fit(
    train_ds_2,
    steps_per_epoch=int(np.ceil(TRAIN_IMG_COUNT_2 / BATCH_SIZE)),
    epochs=EPOCHS,
    validation_data=val_ds_2,
    validation_steps=int(np.ceil(VAL_IMG_COUNT_2 / BATCH_SIZE)),
)

In [None]:
# Evaluate the round-2 model on the whole held-out test split.
model_2.evaluate(test_ds_base)

In [None]:
# One panel per metric, each showing the training and validation curves of round 2.
fig, ax = plt.subplots(1, 4, figsize=(20, 3))
ax = ax.ravel()

for panel, metric_name in zip(ax, ('precision', 'recall', 'accuracy', 'loss')):
    train_curve = hist_2.history[metric_name]
    val_curve = hist_2.history['val_' + metric_name]
    panel.plot(train_curve)
    panel.plot(val_curve)
    panel.set_title('Model {}'.format(metric_name))
    panel.set_xlabel('epochs')
    panel.set_ylabel(metric_name)
    panel.legend(['train', 'val'])

In [None]:
# Archive the saved round-1 model directory so it can be downloaded as one file.
!zip -r pneumonia_again.zip {'./Pneumonia_1_tf'}

In [None]:
# Persist the round-2 model in SavedModel format and archive its directory.
model_2.save('./Pneumonia_2_tf',save_format='tf')
!zip -r Pneumonia_again_2.zip {'./Pneumonia_2_tf'}

In [None]:
# NOTE(review): this repeats the zip command of the previous cell on the same archive and directory.
!zip -r Pneumonia_again_2.zip {'./Pneumonia_2_tf'}

In [None]:
# Second-stage data: the chest_xray training images again (the same glob as the first
# stage) plus the pneumonia / nopneumonia folders of the RSNA dataset.
data1_full= tf.io.gfile.glob(str(GCS_PATH_1 + '/chest_xray/train/*/*'))
data2_pneu=tf.io.gfile.glob(str(GCS_PATH_6 + '/Data/pneumonia/*'))
data2_nor=tf.io.gfile.glob(str(GCS_PATH_6 + '/Data/nopneumonia/*'))

In [None]:
# Set aside 9% of the RSNA pneumonia images and 2% of the non-pneumonia images,
# then keep only 40% of the remaining non-pneumonia images for training.
# random_state is fixed so the selection is reproducible after a kernel restart.
train_data2_pneu, test_data2_pneu = train_test_split(data2_pneu, test_size=0.09, random_state=42)
train_data2_nor, test_data2_nor = train_test_split(data2_nor, test_size=0.02, random_state=42)
waste, train_data2_nor_final = train_test_split(train_data2_nor, test_size=0.4, random_state=42)
final_data = train_data2_nor_final + train_data2_pneu + data1_full

In [None]:
# Cut the second-stage pool into three parts (32% for bus_3, the rest halved into
# bus_1 and bus_2) and reserve 20% of each part for validation.
# random_state is fixed so every split is reproducible after a kernel restart.
bus_1_2, bus_3 = train_test_split(final_data, test_size=0.32, random_state=42)
bus_1, bus_2 = train_test_split(bus_1_2, test_size=0.5, random_state=42)
bus_train_1, bus_val_1 = train_test_split(bus_1, test_size=0.2, random_state=42)
bus_train_2, bus_val_2 = train_test_split(bus_2, test_size=0.2, random_state=42)
bus_train_3, bus_val_3 = train_test_split(bus_3, test_size=0.2, random_state=42)

In [None]:
# Path datasets (and their sizes) for second-stage part 1.
bus_train_list_ds_1, BUS_TRAIN_IMG_COUNT_1 = file_info(bus_train_1)
bus_val_list_ds_1, BUS_VAL_IMG_COUNT_1 = file_info(bus_val_1)

# Show the first five training paths as a quick sanity check.
for path_tensor in bus_train_list_ds_1.take(5):
    print(path_tensor.numpy())

In [None]:
# Path datasets (and their sizes) for second-stage part 2.
bus_train_list_ds_2, BUS_TRAIN_IMG_COUNT_2 = file_info(bus_train_2)
bus_val_list_ds_2, BUS_VAL_IMG_COUNT_2 = file_info(bus_val_2)

# Show the first five training paths as a quick sanity check.
for path_tensor in bus_train_list_ds_2.take(5):
    print(path_tensor.numpy())

In [None]:
# Path datasets (and their sizes) for second-stage part 3.
bus_train_list_ds_3, BUS_TRAIN_IMG_COUNT_3 = file_info(bus_train_3)
bus_val_list_ds_3, BUS_VAL_IMG_COUNT_3 = file_info(bus_val_3)

# Show the first five training paths as a quick sanity check.
for path_tensor in bus_train_list_ds_3.take(5):
    print(path_tensor.numpy())

In [None]:
# Part 1: turn the path datasets into (image, label) datasets, loading files in parallel.
bus_train_ds_1 = bus_train_list_ds_1.map(process_path, num_parallel_calls=AUTOTUNE)

bus_val_ds_1 = bus_val_list_ds_1.map(process_path, num_parallel_calls=AUTOTUNE)

In [None]:
# Part 2: turn the path datasets into (image, label) datasets, loading files in parallel.
bus_train_ds_2 = bus_train_list_ds_2.map(process_path, num_parallel_calls=AUTOTUNE)

bus_val_ds_2 = bus_val_list_ds_2.map(process_path, num_parallel_calls=AUTOTUNE)

In [None]:
# Part 3: turn the path datasets into (image, label) datasets, loading files in parallel.
bus_train_ds_3 = bus_train_list_ds_3.map(process_path, num_parallel_calls=AUTOTUNE)

bus_val_ds_3 = bus_val_list_ds_3.map(process_path, num_parallel_calls=AUTOTUNE)

In [None]:
# Part 1 pipelines: cached, shuffled with a buffer of a quarter of the split, and
# repeated indefinitely (fit() is therefore given explicit step counts).
bus_train_ds_1 = prepare_for_Data_Aug(bus_train_ds_1,cache=True,repeat=True,shuffle=True,shuffle_buffer_size=(BUS_TRAIN_IMG_COUNT_1//4))

bus_val_ds_1 = prepare_for_Data_Aug(bus_val_ds_1,cache=True,repeat=True,shuffle=True,shuffle_buffer_size=(BUS_VAL_IMG_COUNT_1//4))

In [None]:
# Part 2 pipelines: cached, shuffled with a buffer of a quarter of the split, and
# repeated indefinitely (fit() is therefore given explicit step counts).
bus_train_ds_2 = prepare_for_Data_Aug(bus_train_ds_2,cache=True,repeat=True,shuffle=True,shuffle_buffer_size=(BUS_TRAIN_IMG_COUNT_2//4))

bus_val_ds_2 = prepare_for_Data_Aug(bus_val_ds_2,cache=True,repeat=True,shuffle=True,shuffle_buffer_size=(BUS_VAL_IMG_COUNT_2//4))

In [None]:
# Part 3 pipelines: cached, shuffled with a buffer of a quarter of the split, and
# repeated indefinitely (fit() is therefore given explicit step counts).
bus_train_ds_3 = prepare_for_Data_Aug(bus_train_ds_3,cache=True,repeat=True,shuffle=True,shuffle_buffer_size=(BUS_TRAIN_IMG_COUNT_3//4))

bus_val_ds_3 = prepare_for_Data_Aug(bus_val_ds_3,cache=True,repeat=True,shuffle=True,shuffle_buffer_size=(BUS_VAL_IMG_COUNT_3//4))

In [None]:
# Reload the round-2 model as the starting point for the second training stage.
# NOTE(review): this load happens outside strategy.scope(); confirm the reloaded
# model is meant to train without the distribution strategy.
model_3=tf.keras.models.load_model('./Pneumonia_2_tf')

In [None]:
# Continue training on second-stage part 3. The datasets repeat indefinitely, so
# step counts are the ceiling of the true division: the final partial batch is
# counted and the step count is never 0 for a non-empty split.
hist_3 = model_3.fit(
    bus_train_ds_3,
    steps_per_epoch=int(np.ceil(BUS_TRAIN_IMG_COUNT_3 / BATCH_SIZE)),
    epochs=EPOCHS,
    validation_data=bus_val_ds_3,
    validation_steps=int(np.ceil(BUS_VAL_IMG_COUNT_3 / BATCH_SIZE)),
)

In [None]:
# Persist the model in SavedModel format and archive its directory.
model_3.save('./Pneumonia_3_tf',save_format='tf')
!zip -r pneumonia_again_3.zip {'./Pneumonia_3_tf'}

In [None]:
# Reload the previously saved model as the starting point for the next round.
# NOTE(review): this load happens outside strategy.scope(); confirm the reloaded
# model is meant to train without the distribution strategy.
model_4=tf.keras.models.load_model('./Pneumonia_3_tf')

In [None]:
# Print the layer-by-layer summary of the reloaded model.
model_4.summary()

In [None]:
# Continue training on second-stage part 1. The datasets repeat indefinitely, so
# step counts are the ceiling of the true division: the final partial batch is
# counted and the step count is never 0 for a non-empty split.
hist_4 = model_4.fit(
    bus_train_ds_1,
    steps_per_epoch=int(np.ceil(BUS_TRAIN_IMG_COUNT_1 / BATCH_SIZE)),
    epochs=EPOCHS,
    validation_data=bus_val_ds_1,
    validation_steps=int(np.ceil(BUS_VAL_IMG_COUNT_1 / BATCH_SIZE)),
)

In [None]:
# Persist the model in SavedModel format and archive its directory.
model_4.save('./Pneumonia_4_tf',save_format='tf')
!zip -r pneumonia_again_4.zip {'./Pneumonia_4_tf'}

In [None]:
# Reload the previously saved model as the starting point for the final round.
# NOTE(review): this load happens outside strategy.scope(); confirm the reloaded
# model is meant to train without the distribution strategy.
model_5=tf.keras.models.load_model('./Pneumonia_4_tf')

In [None]:
# Continue training on second-stage part 2. The datasets repeat indefinitely, so
# step counts are the ceiling of the true division: the final partial batch is
# counted and the step count is never 0 for a non-empty split.
hist_5 = model_5.fit(
    bus_train_ds_2,
    steps_per_epoch=int(np.ceil(BUS_TRAIN_IMG_COUNT_2 / BATCH_SIZE)),
    epochs=EPOCHS,
    validation_data=bus_val_ds_2,
    validation_steps=int(np.ceil(BUS_VAL_IMG_COUNT_2 / BATCH_SIZE)),
)

In [None]:
# Persist the final model in SavedModel format.
model_5.save('./Pneumonia_5_tf',save_format='tf')


In [None]:
# Archive the saved final model directory so it can be downloaded as one file.
!zip -r pneumonia_again_5.zip {'./Pneumonia_5_tf'}