<a href="https://colab.research.google.com/github/Logic06183/AB-testing/blob/main/Bone_Suppression_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv).
import random

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

import tensorflow as tf
from PIL import Image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Input, MaxPooling2D, concatenate, Conv2DTranspose
import tqdm
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
#since the dataset isn't organized, we can't rely on existing keras datagenerator objects. Getting the filenames lets us match input/output for batch generation
output_files = []
input_files = []
files=[]
for dirname, _, filenames in os.walk('../content/drive/My Drive/Colab Notebooks/augmented/source/'):
    for filename in filenames:
        #input_files.append(os.path.join(dirname, filename))
        files.append(filename)


num_files = len(files)
print(num_files)

#split files into training, validation, testing (without bothering to restructure file structure)

random.shuffle(files)

training_files = files[:int(num_files*.8)]
validation_files = files[int(num_files*.8):int(num_files*.9)]
test_files = files[int(num_files*.9):]



print(len(training_files) + len(validation_files) + len(test_files)) #should be the same as num_files



0
0


In [None]:
def dataGrab(input_path,output_path,files):
    input = []
    for file in tqdm.tqdm(files):
        im = Image.open(os.path.join(input_path,file))
        im = im.resize((256,256)) #needed to stop running out of RAM
        data = np.asarray(im).astype('float32')/255.0
        input.append(data)


    output = []
    for file in tqdm.tqdm(files):
        im = Image.open(os.path.join(output_path,file))
        im = im.resize((256,256))
        data = np.asarray(im).astype('float32')/255.0
        output.append(data)

    return input, output



class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self,files,input_path,output_path,batch_size = 32):
        self.files = files
        self.batch_size = batch_size
        #self.dim = dim
        self.input_path = input_path
        self.output_path  = output_path
        self.indexes = np.arange(len(self.files))

        #Here is where we load in all the data
        self.input_data,self.output_data = dataGrab(self.input_path,self.output_path,self.files)


    def __len__(self):
        #Gives number of batches per epoch
        return int(np.floor(len(self.files)/self.batch_size))

    def on_epoch_end(self):
        random.shuffle(self.indexes)



    def __getitem__(self,index):
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        X = np.zeros((self.batch_size,256,256,1))
        y = np.zeros((self.batch_size,256,256,1))

        for i, ID in enumerate(indexes):
            X[i,:,:,0] = self.input_data[ID]
            y[i,:,:,0] = self.output_data[ID]

        return X,y



In [None]:

training_generator = DataGenerator(training_files,
                             input_path = '../content/drive/My Drive/Colab Notebooks/augmented/source/',
                             output_path = '../content/drive/My Drive/Colab Notebooks/augmented/target/')

val_generator = DataGenerator(validation_files,
                             input_path = '../content/drive/My Drive/Colab Notebooks/augmented/source/',
                             output_path = '../content/drive/My Drive/Colab Notebooks/augmented/target/')


model = tf.keras.models.Sequential([
    Input(shape=(256,256,1)),
    Conv2D(64, kernel_size=(7,7), activation='elu',padding="same"),
    Conv2D(32, kernel_size=(5,5), activation='elu',padding="same"),
    Conv2D(32, kernel_size=(5,5), activation='elu',padding="same"),
    Conv2D(32, kernel_size=(3,3), activation='elu',padding="same"),
    Conv2D(32, kernel_size=(3,3), activation='elu',padding="same"),
    Conv2D(1, kernel_size=3, activation='relu',padding="same"),  
])
filepath = "epoch-{epoch:02d}-{val_accuracy:.2f}.hdf5"
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
model.compile(optimizer ="adam",loss="mse",metrics = ["Accuracy"])
model.summary()

In [None]:
#No idea to create a U-Net, but let's try
#borrowed from https://www.kaggle.com/eduardomineo/u-net-lung-segmentation-montgomery-shenzhen#3.-Segmentation-training
def unet(input_size=(256,256,1)):
    inputs = Input(input_size)

    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
    conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
    conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)

    up6 = concatenate([Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv5), conv4], axis=3)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
    conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)

    up7 = concatenate([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv6), conv3], axis=3)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
    conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)

    up8 = concatenate([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv7), conv2], axis=3)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
    conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)

    up9 = concatenate([Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv8), conv1], axis=3)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
    conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)

    conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)

    return tf.keras.Model(inputs=[inputs], outputs=[conv10])

model = unet()
filepath = "epoch-{epoch:02d}-{val_loss:.2f}.hdf5"
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=False, mode='min')
model.compile(optimizer ="adam",loss="mse")
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 256, 256, 1  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 256, 256, 32  320         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 conv2d_1 (Conv2D)              (None, 256, 256, 32  9248        ['conv2d[0][0]']                 
                                )                                                             

In [None]:
model.fit(training_generator,validation_data=val_generator,epochs=20,callbacks = [checkpoint,tf.keras.callbacks.TensorBoard("logs"),tf.keras.callbacks.CSVLogger("training_loss.csv")])
#model.fit(val_generator,epochs=3)

In [None]:
# convert testing set to numpy array to fit in memory (don't do that when testing
# set is too large)
y_test = np.zeros((n_testing_samples,))
X_test = np.zeros((n_testing_samples, 299, 299, 3))
for i, (img, label) in enumerate(test_ds.take(n_testing_samples)):
  # print(img.shape, label.shape)
  X_test[i] = img
  y_test[i] = label.numpy()

print("y_test.shape:", y_test.shape)

In [None]:
# load the weights with the least loss
model.load_weights("benign-vs-malignant_64_rmsprop_0.410.h5")

In [None]:
loss, accuracy = m.evaluate(X_test, y_test, verbose=0)

In [None]:
import cv2
img = cv2.imread('/kaggle/input/padchest-chest-xrays-sample/sample/216840111366964012819207061112010316094555679_04-017-068.png', 0)
pred = model.predict(img)