In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read() call while streaming a download.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<url-encoded download URL>" entries; the
# download loop below splits on ',' and ':' and unquotes the URL part.
# NOTE(review): the URL carries X-Goog-Date / X-Goog-Expires / X-Goog-Signature
# query parameters, so it is presumably a time-limited signed link that must be
# regenerated once it expires -- confirm before relying on this cell.
DATA_SOURCE_MAPPING = 'finding-lungs-in-ct-data:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F1172%2F2106%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240328%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240328T065852Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D4f41a84e40501900883a511e440ea79462ee37e8d1943ff3c9dd76a320a0aa5ddb1c36f9267e7949d7d89f34729d7a81b12bd8a71148cbd978dd40196f61a2c7538aff55ef3875fcc71ee96355a7054457733f75c495051fcb082910202e11dce541e84e022b93af05fa8f83cbdbd7aefeef8ab6da7be70c706f3aa870a20015d625bd03d17e8a68557d9481611c39f3c69c621b046abf6a389e92e352025cbd5a4bb1d58bf9e41eb09246bc40d68dd1c65fff41ce380775dee64eb2b4330d44e3228379eed4b8976364cb16daabfa999943b3cee02452560813879194e0e0fce039af03c707e924f742edccdf817943eb2ea8b0081ad87fd74ed14802fe56b9'

# Directory that every data source is extracted into (one sub-directory each).
KAGGLE_INPUT_PATH='/kaggle/input'
# Scratch/output directory created alongside the input directory.
KAGGLE_WORKING_PATH='/kaggle/working'
# Not referenced anywhere else in the visible part of this notebook.
KAGGLE_SYMLINK='kaggle'

# Start from an empty input directory: unmount anything mounted there (errors
# are discarded), delete leftovers, then recreate the input/working directories.
# The paths come from the constants above so that every step acts on the same
# location if those constants are ever changed.
!umount {KAGGLE_INPUT_PATH}/ 2> /dev/null
shutil.rmtree(KAGGLE_INPUT_PATH, ignore_errors=True)
for kaggle_dir in (KAGGLE_INPUT_PATH, KAGGLE_WORKING_PATH):
    os.makedirs(kaggle_dir, 0o777, exist_ok=True)

# Expose the directories as ../input and ../working relative to the current
# working directory. A link left over from a previous run is fine; any other
# OS error still propagates.
for link_target, link_name in ((KAGGLE_INPUT_PATH, 'input'), (KAGGLE_WORKING_PATH, 'working')):
    try:
        os.symlink(link_target, os.path.join("..", link_name), target_is_directory=True)
    except FileExistsError:
        pass

# Download every configured data source into a temporary file and unpack it
# under KAGGLE_INPUT_PATH/<directory>. A failing source is reported and skipped
# so the remaining sources are still attempted.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Each entry is "<directory>:<url-encoded URL>". Split on the first colon
    # only, so a stray unencoded ':' in the URL cannot break the unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length is optional; without it the progress bar simply
            # stays empty instead of crashing on int(None).
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            print(f'Downloading {directory}, {content_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / total_length) if total_length else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # The tar branch reopens the file by name, so buffered bytes must be
            # on disk before extraction starts.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name: reusing `tarfile` here would
                # overwrite the module and break the next non-zip data source.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


In [None]:
# %pip installs into the environment of the running kernel (a bare `!pip` can
# hit a different interpreter); the version is pinned so reruns get the same
# package.
%pip install -q segmentation-models==1.0.1

In [None]:
import os
import cv2
import zipfile
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import load_img, img_to_array

from sklearn.model_selection import train_test_split

os.environ["SM_FRAMEWORK"] = "tf.keras"
import segmentation_models as sm

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## Extract Data

In [None]:
def Extract_Data(filepath, name):
  """Unpack the zip archive at `filepath` into `<current working dir>/<name>`.

  Args:
    filepath: Path of the zip archive to read.
    name: Name of the sub-directory (relative to the current working
      directory) that receives the archive's contents.
  """
  target_dir = os.path.join(os.getcwd(), name)
  with zipfile.ZipFile(file=filepath) as archive:
    archive.extractall(target_dir)

In [None]:
# Unpack the image and mask archives, each into a directory named after it.
for archive_name in ("2d_images", "2d_masks"):
  Extract_Data(f"/kaggle/input/finding-lungs-in-ct-data/{archive_name}.zip", archive_name)

In [None]:
# Extract_Data unpacks into <current working directory>/<name>, so build the
# directories the same way instead of assuming the notebook runs in /content.
image_dir = os.path.join(os.getcwd(), "2d_images")
mask_dir = os.path.join(os.getcwd(), "2d_masks")

In [None]:
def image_reader(x):
    """Read the file at path `x` as grayscale, keep every second row/column, add a channel axis.

    Args:
        x: Path of the image file to read.

    Returns:
        The subsampled pixel array with one extra trailing axis of length 1.

    Raises:
        FileNotFoundError: If `cv2.imread` cannot read the file. It signals
            failure by returning None, which would otherwise surface as an
            unhelpful "NoneType is not subscriptable" error.
    """
    pixels = cv2.imread(x, cv2.IMREAD_GRAYSCALE)
    if pixels is None:
        raise FileNotFoundError(f"cv2.imread could not read image: {x!r}")
    return np.expand_dims(pixels[::2, ::2], axis=-1)

In [None]:
# Full paths of every image / mask file, each list ordered by file name.
image_path = []
for file_name in sorted(os.listdir(image_dir)):
    image_path.append(os.path.join(image_dir, file_name))

mask_path = []
for file_name in sorted(os.listdir(mask_dir)):
    mask_path.append(os.path.join(mask_dir, file_name))

## EDA

In [None]:
# Show the second image next to the second mask (index 1 of each path list).
fig, ax = plt.subplots(nrows=1, ncols=2)

preview_panels = (("Image", image_path[1]), ("Mask", mask_path[1]))
for panel, (panel_title, file_path) in zip(ax, preview_panels):
    panel.imshow(image_reader(file_path))
    panel.set_title(panel_title)
    panel.axis("off")

plt.show()

In [None]:
# Read every file, stack the results along a new leading axis and divide by 255.
loaded_images = np.stack(list(map(image_reader, image_path))) / 255
loaded_masks = np.stack(list(map(image_reader, mask_path))) / 255

print(len(loaded_images))

## SPLIT DATA

In [None]:
# Hold out 0.1% of the samples. random_state pins the shuffle so that a
# Restart & Run All reproduces exactly the same train/test partition.
Xtrain, Xtest, ytrain, ytest = train_test_split(loaded_images, loaded_masks, test_size=.001, random_state=42)

print(Xtrain.shape, Xtest.shape)

In [None]:
# Small fully-convolutional network: three same-padded 3x3 convolutions, one
# 2x2 max-pool, a per-pixel Dense layer (Dense acts on the last axis only), a
# single-channel convolution and a 2x up-sampling back to the input resolution.
input_layer = keras.Input(shape=Xtrain.shape[1:], name="Input_Layer")
x = keras.layers.Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding="same", activation="relu")(input_layer)
x = keras.layers.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding="same", activation="relu")(x)
x = keras.layers.Conv2D(filters=128, kernel_size=(3, 3), strides=(1, 1), padding="same", activation="relu")(x)
x = keras.layers.MaxPooling2D(pool_size=(2, 2))(x)
x = keras.layers.Dense(units=126, activation="relu")(x)
# The model is compiled with binary cross-entropy, which expects per-pixel
# probabilities in [0, 1]. A sigmoid output provides that; the previous ReLU
# output was unbounded above and gave zero gradient wherever it was clamped to 0.
x = keras.layers.Conv2D(filters=1, kernel_size=(3, 3), strides=(1, 1), padding="same", activation="sigmoid")(x)
model_output = keras.layers.UpSampling2D(size=(2, 2))(x)

model = keras.Model(inputs=input_layer, outputs=model_output)

model.summary()

In [None]:
# RMSprop at a learning rate of 1e-4, binary cross-entropy loss, accuracy metric.
model.compile(
    loss="binary_crossentropy",
    metrics=["accuracy"],
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-4),
)

In [None]:
# Stop after 5 epochs without val_accuracy improvement; cut the learning rate
# (never below 1e-5) after 2 such epochs.
stop_on_plateau = keras.callbacks.EarlyStopping(patience=5, monitor="val_accuracy")
shrink_lr_on_plateau = keras.callbacks.ReduceLROnPlateau(monitor="val_accuracy", min_lr=1e-5, patience=2)

history = model.fit(
    Xtrain,
    ytrain,
    validation_split=.1,
    batch_size=32,
    epochs=20,
    callbacks=[stop_on_plateau, shrink_lr_on_plateau],
)

## Trying a different image reader

In [None]:
def image_reader_(x):
    """Load the file at path `x` in grayscale mode via Keras and keep every second row/column.

    Args:
        x: Path of the image file to load.

    Returns:
        The array produced by `img_to_array`, subsampled by 2 along its first two axes.
    """
    grayscale_image = load_img(x, color_mode="grayscale")
    pixel_array = img_to_array(grayscale_image)
    return pixel_array[::2, ::2]

In [None]:
# Same side-by-side preview as before, this time decoded with image_reader_.
fig, ax = plt.subplots(nrows=1, ncols=2)

preview_panels_ = (("Image", image_path[1]), ("Mask", mask_path[1]))
for panel, (panel_title, file_path) in zip(ax, preview_panels_):
    panel.imshow(image_reader_(file_path))
    panel.set_title(panel_title)
    panel.axis("off")

plt.show()

In [None]:
# Read every file with image_reader_, stack along a new leading axis, divide by 255.
loaded_images_ = np.stack(list(map(image_reader_, image_path))) / 255
loaded_masks_ = np.stack(list(map(image_reader_, mask_path))) / 255

print(len(loaded_images_))

In [None]:
# Hold out 0.1% of the samples. random_state pins the shuffle so that a
# Restart & Run All reproduces exactly the same train/test partition.
Xtrain_, Xtest_, ytrain_, ytest_ = train_test_split(loaded_images_, loaded_masks_, test_size=.001, random_state=42)

print(Xtrain_.shape, Xtest_.shape)

In [None]:
# Re-compile the existing `model` object with RMSprop at a learning rate of 1e-3.
model.compile(
    loss="binary_crossentropy",
    metrics=["accuracy"],
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
)

In [None]:
# Stop after 5 epochs without val_accuracy improvement; cut the learning rate
# (never below 1e-5) after 2 such epochs.
stop_on_plateau_ = keras.callbacks.EarlyStopping(patience=5, monitor="val_accuracy")
shrink_lr_on_plateau_ = keras.callbacks.ReduceLROnPlateau(monitor="val_accuracy", min_lr=1e-5, patience=2)

model.fit(
    Xtrain_,
    ytrain_,
    validation_split=.1,
    batch_size=32,
    epochs=20,
    callbacks=[stop_on_plateau_, shrink_lr_on_plateau_],
)

In [None]:
# Random augmentation settings: shifts, rotation and zoom.
augmentation_options = dict(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
image_generator = keras.preprocessing.image.ImageDataGenerator(**augmentation_options)

In [None]:
# Re-split with a 20% hold-out. random_state pins the shuffle so that a
# Restart & Run All reproduces exactly the same partition.
# NOTE(review): `model` has already been fitted on an earlier split of these
# same arrays and is not re-created, so this hold-out presumably overlaps data
# the model has seen -- confirm whether the model should be rebuilt first.
Xtrain_, Xtest_, ytrain_, ytest_ = train_test_split(loaded_images_, loaded_masks_, test_size=.2, random_state=42)

print(Xtrain_.shape, Xtest_.shape)

In [None]:
# Re-compile the existing `model` object with Adam at a learning rate of 1e-4.
model.compile(
    loss="binary_crossentropy",
    metrics=["accuracy"],
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
)

In [None]:
# ImageDataGenerator.flow(x, y) only transforms x, which would leave every mask
# misaligned with its shifted/rotated/zoomed image. Instead, run images and
# masks through two flows that share the same seed (same shuffle order and same
# random transform per sample) and pair the batches up.
# NOTE(review): masks are interpolated by the transform, so their values are no
# longer strictly binary along edges -- acceptable for binary cross-entropy,
# but confirm that is intended.
augmentation_seed = 42
augmented_image_batches = image_generator.flow(Xtrain_, batch_size=32, seed=augmentation_seed)
augmented_mask_batches = image_generator.flow(ytrain_, batch_size=32, seed=augmentation_seed)
paired_train_batches = (
    (image_batch, mask_batch)
    for image_batch, mask_batch in zip(augmented_image_batches, augmented_mask_batches)
)

# The paired generator never ends, so one epoch is defined as one pass over the
# training set. Validation uses the untouched hold-out arrays so the monitored
# metric is not affected by random augmentation. batch_size is set on flow(),
# not on fit(), because the generator already yields batches.
model.fit(paired_train_batches,
          steps_per_epoch=len(augmented_image_batches),
          validation_data=(Xtest_, ytest_),
          epochs=100,
          callbacks=[keras.callbacks.EarlyStopping(patience=5, monitor="val_accuracy"),
                     keras.callbacks.ReduceLROnPlateau(monitor="val_accuracy", min_lr=1e-5, patience=2)])

In [None]:
# Shape of a single hold-out sample.
np.shape(Xtest_[0])

In [None]:
# Predict on a one-sample batch and drop the batch axis again. Indexing with
# [0] works for any image size, unlike a hard-coded reshape(256, 256, 1).
predicted_mask = model.predict(np.expand_dims(Xtest_[1], axis=0))[0]
plt.imshow(predicted_mask)

In [None]:
# The input image that the prediction above was computed from.
fig, input_axes = plt.subplots()
input_axes.imshow(Xtest_[1])