In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Print the full path of every file found beneath the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [2]:
import zipfile
# (archive, destination) pairs, unpacked in order. Both CSV archives are
# extracted into a directory literally named "stage1_train_labels.csv.zip".
_ARCHIVES = (
    ("../input/data-science-bowl-2018/stage1_train.zip", "stage1_train"),
    ("../input/data-science-bowl-2018/stage1_test.zip", "stage1_test"),
    ("../input/data-science-bowl-2018/stage1_train_labels.csv.zip", "stage1_train_labels.csv.zip"),
    ("/kaggle/input/data-science-bowl-2018/stage1_solution.csv.zip", "stage1_train_labels.csv.zip"),
)
for _archive, _destination in _ARCHIVES:
    with zipfile.ZipFile(_archive, "r") as zip_ref:
        zip_ref.extractall(_destination)

In [3]:
# Seed NumPy's global RNG. The seed function must be *called*: assigning to
# `np.random.seed` would replace the function with an int and seed nothing.
seed = 42
np.random.seed(seed)

In [4]:
# Installs keras-unet-collection, which later cells import for `dice` / `dice_coef`.
# NOTE(review): the version is not pinned, so re-runs may pick up a different release.
!pip install keras-unet-collection

In [5]:
import tensorflow as tf
from tensorflow import keras
from keras.layers import (Conv2D, Conv2DTranspose,
                          MaxPooling2D, Dropout,
                          concatenate)
from keras import Model
from keras_unet_collection.losses import dice
import os
import glob
from tqdm import tqdm
import cv2 
import matplotlib.pyplot as plt
import numpy as np

In [7]:
# Directories the archives were extracted into.
TRAIN_DIR = 'stage1_train'
TEST_DIR = 'stage1_test'

# One sub-directory per training sample.
TRAIN_IMAGES = next(os.walk(TRAIN_DIR))[1]
# Training masks live under the *training* sample directories; the original
# listed TEST_DIR here, which returned the test sample ids instead.
TRAIN_MASKS = list(TRAIN_IMAGES)
# First file inside each test sample's images/ folder; top-level entries whose
# name contains '.csv' are skipped.
TEST_IMAGES = [os.listdir(f"{TEST_DIR}/{i}/images")[0] for i in os.listdir(TEST_DIR) if '.csv' not in i]

In [8]:
# Sample ids are the names of the immediate sub-directories of each split.
train_ids = next(os.walk(TRAIN_DIR))[1]
test_ids = next(os.walk(TEST_DIR))[1]

# Every image and mask is resized to this fixed shape before being stored.
IMG_WIDTH = 256
IMG_HEIGHT = 256
IMG_CHANNELS = 3

# Pre-allocated containers, filled by the loading loop.
X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
# Builtin `bool` replaces the `np.bool` alias, which was deprecated and later
# removed from NumPy (AttributeError on current releases).
Y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)

In [11]:
from tqdm import tqdm
from skimage.io import imread, imshow
from skimage.transform import resize
# Fill X_train / Y_train: one resized image and one merged mask per sample id.
print('Resizing training images and masks')
for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
    path = os.path.join(TRAIN_DIR, id_)
    # Keep only the first IMG_CHANNELS channels (drops any extra channel, e.g. alpha).
    img = imread(path + '/images/' + id_ + '.png')[:,:,:IMG_CHANNELS]
    img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
    X_train[n] = img  # stored with X_train's dtype

    # Merge every mask file of this sample into one mask via element-wise max.
    # Builtin `bool` replaces the removed `np.bool` alias.
    mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)
    for mask_file in next(os.walk(path + '/masks/'))[2]:
        mask_ = cv2.imread(path + '/masks/' + mask_file, cv2.IMREAD_GRAYSCALE)
        mask_ = np.expand_dims(resize(mask_, (IMG_HEIGHT, IMG_WIDTH), mode='constant',
                                      preserve_range=True), axis=-1)
        mask = np.maximum(mask, mask_)
    Y_train[n] = mask  # stored with Y_train's dtype

In [12]:
# Container for the resized test images, plus each image's original (height, width).
X_test = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
sizes_test = []
print('Resizing test images')
for row, test_id in tqdm(enumerate(test_ids), total=len(test_ids)):
    sample_dir = os.path.join(TEST_DIR, test_id)
    image = imread(f"{sample_dir}/images/{test_id}.png")[:, :, :IMG_CHANNELS]
    # Record the size before resizing.
    orig_height, orig_width = image.shape[0], image.shape[1]
    sizes_test.append([orig_height, orig_width])
    X_test[row] = resize(image, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)

print('Done!')

In [13]:
import random
# Show one random training image next to its mask.
# random.randint includes BOTH endpoints, so randint(0, len(train_ids)) could
# return len(train_ids) and index past the end; randrange excludes the bound.
image_x = random.randrange(len(train_ids))
imshow(X_train[image_x])
plt.show()
imshow(np.squeeze(Y_train[image_x]))
plt.show()

# UNET

In [14]:
def conv_block(input, filt):
    """Encoder stage: Conv(3x3) -> Dropout(0.1) -> Conv(3x3), then 2x2 max-pooling.

    Args:
        input: tensor fed to the first convolution.
        filt: number of filters used by both convolutions.

    Returns:
        A pair ``(features, pooled)``: the output of the second convolution
        and that same output after max-pooling.
    """
    conv_options = dict(activation='relu', kernel_initializer='he_normal', padding='same')
    features = Conv2D(filt, (3, 3), **conv_options)(input)
    features = Dropout(0.1)(features)
    features = Conv2D(filt, (3, 3), **conv_options)(features)
    pooled = MaxPooling2D((2, 2))(features)
    return features, pooled

def _double_conv(x, filt, drop_rate):
    """Conv(3x3, relu) -> Dropout(drop_rate) -> Conv(3x3, relu), 'same' padding."""
    x = Conv2D(filt, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(x)
    x = Dropout(drop_rate)(x)
    return Conv2D(filt, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(x)


def _up_block(x, skip, filt, drop_rate):
    """Upsample `x` 2x with a transposed conv, concatenate `skip`, then double conv."""
    up = Conv2DTranspose(filters=filt, kernel_size=(2, 2), strides=(2, 2), padding='same')(x)
    up = concatenate([up, skip])  # default axis=-1, i.e. the channel axis
    return _double_conv(up, filt, drop_rate)


def define_model(outchannels, img_height, img_width, img_channels):
    """Build a U-Net: four encoder stages, a bottleneck, four decoder stages.

    Args:
        outchannels: number of channels of the final 1x1 sigmoid convolution.
        img_height: input height.
        img_width: input width.
        img_channels: number of input channels.

    Returns:
        An uncompiled Keras ``Model`` mapping the input image tensor to the
        sigmoid output map. No input rescaling is done inside the model.
    """
    inputs = tf.keras.layers.Input((img_height, img_width, img_channels))
    filters = 64

    # Encoder: each stage returns its pre-pool features (skip) and pooled output.
    c1, p1 = conv_block(inputs, filters)
    c2, p2 = conv_block(p1, filters*2)
    c3, p3 = conv_block(p2, filters*4)
    c4, p4 = conv_block(p3, filters*8)

    # Bridge / bottleneck.
    c5 = _double_conv(p4, filters*16, 0.2)

    # Decoder: filter counts mirror the encoder stage supplying the skip.
    c6 = _up_block(c5, c4, filters*8, 0.2)
    c7 = _up_block(c6, c3, filters*4, 0.2)
    # Was filters*4 in the original (copy of the c7 stage); the matching
    # encoder stage c2 uses filters*2, so the decoder now mirrors it.
    c8 = _up_block(c7, c2, filters*2, 0.1)
    c9 = _up_block(c8, c1, filters, 0.1)

    outputs = Conv2D(outchannels, (1, 1), activation='sigmoid')(c9)

    model = Model(inputs=[inputs], outputs=[outputs])
    return model

In [16]:
from sklearn.model_selection import train_test_split
# Random 80/20 train/validation partition with a fixed random_state.
x_train, x_val, y_train, y_val = train_test_split(
    X_train,
    Y_train,
    random_state=42,
    test_size=0.2,
)

In [19]:
# Display the shape of the training-mask partition.
y_train.shape

In [20]:
from keras_unet_collection.losses import dice_coef
# Single-channel output model, trained with binary cross-entropy and
# monitored with the Dice coefficient.
model = define_model(
    outchannels=1,
    img_height=IMG_HEIGHT,
    img_width=IMG_WIDTH,
    img_channels=IMG_CHANNELS,
)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[dice_coef])

In [22]:
from keras.callbacks import ModelCheckpoint, TensorBoard
# Checkpoint that only overwrites the file when the monitored quantity improves.
checkpoint = ModelCheckpoint(
    'model_unet_cell_seg_v2_data_science_bowl.h5',
    save_best_only=True,
)
# Make sure the TensorBoard log directory exists (same effect as `mkdir -p`).
os.makedirs('logs/fit', exist_ok=True)
tb = TensorBoard(histogram_freq=1, log_dir='logs/fit/')

In [33]:
# Train for 15 epochs in batches of 16; sample order is left unshuffled between
# epochs. TensorBoard logging and checkpointing run as callbacks.
fit_callbacks = [tb, checkpoint]
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=15,
    batch_size=16,
    shuffle=False,
    callbacks=fit_callbacks,
)

In [36]:
# List the names of the series recorded during training.
history.history.keys()

In [37]:
X = np.arange(5)  # kept from the original cell; not used by the plotting code below
plt.rcParams['figure.figsize'] = (18,15)

# One panel per recorded series, on a 2x2 grid. Each panel is labelled with its
# own series name (the original labelled every panel 'loss'), and zip() stops
# early instead of raising IndexError when fewer than four series exist.
for m, series_name in zip([221, 222, 223, 224], history.history):
    plt.subplot(m)
    plt.plot(history.history[series_name])
    plt.title(series_name)
    plt.ylabel(series_name)
    plt.xlabel('epoch')
plt.show()

In [38]:
# Shell out to `lshw` to list display-class hardware on this machine.
!lshw -C display

In [39]:
# Save the trained model to disk.
# NOTE(review): the filename advertises "320x288" and "e-40", but this notebook
# sets IMG_WIDTH = IMG_HEIGHT = 256 and trains with epochs = 15; consider renaming.
model.save('KaggleData_v1_320x288_e-40_bs-16.h5')
# !cp Neuroblastoma_v1_320x288_e-45_bs-8.h5 /content/drive/MyDrive/Cell_seg/

In [43]:
# Disabled: upload of the TensorBoard logs to tensorboard dev.
# !printf "yes" | tensorboard dev upload \
#   --logdir logs/fit \
#   --name "KaggleData_v1_320x288_e-45_bs-16" \
#   --description " dice coef of 0.9250 bruh" \
#   --one_shot
# Bundle the log directory into a single archive.
!zip -r logs.zip logs

# Prediction

In [44]:
# random.randint includes both endpoints, so randint(0, len(X_train)) could
# produce an out-of-range index; randrange excludes the upper bound.
idx = random.randrange(len(X_train))

# Positional 90/10 slice of X_train.
# NOTE(review): this is not the partition produced by train_test_split
# (random, test_size=0.2), so the "val" slice here may contain samples the
# model was trained on. Confirm which split is intended.
split_at = int(X_train.shape[0]*0.9)
preds_train = model.predict(X_train[:split_at], verbose=1)
preds_val = model.predict(X_train[split_at:], verbose=1)
preds_test = model.predict(X_test, verbose=1)

In [45]:
# Display the shape of the test-set prediction array.
preds_test.shape

In [46]:
# Binarise the predicted maps: values strictly above 0.5 become 1, the rest 0.
preds_train_t = np.greater(preds_train, 0.5).astype(np.uint8)
preds_val_t = np.greater(preds_val, 0.5).astype(np.uint8)
preds_test_t = np.greater(preds_test, 0.5).astype(np.uint8)

In [48]:
# Perform a sanity check on a random training sample: image, ground-truth mask,
# thresholded prediction.
# randrange is used because random.randint includes its upper bound and could
# return len(preds_train_t), which indexes past the end.
ix = random.randrange(len(preds_train_t))
plt.rcParams['figure.figsize'] = (8,8)
imshow(X_train[ix])
plt.show()
imshow(np.squeeze(Y_train[ix]))
plt.show()
imshow(np.squeeze(preds_train_t[ix]))
plt.show()

# Perform a sanity check on a random sample from the last 10% of X_train, the
# slice the "val" predictions were computed on.
val_start = int(X_train.shape[0]*0.9)
ix = random.randrange(len(preds_val_t))
imshow(X_train[val_start:][ix])
plt.show()
imshow(np.squeeze(Y_train[val_start:][ix]))
plt.show()
imshow(np.squeeze(preds_val_t[ix]))
plt.show()

In [56]:
!mkdir predictions
plt.rcParams['figure.figsize'] = (15,8)
for i in range(15):
    test_img = X_test[i]
    test_pred = preds_test_t[i]
    plt.subplot(121)
    plt.imshow(test_img)
    plt.subplot(122)
    plt.imshow(test_pred)
    plt.savefig(f"predictions/prediction_{i}.png")
!zip -r predictions.zip predictions